% NOTE(review): volume/number/pages were blank in the export (possibly an
% online-first record at export time); add them once assigned.
@article{pmid37728044,
  year     = {2023},
  author   = {Lin, Y. and Zhu, Y. and Cui, Y. and Qian, H. and Yuan, Q. and Chen, R. and Lin, Y. and Chen, J. and Zhou, X. and Shi, C. and He, H. and Hu, T. and Gu, C. and Yu, X. and Zhu, X. and Wang, Y. and Qian, Q. and Zhang, C. and Wang, F. and Shang, L.},
  title    = {Identification of natural allelic variation in {TTL1} controlling thermotolerance and grain size by a rice super pan-genome},
  journal  = {Journal of integrative plant biology},
  doi      = {10.1111/jipb.13568},
  pmid     = {37728044},
  issn     = {1744-7909},
  abstract = {Continuously increasing global temperatures present great challenges to food security. Grain size, one of the critical components determining grain yield in rice (Oryza sativa L.), is a prime target for genetic breeding. Thus, there is an immediate need for genetic improvement in rice to maintain grain yield under heat stress. However, quantitative trait loci (QTLs) endowing heat stress tolerance and grain size in rice are extremely rare. Here, we identified a novel negative regulator with pleiotropic effects, Thermo-Tolerance and grain Length 1 (TTL1), from the super pan-genomic and transcriptomic data. Loss-of-function mutations in TTL1 enhanced heat tolerance, and caused an increase in grain size by coordinating cell expansion and proliferation. TTL1 was shown to function as a transcriptional regulator and localized to the nucleus and cell membrane. Furthermore, haplotype analysis showed that hap [L] and hap [S] of TTL1 were obviously correlated with variations of thermotolerance and grain size in a core collection of cultivars. Genome evolution analysis of available rice germplasms suggested that TTL1 was selected during domestication of the indica and japonica rice subspecies, but still had much breeding potential for increasing grain length and thermotolerance. These findings provide insights into TTL1 as a novel potential target for development of high-yield and thermotolerant rice varieties.
This article is protected by copyright. All rights reserved.},
}
@article{pmid37727231,
  year     = {2023},
  author   = {Rios Galicia, B. and Sáenz, J. S. and Yergaliyev, T. and Camarinha-Silva, A. and Seifert, J.},
  title    = {Host specific adaptations of {Ligilactobacillus} aviarius to poultry},
  journal  = {Current research in microbial sciences},
  volume   = {5},
  pages    = {100199},
  pmid     = {37727231},
  issn     = {2666-5174},
  abstract = {The genus Ligilactobacillus encompasses species adapted to vertebrate hosts and fermented food. Their genomes encode adaptations to the host lifestyle. Reports of gut microbiota from chicken and turkey gastrointestinal tract have shown a high persistence of Ligilactobacillus aviarius along the digestive system compared to other species found in the same host. However, its adaptations to poultry as a host has not yet been described. In this work, the pan-genome of Ligilactobacillus aviarius was explored to describe the functional adaptability to the gastrointestinal environment. The core genome is composed of 1179 gene clusters that are present at least in one copy that codifies to structural, ribosomal and biogenesis proteins. The rest of the identified regions were classified into three different functional clusters of orthologous groups (clusters) that codify carbohydrate metabolism, envelope biogenesis, viral defence mechanisms, and mobilome inclusions. The pan-genome of Ligilactobacillus aviarius is a closed pan-genome, frequently found in poultry and highly prevalent across chicken faecal samples. The genome of L. aviarius codifies different clusters of glycoside hydrolases and glycosyltransferases that mediate interactions with the host cells. Accessory features, such as antiviral mechanisms and prophage inclusions, variate amongst strains from different GIT sections. This information provides hints about the interaction of this species with viral particles and other bacterial species. This work highlights functional adaptability traits present in L. aviarius that make it a dominant key member of the poultry gut microbiota and enlightens the convergent ecological relation of this species to the poultry gut environment.},
}
% NOTE(review): volume/number/pages were blank in the export (possibly an
% online-first record at export time); add them once assigned.
@article{pmid37722405,
  year     = {2023},
  author   = {Low, S. J. and O'Neill, M. T. and Kerry, W. J. and Krysiak, M. and Papadakis, G. and Whitehead, L. W. and Savic, I. and Prestedge, J. and Williams, L. and Cooney, J. P. and Tran, T. and Lim, C. K. and Caly, L. and Towns, J. M. and Bradshaw, C. S. and Fairley, C. and Chow, E. P. F. and Chen, M. Y. and Pellegrini, M. and Pasricha, S. and Williamson, D. A.},
  title    = {Rapid detection of monkeypox virus using a {CRISPR-Cas12a} mediated assay: a laboratory validation and evaluation study},
  journal  = {The Lancet. Microbe},
  doi      = {10.1016/S2666-5247(23)00148-9},
  pmid     = {37722405},
  issn     = {2666-5247},
  abstract = {BACKGROUND: The 2022 outbreak of mpox (formerly known as monkeypox) led to the spread of monkeypox virus (MPXV) in over 110 countries, demanding effective disease management and surveillance. As current diagnostics rely largely on centralised laboratory testing, our objective was to develop a simple rapid point-of-care assay to detect MPXV in clinical samples using isothermal amplification coupled with CRISPR and CRISPR-associated protein (Cas) technology.
METHODS: In this proof-of-concept study, we developed a portable isothermal amplification CRISPR-Cas12a-based assay for the detection of MPXV. We designed a panel of 22 primer-guide RNA sets using pangenome and gene-agnostic approaches, and subsequently shortlisted the three sets producing the strongest signals for evaluation of analytical sensitivity and specificity using a fluorescence-based readout. The set displaying 100\% specificity and the lowest limit of detection (LOD) was selected for further assay validation using both a fluorescence-based and lateral-flow readout. Assay specificity was confirmed using a panel of viral and bacterial pathogens. Finally, we did a blind concordance study on genomic DNA extracted from 185 clinical samples, comparing assay results with a gold-standard quantitative PCR (qPCR) assay. We identified the optimal time to detection and analysed the performance of the assay relative to qPCR using receiver operating characteristic (ROC) curves. We also assessed the compatibility with lateral-flow strips, both visually and computationally, where strips were interpreted blinded to the fluorescence results on the basis of the presence or absence of test bands.
FINDINGS: With an optimal run duration of approximately 45 min from isothermal amplification to CRISPR-assay readout, the MPXV recombinase polymerase amplification CRISPR-Cas12a-based assay with the selected primer-guide set had an LOD of 1 copy per μL and 100\% specificity against tested viral pathogens. Blinded concordance testing of 185 clinical samples resulted in 100\% sensitivity (95\% CI 89·3-100) and 99·3\% specificity (95\% CI 95·7-100) using the fluorescence readout. For optimal time to detection by fluorescence readout, we estimated the areas under the ROC curve to be 0·98 at 2 min and 0·99 at 4 min. Lateral-flow strips had 100\% sensitivity (89·3-100) and 98·6\% specificity (94·7-100) with both visual and computational assessment. Overall, lateral-flow results were highly concordant with fluorescence-based readouts (179 of 185 tests, 96·8\% concordant), with discrepancies associated with low viral load samples.
INTERPRETATION: Our assay for the diagnosis of mpox displayed good performance characteristics compared with qPCR. Although optimisation of the assay will be required before deployment, its usability and versatility present a potential solution to MPXV detection in low-resource and remote settings, as well as a means of community-based, on-site testing.
FUNDING: Victorian Medical Research Accelerator Fund and the Australian Government Department of Health.},
}
% NOTE(review): volume/number/pages were blank in the export (possibly an
% online-first record at export time); add them once assigned.
@article{pmid37714713,
  year     = {2023},
  author   = {Dai, X. and Bian, P. and Hu, D. and Luo, F. and Huang, Y. and Jiao, S. and Wang, X. and Gong, M. and Li, R. and Cai, Y. and Wen, J. and Yang, Q. and Deng, W. and Nanaei, H. A. and Wang, Y. and Wang, F. and Zhang, Z. and Rosen, B. D. and Heller, R. and Jiang, Y.},
  title    = {A {Chinese} indicine pangenome reveals a wealth of novel structural variants introgressed from other {Bos} species},
  journal  = {Genome research},
  doi      = {10.1101/gr.277481.122},
  pmid     = {37714713},
  issn     = {1549-5469},
  abstract = {Chinese indicine cattle harbor a much higher genetic diversity compared with other domestic cattle, but their genome architecture remains uninvestigated. Using PacBio HiFi sequencing data from 10 Chinese indicine cattle across southern China, we assembled 20 high-quality partially phased genomes and integrated them into a multiassembly graph containing 148.5 Mb (5.6\%) of novel sequence. We identified 156,009 high-confidence nonredundant structural variants (SVs) and 206 SV hotspots spanning ∼195 Mb of gene-rich sequence. We detected 34,249 archaic introgressed fragments in Chinese indicine cattle covering 1.93 Gb (73.3\%) of the genome. We inferred an average of 3.8\%, 3.2\%, 1.4\%, and 0.5\% of introgressed sequence originating, respectively, from banteng-like, kouprey-like, gayal-like, and gaur-like Bos species, as well as 0.6\% of unknown origin. Introgression from multiple donors might have contributed to the genetic diversity of Chinese indicine cattle. Altogether, this study highlights the contribution of interspecies introgression to the genomic architecture of an important livestock population and shows how exotic genomic elements can contribute to the genetic variation available for selection.},
}
@article{pmid37710263,
  year     = {2023},
  author   = {Zhu, Q. and Dovletgeldiyev, A. and Shen, C. and Li, K. and Hu, S. and He, Z.},
  title    = {Comparative genomic analysis of {Fusobacterium} nucleatum reveals high intra-species diversity and cgmlst marker construction},
  journal  = {Gut pathogens},
  volume   = {15},
  number   = {1},
  pages    = {43},
  pmid     = {37710263},
  issn     = {1757-4749},
  abstract = {BACKGROUND: Fusobacterium nucleatum is a one of the most important anaerobic opportunistic pathogens in the oral and intestinal tracts of human and animals. It can cause various diseases such as infections, Lemierre's syndrome, oral cancer and colorectal cancer. The comparative genomic studies on the population genome level, have not been reported.
RESULTS: We analyzed all publicly available Fusobacterium nucleatums' genomic data for a comparative genomic study, focusing on the pan-genomic features, virulence genes, plasmid genomes and developed cgmlst molecular markers. We found the pan-genome shows a clear open tendency and most of plasmids in Fusobacterium nucleatum are mainly transmitted intraspecifically.
CONCLUSIONS: Our comparative analysis of Fusobacterium nucleatum systematically revealed the open pan-genomic features and phylogenetic tree based on cgmlst molecular markers. What's more, we also identified common plasmid typing among genomes. We hope that our study will provide a theoretical basis for subsequent functional studies.},
}
@article{pmid37710174,
  year     = {2023},
  author   = {Mahboob, S. and Ullah, N. and Farhan Ul Haque, M. and Rauf, W. and Iqbal, M. and Ali, A. and Rahman, M.},
  title    = {Genomic characterization and comparative genomic analysis of {HS-associated} {Pasteurella} multocida serotype {B:2} strains from {Pakistan}},
  journal  = {BMC genomics},
  volume   = {24},
  number   = {1},
  pages    = {546},
  pmid     = {37710174},
  issn     = {1471-2164},
  support  = {NRPU-7254//Higher Education Commision, Pakistan/ ; },
  abstract = {BACKGROUND: Haemorrhagic septicaemia (HS) is a highly fatal and predominant disease in livestock, particularly cattle and buffalo in the tropical regions of the world. Pasteurella multocida (P. multocida), serotypes B:2 and E:2, are reported to be the main causes of HS wherein serotype B:2 is more common in Asian countries including Pakistan and costs heavy financial losses every year. As yet, very little molecular and genomic information related to the HS-associated serotypes of P. multocida isolated from Pakistan is available. Therefore, this study aimed to explore the characteristics of novel bovine isolates of P. multocida serotype B:2 at the genomic level and perform comparative genomic analysis of various P. multocida strains from Pakistan to better understand the genetic basis of pathogenesis and virulence.
RESULTS: To understand the genomic variability and pathogenomics, we characterized three HS-associated P. multocida serotype B:2 strains isolated from the Faisalabad (PM1), Peshawar (PM2) and Okara (PM3) districts of Punjab, Pakistan. Together with the other nine publicly available Pakistani-origin P. multocida strains and a reference strain Pm70, a comparative genomic analysis was performed. The sequenced strains were characterized as serotype B and belong to ST-122. The strains contain no plasmids; however, each strain contains at least two complete prophages. The pan-genome analysis revealed a higher number of core genes indicating a close resemblance to the studied genomes and very few genes (1\%) of the core genome serve as a part of virulence, disease, and defense mechanisms. We further identified that studied P. multocida B:2 strains harbor common antibiotic resistance genes, specifically PBP3 and EF-Tu. Remarkably, the distribution of virulence factors revealed that OmpH and plpE were not present in any P. multocida B:2 strains while the presence of these antigens was reported uniformly in all serotypes of P. multocida.
CONCLUSION: This study's findings indicate the absence of OmpH and PlpE in the analyzed P. multocida B:2 strains, which are known surface antigens and provide protective immunity against P. multocida infection. The availability of additional genomic data on P. multocida B:2 strains from Pakistan will facilitate the development of localized therapeutic agents and rapid diagnostic tools specifically targeting HS-associated P. multocida B:2 strains.},
}
@article{pmid37695773,
  year     = {2023},
  author   = {Le Naour-Vernet, M. and Charriat, F. and Gracy, J. and Cros-Arteil, S. and Ravel, S. and Veillet, F. and Meusnier, I. and Padilla, A. and Kroj, T. and Cesari, S. and Gladieux, P.},
  title    = {Adaptive evolution in virulence effectors of the rice blast fungus {Pyricularia} oryzae},
  journal  = {PLoS pathogens},
  volume   = {19},
  number   = {9},
  pages    = {e1011294},
  doi      = {10.1371/journal.ppat.1011294},
  pmid     = {37695773},
  issn     = {1553-7374},
  abstract = {Plant pathogens secrete proteins called effectors that target host cellular processes to promote disease. Recently, structural genomics has identified several families of fungal effectors that share a similar three-dimensional structure despite remarkably variable amino-acid sequences and surface properties. To explore the selective forces that underlie the sequence variability of structurally-analogous effectors, we focused on MAX effectors, a structural family of effectors that are major determinants of virulence in the rice blast fungus Pyricularia oryzae. Using structure-informed gene annotation, we identified 58 to 78 MAX effector genes per genome in a set of 120 isolates representing seven host-associated lineages. The expression of MAX effector genes was primarily restricted to the early biotrophic phase of infection and strongly influenced by the host plant. Pangenome analyses of MAX effectors demonstrated extensive presence/absence polymorphism and identified gene loss events possibly involved in host range adaptation. However, gene knock-in experiments did not reveal a strong effect on virulence phenotypes suggesting that other evolutionary mechanisms are the main drivers of MAX effector losses. MAX effectors displayed high levels of standing variation and high rates of non-synonymous substitutions, pointing to widespread positive selection shaping the molecular diversity of MAX effectors. The combination of these analyses with structural data revealed that positive selection acts mostly on residues located in particular structural elements and at specific positions. By providing a comprehensive catalog of amino acid polymorphism, and by identifying the structural determinants of the sequence diversity, our work will inform future studies aimed at elucidating the function and mode of action of MAX effectors.},
}
% NOTE(review): volume/number were blank in the export and the page range
% 1--16 looks like online-first pagination; update once the issue is assigned.
@article{pmid37695632,
  year     = {2023},
  author   = {Naveed, M. and Mahmood, S. and Aziz, T. and Azeem, A. and Hussain, I. and Waseem, M. and Ali, A. and Alharbi, M. and Alshammari, A. and Alasmari, A. F.},
  title    = {Designing a novel chimeric multi-epitope vaccine subunit against {Staphylococcus} argenteus through artificial intelligence approach integrating pan-genome analysis, in vitro identification, and immunogenicity profiling},
  journal  = {Journal of biomolecular structure \& dynamics},
  pages    = {1--16},
  doi      = {10.1080/07391102.2023.2256881},
  pmid     = {37695632},
  issn     = {1538-0254},
  abstract = {Staphylococcus argenteus is a newly identified pathogen that causes respiratory tract infections, skin infections, such as cellulitis, abscesses, and impetigo, and currently, there is no licensed vaccine available against it. To develop a vaccine against S. argenteus, a bacterial pan-genome analysis was applied to identify potential vaccine candidates. A total of 4908 core proteins were retrieved and utilized for identifying four proteins, including SG38 Panton-Valentine leukocidin LukS-PV protein, SG62 staphylococcal enterotoxin type A protein, SG39 enterotoxin B protein, and SG43 enterotoxin type C3 protein as potential vaccine candidates. Epitopes were predicted for these proteins using different types of B and T-cell epitope prediction tools, and only those with a non-toxic profile, antigenic, non-allergenic, and immunogenic were selected. The selected epitopes were linked to each other to form a multi-epitope vaccine construct, which was further linked to the PADRE sequence (AKFVAAWTLKAAA) and 50s ribosomal L7/L12 protein to enhance the vaccine's antigenicity. The three-dimensional structure of the vaccine construct was assessed to determine its binding affinity with key Toll-like receptor 9 (TLR-9) and Toll-like receptor 5 (TLR-5) immune cell receptors. Our findings demonstrate that the vaccine exhibits favorable binding interactions with these immune cell receptors, indicating its potential efficacy. Molecular dynamic simulations further confirmed the accessibility of vaccine epitopes to the host immune system, substantiating its ability to elicit protective immune responses. Taken together, this study highlights the promising candidacy of the modeled vaccine construct for future in vivo and in vitro experimental investigations. Communicated by Ramaswamy H. Sarma.},
}
@article{pmid37692398,
  year     = {2023},
  author   = {Villacís, J. E. and Castelán-Sánchez, H. G. and Rojas-Vargas, J. and Rodríguez-Cruz, U. E. and Albán, V. and Reyes, J. A. and Meza-Rodríguez, P. M. and Dávila-Ramos, S. and Villavicencio, F. and Galarza, M. and Gestal, M. C.},
  title    = {Emergence of {Raoultella} ornithinolytica in human infections from different hospitals in {Ecuador} with {OXA-48-producing} resistance},
  journal  = {Frontiers in microbiology},
  volume   = {14},
  pages    = {1216008},
  doi      = {10.3389/fmicb.2023.1216008},
  pmid     = {37692398},
  issn     = {1664-302X},
  abstract = {PURPOSE: The purpose of this study was to highlight the clinical and molecular features of 13 Raoultella ornithinolytica strains isolated from clinical environments in Ecuador, and to perform comparative genomics with previously published genomes of Raoultella spp. As Raoultella is primarily found in environmental, clinical settings, we focused our work on identifying mechanisms of resistance that can provide this bacterium an advantage to establish and persist in hospital environments.
METHODS: We analyzed 13 strains of Raoultella ornithinolytica isolated from patients with healthcare associated infections (HAI) in three hospitals in Quito and one in Santo Domingo de Los Tsáchilas, Ecuador, between November 2017 and April 2018. These isolates were subjected to phenotypic antimicrobial susceptibility testing, end-point polymerase chain reaction (PCR) to detect the presence of carbapenemases and whole-genome sequencing.
RESULTS: Polymerase chain reaction revealed that seven isolates were positive isolates for blaOXA-48 and one for blaKPC-2 gene. Of the seven strains that presented the blaOXA-48 gene, six harbored it on an IncFII plasmid, one was inserted into the bacterial chromosome. The blaKPC gene was detected in an IncM2/IncR plasmid. From the bioinformatics analysis, nine genomes had the gene blaOXA-48, originating from Ecuador. Moreover, all R. ornithinolytica strains contained the ORN-1 gene, which confers resistance for β-lactams, such as penicillins and cephalosporins. Comparative genome analysis of the strains showed that the pangenome of R. ornithinolytica is considered an open pangenome, with 27.77\% of core genes, which could be explained by the fact that the antibiotic resistance genes in the ancestral reconstruction are relatively new, suggesting that this genome is constantly incorporating new genes.
CONCLUSION: These results reveal the genome plasticity of R. ornithinolytica, particularly in acquiring antibiotic-resistance genes. The genomic surveillance and infectious control of these uncommon species are important since they may contribute to the burden of antimicrobial resistance and human health.},
}
@article{pmid37690289,
  year     = {2023},
  author   = {Sarker, P. and Mitro, A. and Hoque, H. and Hasan, M. N. and Nurnabi Azad Jewel, G. M.},
  title    = {Identification of potential novel therapeutic drug target against {Elizabethkingia} anophelis by integrative pan and subtractive genomic analysis: An in silico approach},
  journal  = {Computers in biology and medicine},
  volume   = {165},
  pages    = {107436},
  doi      = {10.1016/j.compbiomed.2023.107436},
  pmid     = {37690289},
  issn     = {1879-0534},
  abstract = {Elizabethkingia anophelis is a human pathogen responsible for severe nosocomial infections in neonates and immunocompromised patients. The significantly higher mortality rate from E. anophelis infections and the lack of available regimens highlight the critical need to explore novel drug targets. The current study investigated effective novel drug targets by employing a comprehensive in silico subtractive genomic approach integrated with pangenomic analysis of E. anophelis strains. A total of 2809 core genomic proteins were found by pangenomic analysis of non-paralogous proteins. Subsequently, 156 pathogen-specific, 442 choke point, 202 virulence factor, 53 antibiotic resistant and 119 host-pathogen interacting proteins were identified in E. anophelis. By subtractive genomic approach, at first 791 proteins were found to be indispensable for the survival of E. anophelis. 558 and 315 proteins were detected as non-homologous to human and gut microflora respectively. Following that 245 cytoplasmic, 245 novel, and 23 broad-spectrum targets were selected and finally four proteins were considered as potential therapeutic targets of E. anophelis based on highest degree score in PPI network. Among those, three proteins were subjected to molecular docking and subsequent MD simulation as one protein did not contain a plausible binding pocket with sufficient surface area and volume. All the complexes were found to be stable and compact in 100 ns molecular dynamics simulation studies as measured by RMSD, RMSF, and Rg. These three short-listed targets identified in this study may lead to the development of novel antimicrobials capable of curing infections and pave the way to prevent and control the disease progression caused by the deadly agent E. anophelis.},
}
@article{pmid37684624,
  year     = {2023},
  author   = {Nageeb, W. M. and Hetta, H. F.},
  title    = {Pangenome analysis of {Corynebacterium} striatum: insights into a neglected multidrug-resistant pathogen},
  journal  = {BMC microbiology},
  volume   = {23},
  number   = {1},
  pages    = {252},
  pmid     = {37684624},
  issn     = {1471-2180},
  abstract = {BACKGROUND: Over the past two decades, Corynebacterium striatum has been increasingly isolated from clinical cultures with most isolates showing increased antimicrobial resistance (AMR) to last resort agents. Advances in the field of pan genomics would facilitate the understanding of the clinical significance of such bacterial species previously thought to be among commensals paving the way for identifying new drug targets and control strategies.
METHODS: We constructed a pan-genome using 310 genome sequences of C. striatum. Pan-genome analysis was performed using three tools including Roary, PIRATE, and PEPPAN. AMR genes and virulence factors have been studied in relation to core genome phylogeny. Genomic Islands (GIs), Integrons, and Prophage regions have been explored in detail.
RESULTS: The pan-genome ranges between a total of 5253-5857 genes with 2070 - 1899 core gene clusters. Some antimicrobial resistance genes have been identified in the core genome portion, but most of them were located in the dispensable genome. In addition, some well-known virulence factors described in pathogenic Corynebacterium species were located in the dispensable genome. A total of 115 phage species have been identified with only 44 intact prophage regions.
CONCLUSION: This study presents a detailed comparative pangenome report of C. striatum. The species show a very slowly growing pangenome with relatively high number of genes in the core genome contributing to lower genomic variation. Prophage elements carrying AMR and virulence elements appear to be infrequent in the species. GIs appear to offer a prominent role in mobilizing antibiotic resistance genes in the species and integrons occur at a frequency of 50\% in the species. Control strategies should be directed against virulence and resistance determinants carried on the core genome and those frequently occurring in the accessory genome.},
}
% NOTE(review): the export repeated the same PAPD grant line eleven times
% (presumably once per author); it is recorded once here.
@article{pmid37679681,
  year     = {2023},
  author   = {Wang, Y. and Xu, X. and Chen, H. and Yang, F. and Xu, B. and Wang, K. and Liu, Q. and Liang, G. and Zhang, R. and Jiao, X. and Zhang, Y.},
  title    = {Assessment of beneficial effects and identification of host adaptation-associated genes of {Ligilactobacillus} salivarius isolated from badgers},
  journal  = {BMC genomics},
  volume   = {24},
  number   = {1},
  pages    = {530},
  pmid     = {37679681},
  issn     = {1471-2164},
  support  = {PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; },
  abstract = {BACKGROUND: Ligilactobacillus salivarius has been frequently isolated from the gut microbiota of humans and domesticated animals and has been studied as a candidate probiotic. Badger (Meles meles) is known as a "generalist" species that consumes complex foods and exhibits tolerance and resistance to certain pathogens, which can be partly attributed to the beneficial microbes such as L. salivarius in the gut microbiota. However, our understanding of the beneficial traits and genomic features of badger-originated L. salivarius remains elusive.
RESULTS: In this study, nine L. salivarius strains were isolated from wild badgers' feces, one of which exhibited good probiotic properties. Complete genomes of the nine L. salivarius strains were generated, and comparative genomic analysis was performed with the publicly available complete genomes of L. salivarius obtained from humans and domesticated animals. The strains originating from badgers harbored a larger genome, a higher number of protein-coding sequences, and functionally annotated genes than those originating from humans and chickens. The pan-genome phylogenetic tree demonstrated that the strains originating from badgers formed a separate clade, and totally 412 gene families (12.6\% of the total gene families in the pan-genome) were identified as genes gained by the last common ancestor of the badger group. The badger group harbored significantly more gene families responsible for the degradation of complex carbohydrate substrates and production of polysaccharides than strains from other hosts; many of these were acquired by gene gain events.
CONCLUSIONS: A candidate probiotic and nine L. salivarius complete genomes were obtained from the badgers' gut microbiome, and several beneficial genes were identified to be specifically present in the badger-originated strains that were gained in the evolution. Our study provides novel insights into the adaptation of L. salivarius to the intestinal habitat of wild badgers and provides valuable strain and genome resources for the development of L. salivarius as a probiotic.},
}
@article{pmid37679363,
  year     = {2023},
  author   = {Liu, F. and Zhao, J. and Sun, H. and Xiong, C. and Sun, X. and Wang, X. and Wang, Z. and Jarret, R. and Wang, J. and Tang, B. and Xu, H. and Hu, B. and Suo, H. and Yang, B. and Ou, L. and Li, X. and Zhou, S. and Yang, S. and Liu, Z. and Yuan, F. and Pei, Z. and Ma, Y. and Dai, X. and Wu, S. and Fei, Z. and Zou, X.},
  title    = {Genomes of cultivated and wild {Capsicum} species provide insights into pepper domestication and population differentiation},
  journal  = {Nature communications},
  volume   = {14},
  number   = {1},
  pages    = {5487},
  pmid     = {37679363},
  issn     = {2041-1723},
  support  = {1855585//National Science Foundation (NSF)/ ; },
  abstract = {Pepper (Capsicum spp.) is one of the earliest cultivated crops and includes five domesticated species, C. annuum var. annuum, C. chinense, C. frutescens, C. baccatum var. pendulum and C. pubescens. Here, we report a pepper graph pan-genome and a genome variation map of 500 accessions from the five domesticated Capsicum species and close wild relatives. We identify highly differentiated genomic regions among the domesticated peppers that underlie their natural variations in flowering time, characteristic flavors, and unique resistances to biotic and abiotic stresses. Domestication sweeps detected in C. annuum var. annuum and C. baccatum var. pendulum are mostly different, and the common domestication traits, including fruit size, shape and pungency, are achieved mainly through the selection of distinct genomic regions between these two cultivated species. Introgressions from C. baccatum into C. chinense and C. frutescens are detected, including those providing genetic sources for various biotic and abiotic stress tolerances.},
}
@article{pmid37676357,
  year     = {2023},
  author   = {Huang, B. and Yan, H. and Sun, M. and Jin, Y.},
  title    = {Novel discovery in roles of structural variations and {RWP-RK} transcription factors in heat tolerance for pearl millet},
  journal  = {Stress biology},
  volume   = {3},
  number   = {1},
  pages    = {12},
  doi      = {10.1007/s44154-023-00092-3},
  pmid     = {37676357},
  issn     = {2731-0450},
  abstract = {Global warming adversely affects crop production worldwide. Massive efforts have been undertaken to study mechanisms regulating heat tolerance in plants. However, the roles of structural variations (SVs) in heat stress tolerance remain unclear. In a recent article, Yan et al. (Nat Genet 1-12, 2023) constructed the first pan-genome of pearl millet (Pennisetum glaucum) and identified key SVs linked to genes involved in regulating plant tolerance to heat stress for an important crop with a superior ability to thrive in extremely hot and arid climates. Through multi-omics analyses integrating by pan-genomics, comparative genomics, transcriptomics, population genetics and and molecular biological technologies, they found RWP-RK transcription factors cooperating with endoplasmic reticulum-related genes play key roles in heat tolerance in pearl millet. The results in this paper provided novel insights to advance the understanding of the genetic and genomic basis of heat tolerance and an exceptional resource for molecular breeding to improve heat tolerance in pearl millet and other crops.},
}
@article{pmid37676306,
  year     = {2023},
  author   = {González-Gómez, JP and Lozano-Aguirre, LF and Medrano-Félix, JA and Chaidez, C and Gerba, CP and Betancourt, WQ and Castro-Del Campo, N},
  title    = {Evaluation of nuclear and mitochondrial phylogenetics for the subtyping of Cyclospora cayetanensis.},
  journal  = {Parasitology research},
  volume   = {},
  number   = {},
  pages    = {},
  pmid     = {37676306},
  issn     = {1432-1955},
  abstract = {Cyclospora cayetanensis is an enteric coccidian parasite responsible for gastrointestinal disease transmitted through contaminated food and water. It has been documented in several countries, mostly with low-socioeconomic levels, although major outbreaks have hit developed countries. Detection methods based on oocyst morphology, staining, and molecular testing have been developed. However, the current MLST panel offers an opportunity for enhancement, as amplification of all molecular markers remains unfeasible in the majority of samples. This study aims to address this challenge by evaluating two approaches for analyzing the genetic diversity of C. cayetanensis and identifying reliable markers for subtyping: core homologous genes and mitochondrial genome analysis. A pangenome was constructed using 36 complete genomes of C. cayetanensis, and a haplotype network and phylogenetic analysis were conducted using 33 mitochondrial genomes. Through the analysis of the pangenome, 47 potential markers were identified, emphasizing the need for more sequence data to achieve comprehensive characterization. Additionally, the analysis of mitochondrial genomes revealed 19 single-nucleotide variations that can serve as characteristic markers for subtyping this parasite. These findings not only contribute to the selection of molecular markers for C. cayetanensis subtyping, but they also drive the knowledge toward the potential development of a comprehensive genotyping method for this parasite.},
}
@article{pmid37671027,
  year     = {2023},
  author   = {Lee, H and Greer, SU and Pavlichin, DS and Zhou, B and Urban, AE and Weissman, T and Ji, HP},
  title    = {Pan-conserved segment tags identify ultra-conserved sequences across assemblies in the human pangenome.},
  journal  = {Cell reports methods},
  volume   = {3},
  number   = {8},
  pages    = {100543},
  pmid     = {37671027},
  issn     = {2667-2375},
  abstract = {The human pangenome, a new reference sequence, addresses many limitations of the current GRCh38 reference. The first release is based on 94 high-quality haploid assemblies from individuals with diverse backgrounds. We employed a k-mer indexing strategy for comparative analysis across multiple assemblies, including the pangenome reference, GRCh38, and CHM13, a telomere-to-telomere reference assembly. Our k-mer indexing approach enabled us to identify a valuable collection of universally conserved sequences across all assemblies, referred to as "pan-conserved segment tags" (PSTs). By examining intervals between these segments, we discerned highly conserved genomic segments and those with structurally related polymorphisms. We found 60,764 polymorphic intervals with unique geo-ethnic features in the pangenome reference. In this study, we utilized ultra-conserved sequences (PSTs) to forge a link between human pangenome assemblies and reference genomes. This methodology enables the examination of any sequence of interest within the pangenome, using the reference genome as a comparative framework.},
}
@article{pmid37668148,
  year     = {2023},
  author   = {Mentasti, M and David, S and Turton, J and Morgan, M and Turner, L and Westlake, J and Jenkins, J and Williams, C and Rey, S and Watkins, J and Daniel, V and Mitchell, S and Forbes, G and Wootton, M and Jones, L},
  title    = {Clonal expansion and rapid characterization of Klebsiella pneumoniae ST1788, an otherwise uncommon strain spreading in Wales, UK.},
  journal  = {Microbial genomics},
  volume   = {9},
  number   = {9},
  pages    = {},
  doi      = {10.1099/mgen.0.001104},
  pmid     = {37668148},
  issn     = {2057-5858},
  abstract = {A multidrug-resistant strain of Klebsiella pneumoniae (Kp) sequence type (ST) 1788, an otherwise uncommon ST worldwide, was isolated from 65 patients at 11 hospitals and 11 general practices across South and West Wales, UK, between February 2019 and November 2021. A collection of 97 Kp ST1788 isolates (including 94 from Wales) was analysed to investigate the diversity and spread across Wales and to identify molecular marker(s) to aid development of a strain-specific real-time PCR. Whole genome sequencing (WGS) was performed with Illumina technology and the data were used to perform phylogenetic analyses. Pan-genome analysis of further Kp genome collections was used to identify an ST1788-specific gene target; a real-time PCR was then validated against a panel of 314 strains and 218 broth-enriched screening samples. Low genomic diversity was demonstrated amongst the 94 isolates from Wales. Evidence of spread within and across healthcare facilities was found. A yersiniabactin locus and the KL2 capsular locus were identified in 85/94 (90.4 %) and 94/94 (100 %) genomes respectively; bla SHV-232, bla TEM-1, bla CTX-M-15 and bla OXA-1 were simultaneously carried by 86/94 (91.5 %) isolates; 4/94 (4.3 %) isolates also carried bla OXA-48 carbapenemase. Aminoglycoside and fluoroquinolone resistance markers were found in 94/94 (100 %) and 86/94 (91.5 %) isolates respectively.
The ST1788-specific real-time PCR was 100 % sensitive and specific. Our analyses demonstrated recent clonal expansion and spread of Kp ST1788 in the community and across healthcare facilities in South and West Wales with isolates carrying well-defined antimicrobial resistance and virulence markers. An ST1788-specific marker was also identified, enabling rapid and reliable preliminary characterization of isolates by real-time PCR. This study confirms the utility of WGS in investigating novel strains and in aiding proactive implementation of molecular tools to assist infection control specialists.},
}
@article{pmid37667515,
  year     = {2023},
  author   = {Baker, JL},
  title    = {Illuminating the oral microbiome and its host interactions: recent Advancements in omics and bioinformatics technologies in the context of oral microbiome research.},
  journal  = {FEMS microbiology reviews},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1093/femsre/fuad051},
  pmid     = {37667515},
  issn     = {1574-6976},
  abstract = {The oral microbiota has an enormous impact on human health, with oral dysbiosis now linked to many oral and systemic diseases. Recent advancements in sequencing, mass spectrometry, bioinformatics, computational biology, and machine learning are revolutionizing oral microbiome research, enabling analysis at an unprecedented scale and level of resolution using omics approaches. This review contains a comprehensive perspective of the current state-of-the-art tools available to perform genomics, metagenomics, phylogenomics, pangenomics, transcriptomics, proteomics, metabolomics, lipidomics, and multi-omics analysis on (all) microbiomes, and then provides examples of how the techniques have been applied to research of the oral microbiome, specifically. Key findings of these studies and remaining challenges for the field are highlighted. Although the methods discussed here are placed in the context of their contributions to oral microbiome research specifically, they are pertinent to the study of any microbiome, and the intended audience of this includes researchers would simply like to get an introduction to microbial omics and/or an update on the latest omics methods. Continued research of the oral microbiota using omics approaches is crucial and will lead to dramatic improvements in human health, longevity, and quality of life.},
}
@article{pmid37662009,
  year     = {2023},
  author   = {Li, Z and Zhou, X and Liao, D and Liu, R and Zhao, X and Wang, J and Zhong, Q and Zeng, Z and Peng, Y and Tan, Y and Yang, Z},
  title    = {Comparative genomics and DNA methylation analysis of Pseudomonas aeruginosa clinical isolate PA3 by single-molecule real-time sequencing reveals new targets for antimicrobials.},
  journal  = {Frontiers in cellular and infection microbiology},
  volume   = {13},
  number   = {},
  pages    = {1180194},
  pmid     = {37662009},
  issn     = {2235-2988},
  abstract = {INTRODUCTION: Pseudomonas aeruginosa (P.aeruginosa) is an important opportunistic pathogen with broad environmental adaptability and complex drug resistance. Single-molecule real-time (SMRT) sequencing technique has longer read-length sequences, more accuracy, and the ability to identify epigenetic DNA alterations.
METHODS: This study applied SMRT technology to sequence a clinical strain P. aeruginosa PA3 to obtain its genome sequence and methylation modification information. Genomic, comparative, pan-genomic, and epigenetic analyses of PA3 were conducted.
RESULTS: General genome annotations of PA3 were discovered, as well as information about virulence factors, regulatory proteins (RPs), secreted proteins, type II toxin-antitoxin (TA) pairs, and genomic islands. A genome-wide comparison revealed that PA3 was comparable to other P. aeruginosa strains in terms of identity, but varied in areas of horizontal gene transfer (HGT). Phylogenetic analysis showed that PA3 was closely related to P. aeruginosa 60503 and P. aeruginosa 8380. P. aeruginosa's pan-genome consists of a core genome of roughly 4,300 genes and an accessory genome of at least 5,500 genes. The results of the epigenetic analysis identified one main methylation sites, N6-methyladenosine (m6A) and 1 motif (CATNNNNNNNTCCT/AGGANNNNNNNATG). 16 meaningful methylated sites were picked. Among these, purH, phaZ, and lexA are of great significance playing an important role in the drug resistance and biological environment adaptability of PA3, and the targeting of these genes may benefit further antibacterial studies.
DISCUSSION: This study provided a detailed visualization and DNA methylation information of the PA3 genome and set a foundation for subsequent research into the molecular mechanism of DNA methyltransferase-controlled P. aeruginosa pathogenicity.},
}
@article{pmid37659733,
  year     = {2023},
  author   = {Sharma, N and Raman, H and Wheeler, D and Kalenahalli, Y and Sharma, R},
  title    = {Data-driven approaches to improve water-use efficiency and drought resistance in crop plants.},
  journal  = {Plant science : an international journal of experimental plant biology},
  volume   = {},
  number   = {},
  pages    = {111852},
  doi      = {10.1016/j.plantsci.2023.111852},
  pmid     = {37659733},
  issn     = {1873-2259},
  abstract = {With the increasing population, there lies a pressing demand for food, feed and fibre, while the changing climatic conditions pose severe challenges for agricultural production worldwide. Water is the lifeline for crop production; thus, enhancing crop water-use efficiency (WUE) and improving drought resistance in crop varieties are crucial for overcoming these challenges. Genetically-driven improvements in yield, WUE and drought tolerance traits can buffer the worst effects of climate change on crop production in dry areas. While traditional crop breeding approaches have delivered impressive results in increasing yield, the methods remain time-consuming and are often limited by the existing allelic variation present in the germplasm. Significant advances in breeding and high-throughput omics technologies in parallel with smart agriculture practices have created avenues to dramatically speed up the process of trait improvement by leveraging the vast volumes of genomic and phenotypic data. For example, individual genome and pan-genome assemblies, along with transcriptomic, metabolomic and proteomic data from germplasm collections, characterised at phenotypic levels, could be utilised to identify marker-trait associations and superior haplotypes for crop genetic improvement. In addition, these omics approaches enable the identification of genes involved in pathways leading to the expression of a trait, thereby providing an understanding of the genetic, physiological and biochemical basis of trait variation.
These data-driven gene discoveries and validation approaches are essential for crop improvement pipelines, including genomic breeding, speed breeding and gene editing. Herein, we provide an overview of prospects presented using big data-driven approaches (including artificial intelligence and machine learning) to harness new genetic gains for breeding programs and develop drought-tolerant crop varieties with favourable WUE and high-yield potential traits.},
}
@article{pmid37655941,
  year     = {2023},
  author   = {Meyer, S and Laval, L and Pimenta, M and González-Flores, Y and Gaschet, M and Couvé-Deacon, E and Barraud, O and Dagot, C and Ploy, MC},
  title    = {[Tracking transfers of resistance-carrying bacteria between animals, humans and the environment].},
  journal  = {Comptes rendus biologies},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.5802/crbiol.114},
  pmid     = {37655941},
  issn     = {1768-3238},
  abstract = {The fight against antibiotic resistance must incorporate the "One Health" concept to be effective. This means having a holistic approach embracing the different ecosystems, human, animal, and environment. Transfers of resistance genes may exist between these three domains and different stresses related to the exposome may influence these transfers. Various targeted or pan-genomic molecular biology techniques can be used to better characterise the dissemination of bacterial clones and to identify exchanges of genes and mobile genetic elements between ecosystems.},
}
@article{pmid37653687,
  year     = {2023},
  author   = {Dixon, TA and Walker, RSK and Pretorius, IS},
  title    = {Visioning synthetic futures for yeast research within the context of current global techno-political trends.},
  journal  = {Yeast (Chichester, England)},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1002/yea.3897},
  pmid     = {37653687},
  issn     = {1097-0061},
  support  = {//Australian Research Council/ ; },
  abstract = {Yeast research is entering into a new period of scholarship, with new scientific tools, new questions to ask and new issues to consider. The politics of emerging and critical technology can no longer be separated from the pursuit of basic science in fields, such as synthetic biology and engineering biology. Given the intensifying race for technological leadership, yeast research is likely to attract significant investment from government, and that it offers huge opportunities to the curious minded from a basic research standpoint. This article provides an overview of new directions in yeast research with a focus on Saccharomyces cerevisiae, and places these trends in their geopolitical context. At the highest level, yeast research is situated within the ongoing convergence of the life sciences with the information sciences. This convergent effect is most strongly pronounced in areas of AI-enabled tools for the life sciences, and the creation of synthetic genomes, minimal genomes, pan-genomes, neochromosomes and metagenomes using computer-assisted design tools and methodologies. Synthetic yeast futures encompass basic and applied science questions that will be of intense interest to government and nongovernment funding sources. It is essential for the yeast research community to map and understand the context of their research to ensure their collaborations turn global challenges into research opportunities.},
}
@article{pmid37646934,
  year     = {2023},
  author   = {Bayer, PE and Edwards, D},
  title    = {Investigating Pangenome Graphs Using Wheat Panache.},
  journal  = {Methods in molecular biology (Clifton, N.J.)},
  volume   = {2703},
  number   = {},
  pages    = {23--29},
  pmid     = {37646934},
  issn     = {1940-6029},
  abstract = {Pangenome graphs quickly become the central data structure representing the diversity of variation we see across related genomes. Pangenome graphs have been published for some species, including plants of agronomic interest. However, visualizing these graphs is not easy as the graphs are large, and variants within these graphs are complex. Tools are needed to visualize graph data structures. Here, we present a workflow to search and visualize a wheat pangenome graph using Wheat Panache. The approach presented assists researchers interested in wheat genomics.},
}
@article{pmid37645952,
  year     = {2023},
  author   = {McLaughlin, M and Fiebig, A and Crosson, S},
  title    = {XRE Transcription Factors Conserved in Caulobacter and φCbK Modulate Adhesin Development and Phage Production.},
  journal  = {bioRxiv : the preprint server for biology},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1101/2023.08.20.554034},
  pmid     = {37645952},
  abstract = {Upon infection, transcriptional shifts in both a host bacterium and its invading phage determine host and viral fitness. The xenobiotic response element (XRE) family of transcription factors (TFs), which are commonly encoded by bacteria and phages, regulate diverse features of bacterial cell physiology and impact phage infection dynamics. Through a pangenome analysis of Caulobacter species isolated from soil and aquatic ecosystems, we uncovered an apparent radiation of a paralogous XRE TF gene cluster, several of which have established functions in the regulation of holdfast adhesin development and biofilm formation in C. crescentus. We further discovered related XRE TFs across the class Alphaproteobacteria and its phages, including the φCbK Caulophage, suggesting that members of this gene cluster impact host-phage interactions. Here we show that a closely related group of XRE proteins, encoded by both C. crescentus and φCbK, can form heteromeric associations and control the transcription of a common gene set, influencing processes including holdfast development and the production of φCbK virions. The φCbK XRE paralog, tgrL, is highly expressed at the earliest stages of infection and can directly repress transcription of hfiA, a potent holdfast inhibitor, and gafYZ, a transcriptional activator of prophage-like gene transfer agents (GTAs) encoded on the C. crescentus chromosome. XRE proteins encoded from the C.
crescentus chromosome also directly repress gafYZ transcription, revealing a functionally redundant set of host regulators that may protect against spurious production of GTA particles and inadvertent cell lysis. Deleting host XRE transcription factors reduced φCbK burst size, while overexpressing these genes or φCbK tgrL rescued this burst defect. We conclude that an XRE TF gene cluster, shared by C. crescentus and φCbK, plays an important role in adhesion regulation under phage-free conditions, and influences host-phage dynamics during infection.},
}
@article{pmid37645873,
  year     = {2023},
  author   = {Shivakumar, VS and Ahmed, OY and Kovaka, S and Zakeri, M and Langmead, B},
  title    = {Sigmoni: classification of nanopore signal with a compressed pangenome index.},
  journal  = {bioRxiv : the preprint server for biology},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1101/2023.08.15.553308},
  pmid     = {37645873},
  abstract = {Improvements in nanopore sequencing necessitate efficient classification methods, including pre-filtering and adaptive sampling algorithms that enrich for reads of interest. Signal-based approaches circumvent the computational bottleneck of basecalling. But past methods for signal-based classification do not scale efficiently to large, repetitive references like pangenomes, limiting their utility to partial references or individual genomes. We introduce Sigmoni: a rapid, multiclass classification method based on the r -index that scales to references of hundreds of Gbps. Sigmoni quantizes nanopore signal into a discrete alphabet of picoamp ranges. It performs rapid, approximate matching using matching statistics, classifying reads based on distributions of picoamp matching statistics and co-linearity statistics. Sigmoni is 10-100 × faster than previous methods for adaptive sampling in host depletion experiments with improved accuracy, and can query reads against large microbial or human pangenomes.},
}
@article{pmid37644736,
  year     = {2023},
  author   = {Le, VV and Ko, SR and Kang, M and Jeong, S and Oh, HM and Ahn, CY},
  title    = {Comparative Genome analysis of the Genus Curvibacter and the Description of Curvibacter microcysteis sp. nov. and Curvibacter cyanobacteriorum sp. nov., Isolated from Fresh Water during the Cyanobacterial Bloom Period.},
  journal  = {Journal of microbiology and biotechnology},
  volume   = {33},
  number   = {11},
  pages    = {1--10},
  doi      = {10.4014/jmb.2306.06017},
  pmid     = {37644736},
  issn     = {1738-8872},
  abstract = {The three Gram-negative, catalase- and oxidase-positive bacterial strains RS43T, HBC28, and HBC61[T], were isolated from fresh water and subjected to a polyphasic study. Comparison of 16S rRNA gene sequence initially indicated that strains RS43[T], HBC28, and HBC61[T] were closely related to species of genus Curvibacter and shared the highest sequence similarity of 98.14%, 98.21%, and 98.76%, respectively, with Curvibacter gracilis 7-1[T]. Phylogenetic analysis based on genome sequences placed all strains within the genus Curvibacter. The average nucleotide identity (ANI) and digital DNA-DNA hybridization (dDDH) values between the three strains and related type strains supported their recognition as two novel genospecies in the genus Curvibacter. Comparative genomic analysis revealed that the genus possessed an open pangenome. Based on KEGG BlastKOALA analyses, Curvibacter species have the potential to metabolize benzoate, phenylacetate, catechol, and salicylate, indicating their potential use in the elimination of these compounds from the water systems. The results of polyphasic characterization indicated that strain RS43T and HBC61[T] represent two novel species, for which the name Curvibacter microcysteis sp. nov. (type strain RS43[T] (=KCTC 92793T=LMG 32714[T]) and Curvibacter cyanobacteriorum sp. nov. (type strain HBC61[T] =KCTC 92794[T] =LMG 32713[T]) are proposed.},
}
@article{pmid37639729,
  year     = {2023},
  author   = {Prajapati, A and Yogisharadhya, R and Mohanty, NN and Mendem, SK and Chanda, MM and Siddaramappa, S and Shivachandra, SB},
  title    = {Comparative genome analysis of Pasteurella multocida strains of porcine origin.},
  journal  = {Genome},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1139/gen-2023-0021},
  pmid     = {37639729},
  issn     = {1480-3321},
  abstract = {Pasteurella multocida causes acute/chronic pasteurellosis in porcine resulting in considerable economic losses globally. The draft genomes of two Indian strains NIVEDIPm17 (serogroup D) and NIVEDIPm36 (serogroup A) were sequenced. A total of 2182-2284 coding sequences (CDSs) were predicted along with 5-6 rRNA and 45-46 tRNA genes in the genomes. Multi locus sequence analysis and LPS genotyping showed the presence of ST50: genotype 07 and ST74: genotype 06 in NIVEDIPm17 and NIVEDIPm36, respectively. Pangenome analysis of 61 strains showed the presence of 1653 core genes, 167 soft core genes, 750 shell genes, and 1820 cloud genes. Analysis of virulence-associated genes in 61 genomes indicated the presence of nanB, exbB, exbD, ptfA, ompA, ompH, fur, plpB, fimA, sodA, sodC, tonB, and omp87 in all strains. The 61 genomes contained genes encoding tetracycline (54%), streptomycin (48%), sulphonamide (28%), tigecycline (25%), chloramphenicol (21%), amikacin (7%), cephalosporin (5%) and trimethoprim (5%) resistance. MLST revealed that ST50 was the most common (34%), followed by ST74 (26%), ST13 (24%), ST287 (5%), ST09 (5%), ST122 (3%), and ST07 (2%). SNP and core genome-based phylogenetic analysis clustered the strains in to 3 major clusters. In conclusion, we described the various virulence factors, mobile genetic elements and antimicrobial resistance genes in pangenome of P. multocida of porcine origin besides a rare presence of LPS genotype 7 in serogroup D.},
}
@article{pmid37636268,
  year     = {2023},
  author   = {Yang, Z and Guarracino, A and Biggs, PJ and Black, MA and Ismail, N and Wold, JR and Merriman, TR and Prins, P and Garrison, E and de Ligt, J},
  title    = {Pangenome graphs in infectious disease: a comprehensive genetic variation analysis of Neisseria meningitidis leveraging Oxford Nanopore long reads.},
  journal  = {Frontiers in genetics},
  volume   = {14},
  number   = {},
  pages    = {1225248},
  pmid     = {37636268},
  issn     = {1664-8021},
  abstract = {Whole genome sequencing has revolutionized infectious disease surveillance for tracking and monitoring the spread and evolution of pathogens. However, using a linear reference genome for genomic analyses may introduce biases, especially when studies are conducted on highly variable bacterial genomes of the same species. Pangenome graphs provide an efficient model for representing and analyzing multiple genomes and their variants as a graph structure that includes all types of variations. In this study, we present a practical bioinformatics pipeline that employs the PanGenome Graph Builder and the Variation Graph toolkit to build pangenomes from assembled genomes, align whole genome sequencing data and call variants against a graph reference. The pangenome graph enables the identification of structural variants, rearrangements, and small variants (e.g., single nucleotide polymorphisms and insertions/deletions) simultaneously. We demonstrate that using a pangenome graph, instead of a single linear reference genome, improves mapping rates and variant calling for both simulated and real datasets of the pathogen Neisseria meningitidis. Overall, pangenome graphs offer a promising approach for comparative genomics and comprehensive genetic variation analysis in infectious disease.
Moreover, this innovative pipeline, leveraging pangenome graphs, can bridge variant analysis, genome assembly, population genetics, and evolutionary biology, expanding the reach of genomic understanding and applications.},
}
@article{pmid37630674,
  year     = {2023},
  author   = {Aguirre-Sánchez, JR and Quiñones, B and Ortiz-Muñoz, JA and Prieto-Alvarado, R and Vega-López, IF and Martínez-Urtaza, J and Lee, BG and Chaidez, C},
  title    = {Comparative Genomic Analyses of Virulence and Antimicrobial Resistance in Citrobacter werkmanii, an Emerging Opportunistic Pathogen.},
  journal  = {Microorganisms},
  volume   = {11},
  number   = {8},
  pages    = {},
  doi      = {10.3390/microorganisms11082114},
  pmid     = {37630674},
  issn     = {2076-2607},
  support  = {CRIS Project Number 2030-42000-055-00D//United States Department of Agriculture (USDA), Agricultural Research Service (ARS)/ ; Laboratorio Nacional para la Investigación en Inocuidad Alimentaria (LANIIA)//Centro de Investigación y Desarrollo A. C. (CIAD) in Culiacán, Sinaloa/ ; },
  abstract = {Citrobacter werkmanii is an emerging and opportunistic human pathogen found in developing countries and is a causative agent of wound, urinary tract, and blood infections. The present study conducted comparative genomic analyses of a C. werkmanii strain collection from diverse geographical locations and sources to identify the relevant virulence and antimicrobial resistance genes. Pangenome analyses divided the examined C. werkmanii strains into five distinct clades; the subsequent classification identified genes with functional roles in carbohydrate and general metabolism for the core genome and genes with a role in secretion, adherence, and the mobilome for the shell and cloud genomes. A maximum-likelihood phylogenetic tree with a heatmap, showing the virulence and antimicrobial genes' presence or absence, demonstrated the presence of genes with functional roles in secretion systems, adherence, enterobactin, and siderophore among the strains belonging to the different clades. C.
werkmanii strains in clade V, predominantly from clinical sources, harbored genes implicated in type II and type Vb secretion systems as well as multidrug resistance to aminoglycoside, beta-lactamase, fluoroquinolone, phenicol, trimethoprim, macrolides, sulfonamide, and tetracycline. In summary, these comparative genomic analyses have demonstrated highly pathogenic and multidrug-resistant genetic profiles in C. werkmanii strains, indicating a virulence potential for this commensal and opportunistic human pathogen.},
}
@article{pmid37630640,
  year     = {2023},
  author   = {van der Lee, TAJ and van Gent-Pelzer, MPE and Jonkheer, EM and Brankovics, B and Houwers, IM and van der Wolf, JM and Bonants, PJM and van Duivenbode, I and Vreeburg, RAM and Nas, M and Smit, S},
  title    = {An Efficient Triplex TaqMan Quantitative PCR to Detect a Blackleg-Causing Lineage of Pectobacterium brasiliense in Potato Based on a Pangenome Analysis.},
  journal  = {Microorganisms},
  volume   = {11},
  number   = {8},
  pages    = {},
  doi      = {10.3390/microorganisms11082080},
  pmid     = {37630640},
  issn     = {2076-2607},
  support  = {TU-16022//Dutch Ministry of Agriculture, Nature and Food Safety/ ; },
  abstract = {P. brasiliense is an important bacterial pathogen causing blackleg (BL) in potatoes. Nevertheless, P. brasiliense is often detected in seed lots that do not develop any of the typical blackleg symptoms in the potato crop when planted. Field bioassays identified that P. brasiliense strains can be categorized into two distinct classes, some able to cause blackleg symptoms and some unable to do it. A comparative pangenomic approach was performed on 116 P. brasiliense strains, of which 15 were characterized as BL-causing strains and 25 as non-causative. In a genetically homogeneous clade comprising all BL-causing P. brasiliense strains, two genes only present in the BL-causing strains were identified, one encoding a predicted lysozyme inhibitor Lprl (LZI) and one encoding a putative Toll/interleukin-1 receptor (TIR) domain-containing protein. TaqMan assays for the specific detection of BL-causing P. brasiliense were developed and integrated with the previously developed generic P. brasiliense assay into a triplex TaqMan assay. This simultaneous detection makes the scoring more efficient as only a single tube is needed, and it is more robust as BL-causing strains of P. brasiliense should be positive for all three assays. Individual P. brasiliense strains were found to be either positive for all three assays or only for the P.
brasiliense assay. In potato samples, the mixed presence of BL-causing and not BL-causing P. brasiliense strains was observed as shown by the difference in Ct value of the TaqMan assays. However, upon extension of the number of strains, it became clear that in recent years additional BL-causing lineages of P. brasiliense were detected for which additional assays must be developed.},
}
@article{pmid37630590,
  year     = {2023},
  author   = {Mevada, V and Patel, R and Dudhagara, P and Chaudhari, R and Vohra, M and Khan, V and J H Shyu, D and Chen, YY and Zala, D},
  title    = {Whole Genome Sequencing and Pan-Genomic Analysis of Multidrug-Resistant Vibrio cholerae VC01 Isolated from a Clinical Sample.},
  journal  = {Microorganisms},
  volume   = {11},
  number   = {8},
  pages    = {},
  doi      = {10.3390/microorganisms11082030},
  pmid     = {37630590},
  issn     = {2076-2607},
  abstract = {Cholera, a disease caused by the Vibrio cholerae bacteria, threatens public health worldwide. The organism mentioned above has a significant historical record of being identified as a prominent aquatic environmental pollutant capable of adapting its phenotypic and genotypic traits to react to host patients effectively. This study aims to elucidate the heterogeneity of the sporadic clinical strain of V. cholerae VC01 among patients residing in Silvasa. The study involved conducting whole-genome sequencing of the isolate obtained from patients exhibiting symptoms, including those not commonly observed in clinical practice. The strain was initially identified through a combination of biochemical analysis, microscopy, and 16s rRNA-based identification, followed by type strain-based identification. The investigation demonstrated the existence of various genetic alterations and resistance profiles against multiple drugs, particularly chloramphenicol (catB9), florfenicol (floR), oxytetracycline (tet(34)), sulfonamide (sul2), and Trimethoprim (dfrA1). The pan-genomic analysis indicated that 1099 distinct clusters were detected within the genome sequences of recent isolates worldwide. The present study helps to establish a correlation between the mutation and the coexistence of antimicrobial resistance toward current treatment.},
}
@article{pmid37628823,
  author   = {Li, H and Song, K and Zhang, X and Wang, D and Dong, S and Liu, Y and Yang, L},
  title    = {Application of Multi-Perspectives in Tea Breeding and the Main Directions},
  journal  = {International journal of molecular sciences},
  year     = {2023},
  volume   = {24},
  number   = {16},
  doi      = {10.3390/ijms241612643},
  pmid     = {37628823},
  issn     = {1422-0067},
  support  = {SDAIT-25-01//The Foundation of Innovation Team Project for Modern Agricultural Industrious Technology System of Shandong Province/ ; YDZX2022123//Special Funds for Local Scientific and Technological Development Guided by the Central Government/ ; },
  abstract = {Tea plants are an economically important crop and conducting research on tea breeding contributes to enhancing the yield and quality of tea leaves as well as breeding traits that satisfy the requirements of the public. This study reviews the current status of tea plants germplasm resources and their utilization, which has provided genetic material for the application of multi-omics, including genomics and transcriptomics in breeding. Various molecular markers for breeding were designed based on multi-omics, and available approaches in the direction of high yield, quality and resistance in tea plants breeding are proposed. Additionally, future breeding of tea plants based on single-cellomics, pangenomics, plant-microbe interactions and epigenetics are proposed and provided as references. This study aims to provide inspiration and guidance for advancing the development of genetic breeding in tea plants, as well as providing implications for breeding research in other crops.},
}
@article{pmid37623951,
  author   = {Pitta, JLLP and Bezerra, MF and Fernandes, DLRDS and Block, T and Novaes, AS and Almeida, AMP and Rezende, AM},
  title    = {Genomic Analysis of {Yersinia} pestis Strains from {Brazil}: Search for Virulence Factors and Association with Epidemiological Data},
  journal  = {Pathogens (Basel, Switzerland)},
  year     = {2023},
  volume   = {12},
  number   = {8},
  doi      = {10.3390/pathogens12080991},
  pmid     = {37623951},
  issn     = {2076-0817},
  abstract = {Yersinia pestis, the etiological agent of the plague, is considered a genetically homogeneous species. Brazil is currently in a period of epidemiological silence but plague antibodies are still detected in sentinel animals, suggesting disease activity in the sylvatic cycle. The present study deployed an in silico approach to analyze virulence factors among 407 Brazilian genomes of Y. pestis belonging to the Fiocruz Collection (1966-1997). The pangenome analysis associated several known virulence factors of Y. pestis in clades according to the presence or absence of genes. Four main strain clades (C, E, G, and H) exhibited the absence of various virulence genes. Notably, clade G displayed the highest number of absent genes, while clade E showed a significant absence of genes related to the T6SS secretion system and clade H predominantly demonstrated the absence of plasmid-related genes. These results suggest attenuation of virulence in these strains over time. The cgMLST analysis associated genomic and epidemiological data highlighting evolutionary patterns related to the isolation years and outbreaks of Y. pestis in Brazil. Thus, the results contribute to the understanding of the genetic diversity and virulence within Y. pestis and the potential for utilizing genomic data in epidemiological investigations.},
}
@article{pmid37620118,
  author   = {Horsfield, ST and Tonkin-Hill, G and Croucher, NJ and Lees, JA},
  title    = {Accurate and fast graph-based pangenome annotation and clustering with {ggCaller}},
  journal  = {Genome research},
  year     = {2023},
  doi      = {10.1101/gr.277733.123},
  pmid     = {37620118},
  issn     = {1549-5469},
  abstract = {Bacterial genomes differ in both gene content and sequence mutations, which underlies extensive phenotypic diversity, including variation in susceptibility to antimicrobials or vaccine-induced immunity. To identify and quantify important variants, all genes within a population must be predicted, functionally annotated and clustered, representing the pangenome. Despite the volume of genome data available, gene prediction and annotation are currently conducted in isolation on individual genomes, which is computationally inefficient and frequently inconsistent across genomes. Here, we introduce the open-source software graph-gene-caller (ggCaller). ggCaller combines gene prediction, functional annotation, and clustering into a single workflow using population-wide de Bruijn Graphs, removing redundancy in gene annotation, and resulting in more accurate gene predictions and orthologue clustering. We applied ggCaller to simulated and real-world bacterial datasets containing hundreds or thousands of genomes, comparing it to current state-of-the-art tools. ggCaller has considerable speed-ups with equivalent or greater accuracy, particularly with datasets containing complex sources of error, such as assembly contamination or fragmentation. ggCaller is also an important extension to bacterial genome-wide association studies, enabling querying of annotated graphs for functional analyses.
We highlight this application by functionally annotating DNA sequences with significant associations to tetracycline and macrolide resistance in Streptococcus pneumoniae, identifying key resistance determinants that were missed when using only a single reference genome. ggCaller is a novel bacterial genome analysis tool with applications in bacterial evolution and epidemiology.},
}
@article{pmid37612339,
  author   = {Jang, J and Jung, J and Lee, YH and Lee, S and Baik, M and Kim, H},
  title    = {Chromosome-level genome assembly of {Korean} native cattle and pangenome graph of 14 {Bos} taurus assemblies},
  journal  = {Scientific data},
  year     = {2023},
  volume   = {10},
  number   = {1},
  pages    = {560},
  pmid     = {37612339},
  issn     = {2052-4463},
  support  = {NRF-2021R1A2C2094111//National Research Foundation of Korea (NRF)/ ; },
  abstract = {This study presents the first chromosome-level genome assembly of Hanwoo, an indigenous Korean breed of Bos taurus taurus. This is the first genome assembly of Asian taurus breed. Also, we constructed a pangenome graph of 14 B. taurus genome assemblies. The contig N50 was over 55 Mb, the scaffold N50 was over 89 Mb and a genome completeness of 95.8%, as estimated by BUSCO using the mammalian set, indicated a high-quality assembly. 48.7% of the genome comprised various repetitive elements, including DNAs, tandem repeats, long interspersed nuclear elements, and simple repeats. A total of 27,314 protein-coding genes were identified, including 25,302 proteins with inferred gene names and 2,012 unknown proteins. The pangenome graph of 14 B. taurus autosomes revealed 528.47 Mb non-reference regions in total and 61.87 Mb Hanwoo-specific regions. Our Hanwoo assembly and pangenome graph provide valuable resources for studying B. taurus populations.},
}
@article{pmid37610465,
  author   = {Szuhaj, M and Kakuk, B and Wirth, R and Rákhely, G and Kovács, KL and Bagi, Z},
  title    = {Regulation of the methanogenesis pathways by hydrogen at transcriptomic level in time},
  journal  = {Applied microbiology and biotechnology},
  year     = {2023},
  pmid     = {37610465},
  issn     = {1432-0614},
  support  = {2020-3.1.2-ZFR-KVG-2020-00009//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; K143198//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; FK123902//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; 2019-2.1.13-TÉT_IN-2020-00016//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; PD 132145//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; },
  abstract = {The biomethane formation from 4 H2 + CO2 by pure cultures of two methanogens, Methanocaldococcus fervens and Methanobacterium thermophilum, has been studied. The goal of the study was to understand the regulation of the enzymatic steps associated with biomethane biosynthesis by H2, using metagenomic, pan-genomic, and transcriptomic approaches. Methanogenesis in the autotrophic methanogen M. fervens could be easily "switched off" and "switched on" by H2/CO2 within about an hour. In contrast, the heterotrophic methanogen M. thermophilum was practically insensitive to the addition of the H2/CO2 trigger although this methanogen also converted H2/CO2 to CH4. From practical points of view, the regulatory function of H2/CO2 suggests that in the power-to-gas (P2G) renewable excess electricity conversion and storage systems, the composition of the biomethane-generating methanogenic community is essential for sustainable operation. In addition to managing the specific hydrogenotrophic methanogenesis biochemistry, H2/CO2 affected several, apparently unrelated, metabolic pathways.
The redox-regulated overall biochemistry and symbiotic relationships in the methanogenic communities should be explored in order to make the P2G technology more efficient. KEY POINTS : • Hydrogenotrophic methanogens may respond distinctly to H2/CO2 in bio-CH4 formation. • H2/CO2 can also activate metabolic routes, which are apparently unrelated to methanogenesis. • Sustainable conversion of the fluctuating renewable electricity to bio-CH4 is an option.},
}
@article{pmid37608158,
  author   = {Guo, Y},
  title    = {Pangenome and the diversity of potato species},
  journal  = {Nature food},
  year     = {2023},
  volume   = {4},
  number   = {8},
  pages    = {638},
  doi      = {10.1038/s43016-023-00830-w},
  pmid     = {37608158},
  issn     = {2662-1355},
}
@article{pmid37599459,
  author   = {Alsaiari, AA and Hakami, MA and Alotaibi, BS and Alkhalil, SS and Alkhorayef, N and Khan, K and Jalal, K},
  title    = {Delineating multi-epitopes vaccine designing from membrane protein {CL5} against all monkeypox strains: a pangenome reverse vaccinology approach},
  journal  = {Journal of biomolecular structure \& dynamics},
  year     = {2023},
  pages    = {1--22},
  doi      = {10.1080/07391102.2023.2248301},
  pmid     = {37599459},
  issn     = {1538-0254},
  abstract = {The recently identified monkeypox virus (MPXV or mpox) is a zoonotic orthopox virus that infects humans and causes diseases with traits like smallpox. The world health organization (WHO) estimates that 3-6% of MPXV cases result in death. As it might impact everyone globally, like COVID, and become the next pandemic, the cure for this disease is important for global public health. The high incidence and disease ratio of MPXV necessitates immediate efforts to design a unique vaccine candidate capable of addressing MPXV diseases. Here, we used a computational pan-genome-based vaccine design strategy for all currently reported 19 MPXV strains acquired from different regions of the world. Thus, this study's objective was to develop a new and safe vaccine candidate against MPXV by targeting the membrane CL5 protein; identified after the pangenome analysis. Proteomics and reverse vaccinology have covered up all of the MPXV epitopes that would usually stimulate robust host immune responses. Following this, only two mapped (MHC-I, MHC-II, and B-cell) epitopes were observed to be extremely effective that can be used in the construction of CL5 protein vaccine candidates. The suggested vaccine (V5) candidate from eight vaccine models was shown to be antigenic, non-allergenic, and stable (with 213 amino acids). The vaccine's candidate efficacy was evaluated by using many in silico methods to predict, improve, and validate its 3D structure.
Molecular docking and molecular dynamics simulations further reveal that the proposed vaccine candidate ensemble has a high interaction energy with the HLAs and TRL2/4 immunological receptors under study. Later, the vaccine sequence was used to generate an expression vector for the E. coli K12 strain. Further study uncovers that V5 was highly immunogenic because it produced robust primary, secondary, and tertiary immune responses. Eventually, the use of computer-aided vaccine designing may significantly reduce costs and speed up the process of developing vaccines. Although, the results of this research are promising, however, more research (experimental; in vivo, and in vitro studies) is needed to verify the biological efficacy of the proposed vaccine against MPXV.Communicated by Ramaswamy H. Sarma.},
}
@article{pmid37596715,
  author   = {Sun, Y and Zheng, C and Zhou, J and Zhen, M and Wei, X and Yan, X and Guo, X and Zheng, L and Shao, M and Li, C and Qin, D and Zhang, J and Xiong, L and Xing, J and Huang, B and Dong, Z and Cheng, P and Yu, G},
  title    = {Pathogen Profile of {Klebsiella} variicola, the Causative Agent of Banana Sheath Rot},
  journal  = {Plant disease},
  year     = {2023},
  pages    = {PDIS09222018RE},
  doi      = {10.1094/PDIS-09-22-2018-RE},
  pmid     = {37596715},
  issn     = {0191-2917},
  abstract = {Banana (Musa spp.) is an important fruit and food crop worldwide. In recent years, banana sheath rot has become a major problem in banana cultivation, causing plant death and substantial economic losses. Nevertheless, the pathogen profile of this disease has not been fully characterized. Klebsiella variicola is a versatile bacterium capable of colonizing different hosts, such as plants, humans, insects, and animals, and is recognized as an emerging pathogen in various hosts. In this study, we obtained 12 bacterial isolates from 12 different banana samples showing banana sheath rot in Guangdong and Guangxi Provinces, China. Phylogenetic analysis based on 16S rRNA sequences confirmed that all 12 isolates were K. variicola strains. We sequenced the genomes of these strains, performed comparative genomic analysis with other sequenced K. variicola strains, and found a lack of consistency in accessory gene content among these K. variicola strains. However, prediction based on the pan-genome of K. variicola revealed 22 unique virulence factors carried by the 12 pathogenic K. variicola isolates. Microbiome and microbial interaction network analysis of endophytes between the healthy tissues of diseased plants and healthy plants of two cultivars showed that Methanobacterium negatively interacts with Klebsiella in banana plants and that Herbaspirillum might indirectly inhibit Methanobacterium to promote Klebsiella growth.
These results suggest that banana sheath rot is caused by the imbalance of plant endophytes and opportunistic pathogenic bacteria, providing an important basis for research and control of this disease.[Formula: see text] Copyright © 2023 The Author(s). This is an open access article distributed under the CC BY-NC-ND 4.0 International license.},
}
@article{pmid37596178,
  author   = {Mertz, P and Hentgen, V and Boursier, G and Delon, J and Georgin-Lavialle, S},
  title    = {[Monogenic auto-inflammatory diseases associated with actinopathies: A review of the literature]},
  journal  = {La Revue de medecine interne},
  year     = {2023},
  doi      = {10.1016/j.revmed.2023.06.005},
  pmid     = {37596178},
  issn     = {1768-3122},
  abstract = {Auto-inflammatory diseases (AIDs) are diseases resulting from an inappropriate activation of innate immunity in the absence of any infection. The field of monogenic AIDs is constantly expanding, with the discovery of new pathologies and pathophysiological mechanisms thanks to pangenomic sequencing. Actinopathies with auto-inflammatory manifestations are a new emerging group of AIDs, linked to defects in the regulation of the actin cytoskeleton dynamics. These diseases most often begin in the neonatal period and combine to varying degrees a more or less severe primary immune deficiency, cytopenias (especially thrombocytopenia), auto-inflammatory manifestations (especially cutaneous and digestive), atopic and auto-immune manifestations. The diagnosis is to be evoked essentially in front of a cutaneous-digestive auto-inflammation picture of early onset, associated with a primary immune deficiency and thrombocytopenia or a tendency to bleed. Some of these diseases have specificities, including a risk of macrophagic activation syndrome or a tendency to atopy or lymphoproliferation. We propose here a review of the literature on these new diseases, with a proposal for a practical approach according to the main associated biological abnormalities and some clinical particularities. However, the diagnosis remains genetic, and several differential diagnoses must be considered. The pathophysiology of these diseases is not yet fully elucidated, and studies are needed to better clarify the inherent mechanisms that can guide the choice of therapies.
In most cases, the severity of the picture indicates allogeneic marrow transplantation.},
}
@article{pmid37594286,
  author   = {Kim, M and Cha, IT and Lee, KE and Li, M and Park, SJ},
  title    = {Pangenome analysis provides insights into the genetic diversity, metabolic versatility, and evolution of the genus {Flavobacterium}},
  journal  = {Microbiology spectrum},
  year     = {2023},
  pages    = {e0100323},
  doi      = {10.1128/spectrum.01003-23},
  pmid     = {37594286},
  issn     = {2165-0497},
  abstract = {Members of the genus Flavobacterium are widely distributed and produce various polysaccharide-degrading enzymes. Many species in the genus have been isolated and characterized. However, few studies have focused on marine isolates or fish pathogens, and in-depth genomic analyses, particularly comparative analyses of isolates from different habitat types, are lacking. Here, we isolated 20 strains of the genus from various environments in South Korea and sequenced their full-length genomes. Combined with published sequence data, we examined genomic traits, evolution, environmental adaptation, and putative metabolic functions in total 187 genomes of isolated species in Flavobacterium categorized as marine, host-associated, and terrestrial including freshwater. A pangenome analysis revealed a correlation between genome size and coding or noncoding density. Flavobacterium spp. had high levels of diversity, allowing for novel gene repertories via recombination events. Defense-related genes only accounted for approximately 3% of predicted genes in all Flavobacterium genomes. While genes involved in metabolic pathways did not differ with respect to isolation source, there was substantial variation in genomic traits; in particular, the abundances of tRNAs and rRNAs were higher in the host-associdated group than in other groups. One genome in the host-associated group contained a Microviridae prophage closely related to an enterobacteria phage. The proteorhodopsin gene was only identified in four terrestrial strains isolated for this study.
Furthermore, recombination events clearly influenced genomic diversity and may contribute to the response to environmental stress. These findings shed light on the high genetic variation in Flavobacterium and functional roles in diverse ecosystems as a result of their metabolic versatility. IMPORTANCE The genus Flavobacterium is a diverse group of bacteria that are found in a variety of environments. While most species of this genus are harmless and utilize organic substrates such as proteins and polysaccharides, some members may play a significant role in the cycling for organic substances within their environments. Nevertheless, little is known about the genomic dynamics and/or metabolic capacity of Flavobacterium. Here, we found that Flavobacterium species may have an open pangenome, containing a variety of diverse and novel gene repertoires. Intriguingly, we discovered that one genome (classified into host-associated group) contained a Microviridae prophage closely related to that of enterobacteria. Proteorhodopsin may be expressed under conditions of light or oxygen pressure in some strains isolated for this study. Our findings significantly contribute to the understanding of the members of the genus Flavobacterium diversity exploration and will provide a framework for the way for future ecological characterizations.},
}
@article{pmid37592233,
  author   = {Zhang, X and Xiao, L and Liu, J and Tian, Q and Xie, J},
  title    = {Trade-off in genome turnover events leading to adaptive evolution of {Microcystis} aeruginosa species complex},
  journal  = {BMC genomics},
  year     = {2023},
  volume   = {24},
  number   = {1},
  pages    = {462},
  pmid     = {37592233},
  issn     = {1471-2164},
  support  = {32101368//National Natural Science Foundation of China/ ; 2022YFE0119600//National Key Research and Development Program of China/ ; },
  abstract = {BACKGROUND: Numerous studies in the past have expanded our understanding of the genetic differences of global distributed cyanobacteria that originated around billions of years ago, however, unraveling how gene gain and loss drive the genetic evolution of cyanobacterial species, and the trade-off of these evolutionary forces are still the central but poorly understood issues.
RESULTS: To delineate the contribution of gene flow in mediating the hereditary differentiation and shaping the microbial evolution, a global genome-wide study of bloom-forming cyanobacterium, Microcystis aeruginosa species complex, provided robust evidence for genetic diversity, reflected by enormous variation in gene repertoire among various strains. Mathematical extrapolation showed an 'open' microbial pan-genome of M. aeruginosa species, since novel genes were predicted to be introduced after new genomes were sequenced. Identification of numerous horizontal gene transfer's signatures in genome regions of interest suggested that genome expansion via transformation and phage-mediated transduction across bacterial lineage as an evolutionary route may contribute to the differentiation of Microcystis functions (e.g., carbohydrate metabolism, amino acid metabolism, and energy metabolism). Meanwhile, the selective loss of some dispensable genes at the cost of metabolic versatility is as a mean of adaptive evolution that has the potential to increase the biological fitness.
CONCLUSIONS: Now that the recruitment of novel genes was accompanied by a parallel loss of some other ones, a trade-off in gene content may drive the divergent differentiation of M. aeruginosa genomes. Our study provides a genetic framework for the evolution of M. aeruginosa species and illustrates their possible evolutionary patterns.},
}
@article{pmid37587248,
  author   = {Pei, Z and Li, X and Cui, S and Yang, B and Lu, W and Zhao, J and Mao, B and Chen, W},
  title    = {Population genomics of {Lacticaseibacillus} paracasei: pan-genome, integrated prophage, antibiotic resistance, and carbohydrate utilization},
  journal  = {World journal of microbiology \& biotechnology},
  year     = {2023},
  volume   = {39},
  number   = {10},
  pages    = {280},
  pmid     = {37587248},
  issn     = {1573-0972},
  support  = {32172173//National Natural Science Foundation of China/ ; 31972086//National Natural Science Foundation of China/ ; 2021YFD2100700//Key Technologies Research and Development Program/ ; },
  abstract = {Lacticaseibacillus paracasei has beneficial effects on human health and holds promising potential as a probiotic for use in the development of functional foods, especially dairy products. This species can adapt to a variety of ecological niches and presents fundamental carbohydrate metabolism and tolerance to environmental stresses. However, the population structure, ecology, and antibiotic resistance of Lc. paracasei in diverse ecological niches are poorly understood. Reclassification of Lc. paracasei as a separate species of Lacticaseibacillus has stimulated renewed interest in its research, and a deeper interpretation of it will be important for screening strains beneficial to human health. Here, we collected 121 self-isolated and 268 publicly available Lc. paracasei genomes discussed how genomic approaches have advanced our understanding of its taxonomy, ecology, evolution, diversity, integrated prophage-related element distribution, antibiotic resistance, and carbohydrate utilization. Moreover, for the Lc. paracasei strains isolated in this study, we assessed the inducibility of integrated prophages in their genomes and determined the phenotypes that presented tolerance to multiple antibiotics to provide evidence for safety evaluations of Lc. paracasei during the fermentation processes.},
}
@article{pmid37580659,
  author   = {Ma, C and Li, M and Peng, H and Lan, M and Tao, L and Li, C and Wu, C and Bai, H and Zhong, Y and Zhong, S and Qin, R and Li, F and Li, J and He, J},
  title    = {Mesomycoplasma ovipneumoniae from goats with respiratory infection: pathogenic characteristics, population structure, and genomic features},
  journal  = {BMC microbiology},
  year     = {2023},
  volume   = {23},
  number   = {1},
  pages    = {220},
  pmid     = {37580659},
  issn     = {1471-2180},
  support  = {AB16380106//Guangxi key research and development plan/ ; 19-50-40-A-04//Guangxi Key Laboratory of Veterinary Biotechnology Independent Research Topic/ ; },
  abstract = {BACKGROUND: Mycoplasma ovipneumoniae is a critical pathogen that causes respiratory diseases that threaten Caprini health and cause economic damage. A genome-wide study of M. ovipneumoniae will help understand the pathogenic characteristics of this microorganism.
RESULTS: Toxicological pathology and whole-genome sequencing of nine M. ovipneumoniae strains isolated from goats were performed using an epidemiological survey. These strains exhibited anterior ventral lung consolidation, typical of bronchopneumonia in goats. Average nucleotide identity and phylogenetic analysis based on whole-genome sequences showed that all M. ovipneumoniae strains clustered into two clades, largely in accordance with their geographical origins. The pan-genome of the 23 M. ovipneumoniae strains contained 5,596 genes, including 385 core, 210 soft core, and 5,001 accessory genes. Among these genes, two protein-coding genes were annotated as cilium adhesion and eight as paralog surface adhesins when annotated to VFDB, and no antibiotic resistance-related genes were predicted. Additionally, 23 strains carried glucosidase-related genes (ycjT and group_1595) and glucosidase-related genes (atpD_2), indicating that M. ovipneumoniae possesses a wide range of glycoside hydrolase activities.
CONCLUSIONS: The population structure and genomic features identified in this study will facilitate further investigations into the pathogenesis of M. ovipneumoniae and lay the foundation for the development of preventive and therapeutic methods.},
}
@article{pmid37580306,
  author   = {Alexandrov, N and Wang, T and Blair, L and Nadon, B and Sayer, D},
  title    = {{HLA-OLI}: A new {MHC} class {I} pseudogene and {HLA-Y} are located on a 60 kb indel in the human {MHC} between {HLA-W} and {HLA-J}},
  journal  = {HLA},
  year     = {2023},
  doi      = {10.1111/tan.15180},
  pmid     = {37580306},
  issn     = {2059-2310},
  abstract = {Analysis of publicly available whole-genome sequence data from the Human Pangenome Project and the 1000 Genomes Project has identified a DNA segment of approximately 60 kb in the major histocompatibility complex (MHC) between HLA-W and HLA-J that is present in some MHC haplotypes but not others. This DNA segment is largely repeat element-rich but includes the pseudogene HLA-Y, thus pinpointing the location of this pseudogene, and a new HLA class I sequence we have called HLA-OLI. HLA-OLI clusters phylogenetically with the HLA class I pseudogenes, HLA-P and HLA-W, and appears to have a similar genetic structure. The availability of whole-genome sequence data from diverse populations enables a detailed characterization of the MHC at the population level and will have implications for understanding MHC disease associations and the non-HLA MHC factors that impact unrelated hematopoietic cell transplant outcomes.},
}
@article{pmid37578072,
  author   = {Khan, K and Burki, S and Alsaiari, AA and Alhuthali, HM and Alharthi, NS and Jalal, K},
  title    = {A therapeutic epitopes-based vaccine engineering against {Salmonella} enterica {XDR} strains for typhoid fever: a Pan-vaccinomics approach},
  journal  = {Journal of biomolecular structure \& dynamics},
  year     = {2023},
  pages    = {1--15},
  doi      = {10.1080/07391102.2023.2246587},
  pmid     = {37578072},
  issn     = {1538-0254},
  abstract = {A prevalent food-borne pathogen, Salmonella enterica serotypes Typhi, is responsible for gastrointestinal and systemic infections globally. Salmonella vaccines are the most effective, however, producing a broad-spectrum vaccine remains challenging due to Salmonella's many serotypes. Efforts are urgently required to develop a novel vaccine candidate that can tackle all S. Typhi strains because of their high resistance to multiple kinds of antibiotics (particularly the XDR H58 strain). In this work, we used a computational pangenome-based vaccine design technique on all available (n = 119) S. Typhi reference genomes and identified one TonB-dependent siderophore receptor (WP_001034967.1) as highly conserved and prospective vaccine candidates from the predicted core genome (n = 3,351). The applied pan-proteomics and Immunoinformatic approaches help in the identification of four epitopes that may trigger adequate host body immune responses. Furthermore, the proposed vaccine ensemble demonstrates a stable binding conformation with the examined immunological receptor (HLAs and TRL2/4) and has large interaction energy determined via molecular docking and molecular dynamics simulation techniques. Eventually, an expression vector for the Escherichia. coli K12 strain was constructed from the vaccine sequence.
Additional analysis revealed that the vaccine may help to elicit strong immune responses for typhoid infections, however, experimental analysis is required to verify the vaccine's effectiveness based on these results. Moreover, the applied computer-assisted vaccine design may considerably decrease vaccine development costs and speed up the process. The study's findings are intriguing, but they must be evaluated in the experimental labs to confirm the developed vaccine's biological efficiency against XDR S. Typhi.Communicated by Ramaswamy H. Sarma.},
}
@article{pmid37577683,
  author   = {Yocca, AE and Platts, A and Alger, E and Teresi, S and Mengist, MF and Benevenuto, J and Ferrão, LFV and Jacobs, M and Babinski, M and Magallanes-Lundback, M and Bayer, P and Golicz, A and Humann, JL and Main, D and Espley, RV and Chagné, D and Albert, NW and Montanari, S and Vorsa, N and Polashock, J and Díaz-Garcia, L and Zalapa, J and Bassil, NV and Munoz, PR and Iorizzo, M and Edger, PP},
  title    = {Blueberry and cranberry pangenomes as a resource for future genetic studies and breeding efforts},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2023},
  doi      = {10.1101/2023.07.31.551392},
  pmid     = {37577683},
  abstract = {Domestication of cranberry and blueberry began in the United States in the early 1800s and 1900s, respectively, and in part owing to their flavors and health-promoting benefits are now cultivated and consumed worldwide. The industry continues to face a wide variety of production challenges (e.g. disease pressures) as well as a demand for higher-yielding cultivars with improved fruit quality characteristics. Unfortunately, molecular tools to help guide breeding efforts for these species have been relatively limited compared with those for other high-value crops. Here, we describe the construction and analysis of the first pangenome for both blueberry and cranberry. Our analysis of these pangenomes revealed both crops exhibit great genetic diversity, including the presence-absence variation of 48.4% genes in highbush blueberry and 47.0% genes in cranberry. Auxiliary genes, those not shared by all cultivars, are significantly enriched with molecular functions associated with disease resistance and the biosynthesis of specialized metabolites, including compounds previously associated with improving fruit quality traits.
The discovery of thousands of genes, not present in the previous reference genomes for blueberry and cranberry, will serve as the basis of future research and as potential targets for future breeding efforts. The pangenome, as a multiple-sequence alignment, as well as individual annotated genomes, are publicly available for analysis on the Genome Database for Vaccinium - a curated and integrated web-based relational database. Lastly, the core-gene predictions from the pangenomes will serve useful to develop a community genotyping platform to guide future molecular breeding efforts across the family.},
}
% pmid37576785: abstract text "ofC. striatum" (space lost with the italic markup) restored to "of C. striatum".
@article{pmid37576785,
  author   = {Qiu, J and Shi, Y and Zhao, F and Xu, Y and Xu, H and Dai, Y and Cao, Y},
  title    = {The Pan-Genomic Analysis of Corynebacterium striatum Revealed its Genetic Characteristics as an Emerging Multidrug-Resistant Pathogen.},
  journal  = {Evolutionary bioinformatics online},
  year     = {2023},
  volume   = {19},
  number   = {},
  pages    = {11769343231191481},
  pmid     = {37576785},
  issn     = {1176-9343},
  abstract = {Corynebacterium striatum is a Gram-positive bacterium that is straight or slightly curved and non-spore-forming. Although it was originally believed to be a part of the normal microbiome of human skin, a growing number of studies have identified it as a cause of various chronic diseases, bacteremia, and respiratory infections. However, despite its increasing importance as a pathogen, the genetic characteristics of the pathogen population, such as genomic characteristics and differences, the types of resistance genes and virulence factors carried by the pathogen and their distribution in the population are poorly understood. To address these knowledge gaps, we conducted a pan-genomic analysis of 314 strains of C. striatum isolated from various tissues and geographic locations. Our analysis revealed that C. striatum has an open pan-genome, comprising 5692 gene families, including 1845 core gene families, 2362 accessory gene families, and 1485 unique gene families. We also found that C. striatum exhibits a high degree of diversity across different sources, but strains isolated from skin tissue are more conserved. Furthermore, we identified 53 drug resistance genes and 42 virulence factors by comparing the strains to the drug resistance gene database (CARD) and the pathogen virulence factor database (VFDB), respectively. We found that these genes and factors are widely distributed among C. striatum, with 77.7% of strains carrying 2 or more resistance genes and displaying primary resistance to aminoglycosides, tetracyclines, lincomycin, macrolides, and streptomycin. The virulence factors are primarily associated with pathogen survival within the host, iron uptake, pili, and early biofilm formation. In summary, our study provides insights into the population diversity, resistance genes, and virulence factors of C. striatum from different sources. Our findings could inform future research and clinical practices in the diagnosis, prevention, and treatment of C. striatum-associated diseases.},
}
% pmid37576287: genus name in the title capitalised ("Rhodococcus equi"), matching the abstract; the export had it lowercased.
@article{pmid37576287,
  author   = {Liu, L and Yu, W and Cai, K and Ma, S and Wang, Y and Ma, Y and Zhao, H},
  title    = {Identification of vaccine candidates against Rhodococcus equi by combining pangenome analysis with a reverse vaccinology approach.},
  journal  = {Heliyon},
  year     = {2023},
  volume   = {9},
  number   = {8},
  pages    = {e18623},
  pmid     = {37576287},
  issn     = {2405-8440},
  abstract = {Rhodococcus equi (R. equi) is a zoonotic opportunistic pathogen that can cause life-threatening infections. The rapid evolution of multidrug-resistant R. equi and the fact that there is no currently licensed effective vaccine against R. equi warrant the need for vaccine development. Reverse vaccinology (RV), which involves screening a pathogen's entire genome and proteome using various web-based prediction tools, is considered one of the most effective approaches for identifying vaccine candidates. Here, we performed a pangenome analysis to determine the core proteins of R. equi. We then used the RV approach to examine the subcellular localization, host and gut flora homology, antigenicity, transmembrane helices, physicochemical properties, and immunogenicity of the core proteins to select potential vaccine candidates. The vaccine candidates were then subjected to epitope mapping to predict the exposed antigenic epitopes that possess the ability to bind with major histocompatibility complex I/II (MHC I/II) molecules. These vaccine candidates and epitopes will form a library of elements for the development of a polyvalent or universal vaccine against R. equi. Sixteen R. equi complete proteomes were found to contain 6,238 protein families, and the core proteins consisted of 3,969 protein families (∼63.63% of the pangenome), reflecting a low degree of intraspecies genomic variability. From the pool of core proteins, 483 nonhost homologous membrane and extracellular proteins were screened, and 12 vaccine candidates were finally identified according to their antigenicity, physicochemical properties and other factors. These included four cell wall/membrane/envelope biogenesis proteins; four amino acid transport and metabolism proteins; one cell cycle control, cell division and chromosome partitioning protein; one carbohydrate transport and metabolism protein; one secondary metabolite biosynthesis, transport and catabolism protein; and one defense mechanism protein. All 12 vaccine candidates have an experimentally validated 3D structure available in the protein data bank (PDB). Epitope mapping of the candidates showed that 16 MHC I epitopes and 13 MHC II epitopes with the strongest immunogenicity were exposed on the protein surface, indicating that they could be used to develop a polypeptide vaccine. Thus, we utilized an analytical strategy that combines pangenome analysis and RV to generate a peptide antigen library that simplifies the development of multivalent or universal vaccines against R. equi and can be applied to the development of other vaccines.},
}
@article{pmid37575187,
  author   = {Chao, KH and Chen, PW and Seshia, SA and Langmead, B},
  title    = {WGT: Tools and algorithms for recognizing, visualizing, and generating Wheeler graphs.},
  journal  = {iScience},
  year     = {2023},
  volume   = {26},
  number   = {8},
  pages    = {107402},
  pmid     = {37575187},
  issn     = {2589-0042},
  abstract = {A Wheeler graph represents a collection of strings in a way that is particularly easy to index and query. Such a graph is a practical choice for representing a graph-shaped pangenome, and it is the foundation for current graph-based pangenome indexes. However, there are no practical tools to visualize or to check graphs that may have the Wheeler properties. Here, we present Wheelie, an algorithm that combines a renaming heuristic with a permutation solver (Wheelie-PR) or a Satisfiability Modulo Theory (SMT) solver (Wheelie-SMT) to check whether a given graph has the Wheeler properties, a problem that is NP-complete in general. Wheelie can check a variety of random and real-world graphs in far less time than any algorithm proposed to date. It can check a graph with 1,000s of nodes in seconds. We implement these algorithms together with complementary visualization tools in the WGT toolkit, available as open source software at https://github.com/Kuanhao-Chao/Wheeler_Graph_Toolkit.},
}
% pmid37573136: genus name in the title capitalised ("Pseudoalteromonas atlantica"), matching the abstract; the export had it lowercased.
@article{pmid37573136,
  author   = {Kokate, PP and Bales, E and Joyner, D and Hazen, TC and Techtmann, SM},
  title    = {Biogeographic patterns in populations of marine Pseudoalteromonas atlantica isolates.},
  journal  = {FEMS microbiology letters},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1093/femsle/fnad081},
  pmid     = {37573136},
  issn     = {1574-6968},
  abstract = {Intra-specific genomic diversity is well documented in microbes. The question, however, remains whether natural selection or neutral evolution is the major contributor to this diversity. We undertook this study to estimate genomic diversity in Pseudoalteromonas atlantica populations and whether the diversity, if present, could be attributed to environmental factors or distance effects. We isolated and sequenced twenty-three strains of P. atlantica from three geographically distant deep marine basins and performed comparative genomic analyses to study the genomic diversity of populations among these basins. Average nucleotide identity followed a strictly geographical pattern. In two out of three locations, the strains within the location exhibited > 99.5% identity, whereas, among locations, the strains showed < 98.11% identity. Phylogenetic and pangenome analysis also reflected the biogeographical separation of the strains. Strains from the same location shared many accessory genes and clustered closely on the phylogenetic tree. Phenotypic diversity between populations was studied in ten out of twenty-three strains testing carbon and nitrogen source utilization, and osmotolerance. A genetic basis for phenotypic diversity could be established in most cases but was apparently not influenced by local environmental conditions. Our study suggests that neutral evolution may have a substantial role in the biodiversity of P. atlantica.},
}
@article{pmid37571822,
  author  = {Raza, A and Bohra, A and Garg, V and Varshney, RK},
  title   = {Back to wild relatives for future breeding through super-pangenome.},
  journal = {Molecular plant},
  year    = {2023},
  volume  = {},
  number  = {},
  pages   = {},
  doi     = {10.1016/j.molp.2023.08.005},
  pmid    = {37571822},
  issn    = {1752-9867},
}
@article{pmid37567624,
  author   = {Rajput, A and Chauhan, SM and Mohite, OS and Hyun, JC and Ardalani, O and Jahn, LJ and Sommer, MO and Palsson, BO},
  title    = {Pangenome analysis reveals the genetic basis for taxonomic classification of the Lactobacillaceae family.},
  journal  = {Food microbiology},
  year     = {2023},
  volume   = {115},
  number   = {},
  pages    = {104334},
  doi      = {10.1016/j.fm.2023.104334},
  pmid     = {37567624},
  issn     = {1095-9998},
  abstract = {Lactobacillaceae represent a large family of important microbes that are foundational to the food industry. Many genome sequences of Lactobacillaceae strains are now available, enabling us to conduct a comprehensive pangenome analysis of this family. We collected 3591 high-quality genomes from public sources and found that: 1) they contained enough genomes for 26 species to perform a pangenomic analysis, 2) the normalized Heap's coefficient λ (a measure of pangenome openness) was found to have an average value of 0.27 (ranging from 0.07 to 0.37), 3) the pangenome openness was correlated with the abundance and genomic location of transposons and mobilomes, 4) the pangenome for each species was divided into core, accessory, and rare genomes, that highlight the species-specific properties (such as motility and restriction-modification systems), 5) the pangenome of Lactiplantibacillus plantarum (which contained the highest number of genomes found amongst the 26 species studied) contained nine distinct phylogroups, and 6) genome mining revealed a richness of detected biosynthetic gene clusters, with functions ranging from antimicrobial and probiotic to food preservation, but ∼93% were of unknown function. This study provides the first in-depth comparative pangenomics analysis of the Lactobacillaceae family.},
}
% pmid37556679: abstract typos corrected ("group group", "adminstration", "stategy"); all other text is as exported.
@article{pmid37556679,
  author   = {Hill, H and Mitsi, E and Nikolaou, E and Blizard, A and Pojar, S and Howard, A and Hyder-Wright, A and Devin, J and Reiné, J and Robinson, R and Solórzano, C and Jochems, SP and Kenny-Nyazika, T and Ramos-Sevillano, E and Weight, CM and Myerscough, C and McLenaghan, D and Morton, B and Gibbons, E and Farrar, M and Randles, V and Burhan, H and Chen, T and Shandling, AD and Campo, JJ and Heyderman, RS and Gordon, SB and Brown, JS and Collins, AM and Ferreira, DM},
  title    = {A Randomised Controlled Trial of Nasal Immunisation with Live Virulence Attenuated Streptococcus pneumoniae Strains Using Human Infection Challenge.},
  journal  = {American journal of respiratory and critical care medicine},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1164/rccm.202302-0222OC},
  pmid     = {37556679},
  issn     = {1535-4970},
  abstract = {RATIONALE: Pneumococcal pneumonia remains a global health problem. Pneumococcal colonisation increases local and systemic protective immunity, suggesting nasal administration of live attenuated S. pneumoniae strains could help prevent infections.
OBJECTIVES: We used a controlled human infection model to investigate whether nasopharyngeal colonisation with attenuated S. pneumoniae strains protected against re-colonisation with wild-type (WT) S. pneumoniae (Spn).
METHODS: Healthy adults aged 18-50 years were randomised (1:1:1:1) for nasal administration twice (two weeks interval) with saline, WT Spn6B (BHN418) or one of two genetically modified Spn6B strains - SpnA1 (∆fhs/piaA) or SpnA3 (∆proABC/piaA) (Stage I). After 6 months, participants were challenged with SpnWT to assess protection against the homologous serotype (Stage II).
MEASUREMENTS AND MAIN RESULTS: 125 participants completed both study stages as per intention to treat. No Serious Adverse Events were reported. In Stage I, colonisation rates were similar amongst groups: SpnWT 58.1% (18/31), SpnA1 60% (18/30) and SpnA3 59.4% (19/32). Anti-Spn nasal IgG levels post-colonisation were similar in all groups whilst serum IgG responses were higher in the SpnWT and SpnA1 groups than the SpnA3 group. In colonised individuals, increases in IgG responses were identified against 197 Spn protein antigens and serotype 6 capsular polysaccharide using a pangenome array. Participants given SpnWT or SpnA1 in stage 1 were partially protected against homologous challenge with SpnWT (29% and 30% recolonisation rates, respectively) at stage II, whereas those exposed to SpnA3 achieved recolonisation rate similar to control group (50% vs 47%, respectively).
CONCLUSION: Nasal colonisation with genetically modified live attenuated Spn was safe and induced protection against recolonisation, suggesting nasal administration of live attenuated Spn could be an effective strategy for preventing pneumococcal infections.},
}
@article{pmid37555725,
  author   = {Wei, F and Liang, X and Shi, JC and Luo, J and Qiu, LJ and Li, XX and Lu, LJ and Wen, Y and Feng, J},
  title    = {Pan-genomic Analysis Identifies the Chinese Strain as a New Subspecies of Xanthomonas fragariae.},
  journal  = {Plant disease},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1094/PDIS-05-23-0933-SC},
  pmid     = {37555725},
  issn     = {0191-2917},
  abstract = {Xanthomonas fragariae (X. fragariae) is classified as a quarantine pathogen by the European and Mediterranean Plant Protection Organization. It commonly induces typical angular leaf spot (ALS) symptoms in strawberry leaves. X. fragariae strains from China (YL19, SHAQP01, and YLX21) exhibit ALS symptoms in leaves and more severe symptoms of dry cavity rot in strawberry crowns. Conversely, strains from other countries do not cause severe dry cavity rot symptoms in strawberries. Employing multilocus sequence analysis (MLSA), average nucleotide identity (ANI), and amino acid identity (AAI), we determined that Chinese strains of X. fragariae are genetically distinct from other strains and can be considered a new subspecies. Subsequent analysis of 63 X. fragariae genomes published at NCBI using IPGA and EDGAR3.0 revealed the pan-genomic profile, with 1680 shared genes present in the all 63 strains, including 71 virulence-related genes. Additionally, we identified 123 genes exclusive to the of all Chinese strains, encompassing 12 virulence-related genes. The qRT-PCR analysis demonstrated that the expression of XopD, XopG1, CE8, GT2 and GH121, out of 12 virulence-related genes of Chinese strains (YL19) exhibited a constant increase in the early stages (6 hpi, 24 hpi, 54 hpi, and 96 hpi) of strawberry leaf infected by YL19. Hence, the presence of XopD, XopG1, CE8, GT2, and GH121 in Chinese strains may play important roles in the early infection process of Chinese strains. These findings offer novel insights into comprehending the population structure and variation in the pathogenic capacity of X. fragariae.},
}
@article{pmid37553643,
  author   = {Hyun, JC and Palsson, BO},
  title    = {Reconstruction of the last bacterial common ancestor from 183 pangenomes reveals a versatile ancient core genome.},
  journal  = {Genome biology},
  year     = {2023},
  volume   = {24},
  number   = {1},
  pages    = {183},
  pmid     = {37553643},
  issn     = {1474-760X},
  support  = {T32GM8806/NH/NIH HHS/United States ; },
  abstract = {BACKGROUND: Cumulative sequencing efforts have yielded enough genomes to construct pangenomes for dozens of bacterial species and elucidate intraspecies gene conservation. Given the diversity of organisms for which this is achievable, similar analyses for ancestral species are feasible through the integration of pangenomics and phylogenetics, promising deeper insights into the nature of ancient life.
RESULTS: We construct pangenomes for 183 bacterial species from 54,085 genomes and identify their core genomes using a novel statistical model to estimate genome-specific error rates and underlying gene frequencies. The core genomes are then integrated into a phylogenetic tree to reconstruct the core genome of the last bacterial common ancestor (LBCA), yielding three main results: First, the gene content of modern and ancestral core genomes are diverse at the level of individual genes but are similarly distributed by functional category and share several poorly characterized genes. Second, the LBCA core genome is distinct from any individual modern core genome but has many fundamental biological systems intact, especially those involving translation machinery and biosynthetic pathways to all major nucleotides and amino acids. Third, despite this metabolic versatility, the LBCA core genome likely requires additional non-core genes for viability, based on comparisons with the minimal organism, JCVI-Syn3A.
CONCLUSIONS: These results suggest that many cellular systems commonly conserved in modern bacteria were not just present in ancient bacteria but were nearly immutable with respect to short-term intraspecies variation. Extending this analysis to other domains of life will likely provide similar insights into more distant ancestral species.},
}
@article{pmid37546276,
  author   = {Gao, Z and Bian, J and Lu, F and Jiao, Y and He, H},
  title    = {Triticeae crop genome biology: an endless frontier.},
  journal  = {Frontiers in plant science},
  year     = {2023},
  volume   = {14},
  number   = {},
  pages    = {1222681},
  pmid     = {37546276},
  issn     = {1664-462X},
  abstract = {Triticeae, the wheatgrass tribe, includes several major cereal crops and their wild relatives. Major crops within the Triticeae are wheat, barley, rye, and oat, which are important for human consumption, animal feed, and rangeland protection. Species within this tribe are known for their large genomes and complex genetic histories. Powered by recent advances in sequencing technology, researchers worldwide have made progress in elucidating the genomes of Triticeae crops. In addition to assemblies of high-quality reference genomes, pan-genome studies have just started to capture the genomic diversities of these species, shedding light on our understanding of the genetic basis of domestication and environmental adaptation of Triticeae crops. In this review, we focus on recent signs of progress in genome sequencing, pan-genome analyses, and resequencing analysis of Triticeae crops. We also propose future research avenues in Triticeae crop genomes, including identifying genome structure variations, the association of genomic regions with desired traits, mining functions of the non-coding area, introgression of high-quality genes from wild Triticeae resources, genome editing, and integration of genomic resources.},
}
@article{pmid37542576,
  author   = {Hong, H and Yang, SM and Kim, E and Kim, HJ and Park, SH},
  title    = {Comprehensive metagenomic analysis of stress-resistant and -sensitive Listeria monocytogenes.},
  journal  = {Applied microbiology and biotechnology},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  pmid     = {37542576},
  issn     = {1432-0614},
  support  = {E0210702-01//Korea Food Research Institute/ ; },
  abstract = {Listeria monocytogenes is a pathogenic bacterium which can live in adverse environments (low pH, high salinity, and low temperature). Even though there are various whole genome sequencing (WGS) data on L. monocytogenes, investigations on genetic differences between stress-resistant and -sensitive L. monocytogenes grown under stress environments have been not fully examined. This study aims to investigate and compare genetic characteristics between stress-resistant and -sensitive L. monocytogenes using whole genome sequencing (WGS). A total of 47 L. monocytogenes strains (43 stress-resistant and 4 stress-sensitive) were selected based on the stress-resistance tests under pH 3, 5% salt concentration, and 1 °C. The sequencing library for WGS was prepared and sequenced using an Illumina MiSeq. Genetic characteristics of two different L. monocytogenes groups were examined to analyze the pangenome, functionality, virulence, antibiotic resistance, core, and unique genes. The functionality of unique genes in the stress-resistant L. monocytogenes was distinct compared to the stress-sensitive L. monocytogenes, such as carbohydrate and nucleotide transport and metabolism. The lisR virulence gene was detected more in the stress-resistant L. monocytogenes than in the stress-sensitive group. Five stress-resistant L. monocytogenes strains possessed tet(M) antibiotic resistance gene. This is the first study suggesting that deep genomic characteristics of L. monocytogenes may have different resistance level under stress conditions. This new insight will aid in understanding the genetic relationship between stress-resistant and -sensitive L. monocytogenes strains isolated from diverse resources. KEY POINTS: • Whole genomes of L. monocytogenes isolated from three different sources were analyzed. • Differences in two L. monocytogenes groups were identified in functionality, virulence, and antibiotic resistance genes. • This study first examines the association between resistances and whole genomes of stress-resistant and -sensitive L. monocytogenes.},
}
% pmid37538845: abstract misspelling "Pophyoromonas" corrected to "Porphyromonas"; all other text is as exported.
@article{pmid37538845,
  author   = {Morales-Olavarría, M and Nuñez-Belmar, J and González, D and Vicencio, E and Rivas-Pardo, JA and Cortez, C and Cárdenas, JP},
  title    = {Phylogenomic analysis of the Porphyromonas gingivalis - Porphyromonas gulae duo: approaches to the origin of periodontitis.},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  number   = {},
  pages    = {1226166},
  pmid     = {37538845},
  issn     = {1664-302X},
  abstract = {Porphyromonas gingivalis is an oral human pathogen associated with the onset and progression of periodontitis, a chronic immune-inflammatory disease characterized by the destruction of the teeth-supporting tissue. P. gingivalis belongs to the genus Porphyromonas, which is characterized by being composed of Gram-negative, asaccharolytic, non-spore-forming, non-motile, obligatory anaerobic species, inhabiting niches such as the oral cavity, urogenital tract, gastrointestinal tract and infected wound from different mammals including humans. Among the Porphyromonas genus, P. gingivalis stands out for its specificity in colonizing the human oral cavity and its keystone pathogen role in periodontitis pathogenesis. To understand the evolutionary process behind P. gingivalis in the context of the Porphyromonas genus, in this study, we performed a comparative genomics study with publicly available Porphyromonas genomes, focused on four main objectives: (A) to confirm the phylogenetic position of P. gingivalis in the Porphyromonas genus by phylogenomic analysis; (B) the definition and comparison of the pangenomes of P. gingivalis and its relative P. gulae; and (C) the evaluation of the gene family gain/loss events during the divergence of P. gingivalis and P. gulae; (D) the evaluation of the evolutionary pressure (represented by the calculation of Tajima-D values and dN/dS ratios) comparing gene families of P. gingivalis and P. gulae. Our analysis found 84 high-quality assemblies representing P. gingivalis and 14 P. gulae strains (from a total of 233 Porphyromonas genomes). Phylogenomic analysis confirmed that P. gingivalis and P. gulae are highly related lineages, close to P. loveana. Both organisms harbored open pangenomes, with a strong core-to-accessory ratio for housekeeping genes and a negative ratio for unknown function genes. Our analyses also characterized the gene set differentiating P. gulae from P. gingivalis, mainly associated with unknown functions. Relevant virulence factors, such as the FimA, Mfa1, and the hemagglutinins, are conserved in P. gulae, P. gingivalis, and P. loveana, suggesting that the origin of those factors occurred previous to the P. gulae - P. gingivalis divergence. These results suggest an unexpected evolutionary relationship between the P. gulae - P. gingivalis duo and P. loveana, showing more clues about the origin of the role of those organisms in periodontitis.},
}
@article{pmid37537691,
  author   = {Wu, D and Xie, L and Sun, Y and Huang, Y and Jia, L and Dong, C and Shen, E and Ye, CY and Qian, Q and Fan, L},
  title    = {A syntelog-based pan-genome provides insights into rice domestication and de-domestication.},
  journal  = {Genome biology},
  year     = {2023},
  volume   = {24},
  number   = {1},
  pages    = {179},
  pmid     = {37537691},
  issn     = {1474-760X},
  support  = {2022C02032//Department of Science and Technology of Zhejiang Province/ ; 2020C02002//Department of Science and Technology of Zhejiang Province/ ; 31971865//National Natural Science Foundation of China/ ; BX20220269//National Postdoctoral Program for Innovative Talents/ ; },
  abstract = {BACKGROUND: Asian rice is one of the world's most widely cultivated crops. Large-scale resequencing analyses have been undertaken to explore the domestication and de-domestication genomic history of Asian rice, but the evolution of rice is still under debate.
RESULTS: Here, we construct a syntelog-based rice pan-genome by integrating and merging 74 high-accuracy genomes based on long-read sequencing, encompassing all ecotypes and taxa of Oryza sativa and Oryza rufipogon. Analyses of syntelog groups illustrate subspecies divergence in gene presence-and-absence and haplotype composition and identify massive genomic regions putatively introgressed from ancient Geng/japonica to ancient Xian/indica or its wild ancestor, including almost all well-known domestication genes and a 4.5-Mbp centromere-spanning block, supporting a single domestication event in main rice subspecies. Genomic comparisons between weedy and cultivated rice highlight the contribution from wild introgression to the emergence of de-domestication syndromes in weedy rice.
CONCLUSIONS: This work highlights the significance of inter-taxa introgression in shaping diversification and divergence in rice evolution and provides an exploratory attempt by utilizing the advantages of pan-genomes in evolutionary studies.},
}
% pmid37531401: removed the empty trailing author (" and , "), presumably a collective/group author whose name was lost on export. TODO: restore the group name from the publisher record if one exists.
@article{pmid37531401,
  author   = {Burgaya, J and Marin, J and Royer, G and Condamine, B and Gachet, B and Clermont, O and Jaureguy, F and Burdet, C and Lefort, A and de Lastours, V and Denamur, E and Galardini, M and Blanquart, F},
  title    = {The bacterial genetic determinants of Escherichia coli capacity to cause bloodstream infections in humans.},
  journal  = {PLoS genetics},
  year     = {2023},
  volume   = {19},
  number   = {8},
  pages    = {e1010842},
  doi      = {10.1371/journal.pgen.1010842},
  pmid     = {37531401},
  issn     = {1553-7404},
  abstract = {Escherichia coli is both a highly prevalent commensal and a major opportunistic pathogen causing bloodstream infections (BSI). A systematic analysis characterizing the genomic determinants of extra-intestinal pathogenic vs. commensal isolates in human populations, which could inform mechanisms of pathogenesis, diagnostic, prevention and treatment is still lacking. We used a collection of 912 BSI and 370 commensal E. coli isolates collected in France over a 17-year period (2000-2017). We compared their pangenomes, genetic backgrounds (phylogroups, STs, O groups), presence of virulence-associated genes (VAGs) and antimicrobial resistance genes, finding significant differences in all comparisons between commensal and BSI isolates. A machine learning linear model trained on all the genetic variants derived from the pangenome and controlling for population structure reveals similar differences in VAGs, discovers new variants associated with pathogenicity (capacity to cause BSI), and accurately classifies BSI vs. commensal strains. Pathogenicity is a highly heritable trait, with up to 69% of the variance explained by bacterial genetic variants. Lastly, complementing our commensal collection with an older collection from 1980, we predict that pathogenicity continuously increased through 1980, 2000, to 2010. Together our findings imply that E. coli exhibit substantial genetic variation contributing to the transition between commensalism and pathogenicity and that this species evolved towards higher pathogenicity.},
}
@article{pmid37530223,
  author   = {Sun, M and Yan, H and Zhang, A and Jin, Y and Lin, C and Luo, L and Wu, B and Fan, Y and Tian, S and Cao, X and Wang, Z and Luo, J and Yang, Y and Jia, J and Zhou, P and Tang, Q and Jones, CS and Varshney, RK and Srivastava, RK and He, M and Xie, Z and Wang, X and Feng, G and Nie, G and Huang, D and Zhang, X and Zhu, F and Huang, L},
  title    = {Milletdb: a multi-omics database to accelerate the research of functional genomics and molecular breeding of millets.},
  journal  = {Plant biotechnology journal},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1111/pbi.14136},
  pmid     = {37530223},
  issn     = {1467-7652},
  support  = {CARS-34//CARS/ ; SCCXTD-2021-16//Modern Agricultural Industry System Sichuan Forage Innovation Team/ ; 31771866//National Natural Science Foundation of China/ ; 32071867//National Natural Science Foundation of China/ ; 2021YFYZ0013//Sichuan Province Research Grant/ ; },
  abstract = {Millets are a class of nutrient-rich coarse cereals with high resistance to abiotic stress; thus, they guarantee food security for people living in areas with extreme climatic conditions and provide stress-related genetic resources for other crops. However, no platform is available to provide a comprehensive and systematic multi-omics analysis for millets, which seriously hinders the mining of stress-related genes and the molecular breeding of millets. Here, a free, web-accessible, user-friendly millets multi-omics database platform (Milletdb, http://milletdb.novogene.com) has been developed. The Milletdb contains six millets and their one related species genomes, graph-based pan-genomics of pearl millet, and stress-related multi-omics data, which enable Milletdb to be the most complete millets multi-omics database available. We stored GWAS (genome-wide association study) results of 20 yield-related trait data obtained under three environmental conditions [field (no stress), early drought and late drought] for 2 years in the database, allowing users to identify stress-related genes that support yield improvement. Milletdb can simplify the functional genomics analysis of millets by providing users with 20 different tools (e.g., 'Gene mapping', 'Co-expression', 'KEGG/GO Enrichment' analysis, etc.). On the Milletdb platform, a gene PMA1G03779.1 was identified through 'GWAS', which has the potential to modulate yield and respond to different environmental stresses. Using the tools provided by Milletdb, we found that the stress-related PLATZs TFs (transcription factors) family expands in 87.5% of millet accessions and contributes to vegetative growth and abiotic stress responses. Milletdb can effectively serve researchers in the mining of key genes, genome editing and molecular breeding of millets.},
}
@article{pmid37529582,
  author   = {Liang, J and Duan, R and Qin, S and Lv, D and He, Z and Zhang, H and Duan, Q and Xi, J and Chun, H and Fu, G and Zheng, X and Tang, D and Wu, W and Han, H and Jing, H and Wang, X},
  title    = {The complex genomic diversity of Yersinia pestis on the long-term plague foci in Qinghai-Tibet plateau.},
  journal  = {Ecology and evolution},
  year     = {2023},
  volume   = {13},
  number   = {8},
  pages    = {e10387},
  pmid     = {37529582},
  issn     = {2045-7758},
  abstract = {Plague is a typical natural focus disease that circulates in different ecology of vectors and reservoir hosts. We conducted genomic population and phylogenetic analyses of the Yersinia pestis collected from the 12 natural plague foci in China with more than 20 kinds of hosts and vectors. Different ecological landscapes with specific hosts, vectors, and habitat which shape various niches for Y. pestis. The phylogeographic diversity of Y. pestis in different kinds plague foci in China showed host niches adaptation. Most natural plague foci strains are region-and focus-specific, with one predominant subpopulation; but the isolates from the Qinghai-Tibet plateau harbor a higher genetic diversity than other foci. The Y. pestis from Marmota himalayana plague foci are defined as the ancestors of different populations at the root of the evolutionary tree, suggesting several different evolutionary paths to other foci. It has the largest pan-genome and widest SNP distances with most accessory genes enriched in mobilome functions (prophages, transposons). Geological barriers play an important role in the maintenance of local Y. pestis species and block the introduction of non-native strains. This study provides new insights into the control of plague outbreaks and epidemics, deepened the understanding of the evolutionary history of MHPF (M. himalayana plague focus) in China. The population structure and identify clades among different natural foci of China renewed the space cognition of the plague.},
}
% Campillo-Balderas et al. (2023), Journal of Molecular Evolution -- PMID 37526693.
% No volume/issue/pages were present in the source record, so those fields are omitted
% rather than stored as empty placeholders. Braced title words keep their capitals.
@article{pmid37526693,
  author   = {Campillo-Balderas, J. A. and Lazcano, A and Cottom-Salas, W and Jácome, R and Becerra, A},
  title    = {Pangenomic Analysis of Nucleo-Cytoplasmic Large {DNA} Viruses. {I}: The Phylogenetic Distribution of Conserved Oxygen-Dependent Enzymes Reveals a Capture-Gene Process.},
  journal  = {Journal of molecular evolution},
  year     = {2023},
  pmid     = {37526693},
  issn     = {1432-1432},
  support  = {IN214421//DGAPA-PAPIIT, UNAM/ ; },
  abstract = {The Nucleo-Cytoplasmic Large DNA Viruses (NCLDVs) infect a wide range of eukaryotic species, including amoeba, algae, fish, amphibia, arthropods, birds, and mammals. This group of viruses has linear or circular double-stranded DNA genomes whose size spans approximately one order of magnitude, from 100 to 2500 kbp. The ultimate origin of this peculiar group of viruses remains an open issue. Some have argued that NCLDVs' origin may lie in a bacteriophage ancestor that increased its genome size by subsequent recruitment of eukaryotic and bacterial genes. Others have suggested that NCLDVs families originated from cells that underwent an irreversible process of genome reduction. However, the hypothesis that a number of NCLDVs sequences have been recruited from the host genomes has been largely ignored. In the present work, we have performed pangenomic analyses of each of the seven known NCLDVs families. We show that these families' core- and shell genes have cellular homologs, supporting possible escaping-gene events as part of its evolution. Furthermore, the detection of sequences that belong to two protein families (small chain ribonucleotide reductase and Erv1/Air) and to one superfamily [2OG-Fe(II) oxygenases] that are for distribution in all NCLDVs core and shell clusters encoding for oxygen-dependent enzymes suggests that the highly conserved core these viruses originated after the Proterozoic Great Oxidation Event that transformed the terrestrial atmosphere 2.4-2.3 Ga ago.},
}
% Rodrigues et al. (2023), Microbial Genomics 9(8) -- PMID 37526649.
% The source record carried no page or article number; the article is located by DOI.
% Author initials are separated so styles can abbreviate them; the genus name is braced in the title.
@article{pmid37526649,
  author   = {Rodrigues, J. A. and Blankenship, H. M. and Cha, W and Mukherjee, S and Sloup, R. E. and Rudrik, J. T. and Soehnlen, M and Manning, S. D.},
  title    = {Pangenomic analyses of antibiotic-resistant {Campylobacter} jejuni reveal unique lineage distributions and epidemiological associations.},
  journal  = {Microbial genomics},
  year     = {2023},
  volume   = {9},
  number   = {8},
  doi      = {10.1099/mgen.0.001073},
  pmid     = {37526649},
  issn     = {2057-5858},
  abstract = {Application of whole-genome sequencing (WGS) to characterize foodborne pathogens has advanced our understanding of circulating genotypes and evolutionary relationships. Herein, we used WGS to investigate the genomic epidemiology of Campylobacter jejuni, a leading cause of foodborne disease. Among the 214 strains recovered from patients with gastroenteritis in Michigan, USA, 85 multilocus sequence types (STs) were represented and 135 (63.1 %) were phenotypically resistant to at least one antibiotic. Horizontally acquired antibiotic resistance genes were detected in 128 (59.8 %) strains and the genotypic resistance profiles were mostly consistent with the phenotypes. Core-gene phylogenetic reconstruction identified three sequence clusters that varied in frequency, while a neighbour-net tree detected significant recombination among the genotypes (pairwise homoplasy index P<0.01). Epidemiological analyses revealed that travel was a significant contributor to pangenomic and ST diversity of C. jejuni, while some lineages were unique to rural counties and more commonly possessed clinically important resistance determinants. Variation was also observed in the frequency of lineages over the 4 year period with chicken and cattle specialists predominating. Altogether, these findings highlight the importance of geographically specific factors, recombination and horizontal gene transfer in shaping the population structure of C. jejuni.
They also illustrate the usefulness of WGS data for predicting antibiotic susceptibilities and surveillance, which are important for guiding treatment and prevention strategies.},
}
% Safar et al. (2023), BMC Biotechnology 23(1), article 26 -- PMID 37525145.
% Author initials are separated so styles can abbreviate them; the acronym ONT is braced in the title.
@article{pmid37525145,
  author   = {Safar, H. A. and Alatar, F and Nasser, K and Al-Ajmi, R and Alfouzan, W and Mustafa, A. S.},
  title    = {The impact of applying various de novo assembly and correction tools on the identification of genome characterization, drug resistance, and virulence factors of clinical isolates using {ONT} sequencing.},
  journal  = {BMC biotechnology},
  year     = {2023},
  volume   = {23},
  number   = {1},
  pages    = {26},
  pmid     = {37525145},
  issn     = {1472-6750},
  abstract = {Oxford Nanopore sequencing technology (ONT) is currently widely used due to its affordability, simplicity, and reliability. Despite the advantage ONT has over next-generation sequencing in detecting resistance genes in mobile genetic elements, its relatively high error rate (10-15%) is still a deterrent. Several bioinformatic tools are freely available for raw data processing and obtaining complete and more accurate genome assemblies. In this study, we evaluated the impact of using mix-and-matched read assembly (Flye, Canu, Wtdbg2, and NECAT) and read correction (Medaka, NextPolish, and Racon) tools in generating complete and accurate genome assemblies, and downstream genomic analysis of nine clinical Escherichia coli isolates. Flye and Canu assemblers were the most robust in genome assembly, and Medaka and Racon correction tools significantly improved assembly parameters. Flye functioned well in pan-genome analysis, while Medaka increased the number of core genes detected. Flye, Canu, and NECAT assembler functioned well in detecting antimicrobial resistance genes (AMR), while Wtdbg2 required correction tools for better detection. Flye was the best assembler for detecting and locating both virulence and AMR genes (i.e., chromosomal vs. plasmid). This study provides insight into the performance of several read assembly and read correction tools for analyzing ONT sequencing reads for clinical isolates.},
}
% O'Donnell et al. (2023), Nature Genetics -- PMID 37524789.
% No volume/issue/pages were present in the source record, so those fields are omitted
% rather than stored as empty placeholders. The genus name is braced in the title.
@article{pmid37524789,
  author   = {O'Donnell, S and Yue, J. X. and Saada, O. A. and Agier, N and Caradec, C and Cokelaer, T and De Chiara, M and Delmas, S and Dutreux, F and Fournier, T and Friedrich, A and Kornobis, E and Li, J and Miao, Z and Tattini, L and Schacherer, J and Liti, G and Fischer, G},
  title    = {Telomere-to-telomere assemblies of 142 strains characterize the genome structural landscape in {Saccharomyces} cerevisiae.},
  journal  = {Nature genetics},
  year     = {2023},
  pmid     = {37524789},
  issn     = {1546-1718},
  support  = {ANR-16-CE 12-0019//Agence Nationale de la Recherche (French National Research Agency)/ ; ANR-15-IDEX-01//Agence Nationale de la Recherche (French National Research Agency)/ ; 772505//EC | EC Seventh Framework Programm | FP7 Ideas: European Research Council (FP7-IDEAS-ERC - Specific Programme: "Ideas" Implementing the Seventh Framework Programme of the European Community for Research, Technological Development and Demonstration Activities (2007 to 2013))/ ; },
  abstract = {Pangenomes provide access to an accurate representation of the genetic diversity of species, both in terms of sequence polymorphisms and structural variants (SVs). Here we generated the Saccharomyces cerevisiae Reference Assembly Panel (ScRAP) comprising reference-quality genomes for 142 strains representing the species' phylogenetic and ecological diversity. The ScRAP includes phased haplotype assemblies for several heterozygous diploid and polyploid isolates. We identified circa (ca.) 4,800 nonredundant SVs that provide a broad view of the genomic diversity, including the dynamics of telomere length and transposable elements. We uncovered frequent cases of complex aneuploidies where large chromosomes underwent large deletions and translocations. We found that SVs can impact gene expression near the breakpoints and substantially contribute to gene repertoire evolution.
We also discovered that horizontally acquired regions insert at chromosome ends and can generate new telomeres. Overall, the ScRAP demonstrates the benefit of a pangenome in understanding genome evolution at population scale.},
}
% Jaén-Luchoro et al. (2023), Microorganisms 11(7) -- PMID 37512795.
% The source record carried no page or article number; the article is located by DOI.
% Braced title words (clone and subclade labels, acronym, genus initial) keep their capitals under sentence-casing styles.
@article{pmid37512795,
  author   = {Jaén-Luchoro, D and Kahnamouei, A and Yazdanshenas, S and Lindblom, A and Samuelsson, E and Åhrén, C and Karami, N},
  title    = {Comparative Genomic Analysis of {ST131} Subclade {C2} of {ESBL}-Producing {E.} coli Isolates from Patients with Recurrent and Sporadic Urinary Tract Infections.},
  journal  = {Microorganisms},
  year     = {2023},
  volume   = {11},
  number   = {7},
  doi      = {10.3390/microorganisms11071622},
  pmid     = {37512795},
  issn     = {2076-2607},
  support  = {ALFGBG-725361//Region Västra Götaland/ ; VGFOUREG-929979//Region Västra Götaland/ ; 2020-02518//Sahlgrenska University Hospital/ ; },
  abstract = {The global emergence of extended-spectrum beta-lactamase-producing Escherichia coli (ESBL-E. coli), mainly causing urinary tract infections (UTI), is a major threat to human health. ESBL-E. coli sequence type (ST) 131 is the dominating clone worldwide, especially its subclade C2. Patients developing recurrent UTI (RUTI) due to ST131 subclade C2 appear to have an increased risk of recurrent infections. We have thus compared the whole genome of ST131 subclade C2 isolates from 14 patients with RUTI to those from 14 patients with sporadic UTI (SUTI). We aimed to elucidate if isolates causing RUTI can be associated with specific genomic features. Paired isolates from patients with RUTI were identical, presenting 2-18 single nucleotide polymorphism (SNP) differences for all six patients investigated. Comparative genomic analyses, including virulence factors, antibiotic resistance, pangenome and SNP analyses did not find any pattern associated with isolates causing RUTI. Despite extensive whole genome analyses, an increased risk of recurrences seen in patients with UTI due to ST131 subclade C2 isolates could not be explained by bacterial genetic differences in the two groups of isolates. Hence, additional factors that could aid in identifying bacterial properties contributing to the increased risk of RUTI due to ESBL-E.
coli ST131 subclade C2 remains to be explored.},
}
% Panova et al. (2023), Life (Basel) 13(7) -- PMID 37511853.
% The source record carried no page or article number; the article is located by DOI.
% Author initials are separated so styles can abbreviate them; G and TERT are braced in the title.
@article{pmid37511853,
  author   = {Panova, V. V. and Dolinnaya, N. G. and Novoselov, K. A. and Savitskaya, V. Y. and Chernykh, I. S. and Kubareva, E. A. and Alexeevski, A. V. and Zvereva, M. I.},
  title    = {Conserved {G}-Quadruplex-Forming Sequences in Mammalian {TERT} Promoters and Their Effect on Mutation Frequency.},
  journal  = {Life (Basel, Switzerland)},
  year     = {2023},
  volume   = {13},
  number   = {7},
  doi      = {10.3390/life13071478},
  pmid     = {37511853},
  issn     = {2075-1729},
  support  = {21-14-00161//Russian Science Foundation/ ; },
  abstract = {Somatic mutations in the promoter region of the human telomerase reverse transcriptase (hTERT) gene have been identified in many types of cancer. The hTERT promoter is known to be enriched with sequences that enable the formation of G-quadruplex (G4) structures, whose presence is associated with elevated mutagenicity and genome instability. Here, we used a bioinformatics tool (QGRS mapper) to search for G4-forming sequences (G4 motifs) in the 1000 bp TERT promoter regions of 141 mammalian species belonging to 20 orders, 5 of which, including primates and predators, contain more than 10 species. Groups of conserved G4 motifs and single-nucleotide variants within these groups were discovered using a block alignment approach (based on the Nucleotide PanGenome explorer). It has been shown that: (i) G4 motifs are predominantly located in the region proximal to the transcription start site (up to 400 bp) and are over-represented on the non-coding strand of the TERT promoters, (ii) 11 to 22% of the G4 motifs found are evolutionarily conserved across the related organisms, and (iii) a statistically significant higher frequency of nucleotide substitutions in the conserved G4 motifs compared to the surrounding regions was confirmed only for the order Primates. These data support the assumption that G4s can interfere with the DNA repair process and affect the evolutionary adaptation of organisms and species.},
}
% Leszczyńska et al. (2023), Genes 14(7) -- PMID 37510288.
% Same field values as the source record, laid out one field per line.
@article{pmid37510288,
  author   = {Leszczyńska, K and Święcicka, I and Daniluk, T and Lebensztejn, D and Chmielewska-Deptuła, S and Leszczyńska, D and Gawor, J and Kliber, M},
  title    = {Escherichia albertii as a Potential Enteropathogen in the Light of Epidemiological and Genomic Studies.},
  journal  = {Genes},
  year     = {2023},
  volume   = {14},
  number   = {7},
  pages    = {},
  doi      = {10.3390/genes14071384},
  pmid     = {37510288},
  issn     = {2073-4425},
  support  = {project No. 2015/17/B/NZ6/03470 entitled "Escherichia albertii as a potential enteropathogen in the light of epidemiological and genomic studies"//National Science Center/ ; },
  abstract = {Escherichia albertii is a new enteropathogen of humans and animals. The aim of the study was to assess the prevalence and pathogenicity of E. albertii strains isolated in northeastern Poland using epidemiological and genomic studies. In 2015-2018, a total of 1154 fecal samples from children and adults, 497 bird droppings, 212 food samples, 92 water samples, and 500 lactose-negative E. coli strains were tested. A total of 42 E. albertii strains were isolated. The PCR method was suitable for their rapid identification. In total, 33.3% of E. albertii isolates were resistant to one antibiotic, and 16.7% to two. Isolates were sensitive to cefepime, imipenem, levofloxacin, gentamicin, trimethoprim/sulfamethoxazole, and did not produce ESBL β-lactamases. High genetic variability of E. albertii has been demonstrated. In the PFGE method, 90.5% of the strains had distinct pulsotypes. In MLST typing, 85.7% of strains were assigned distinct sequence types (STs), of which 64% were novel ST types. Cytolethal distending toxin (CDT) and Paa toxin genes were found in 100% of E. albertii isolates. Genes encoding toxins, IbeA, CdtB type 2, Tsh and Shiga (Stx2f), were found in 26.2%, 9.7%, 1.7%, and 0.4% of E. albertii isolates, respectively.
The chromosome size of the tested strains ranged from 4,573,338 to 5,141,010 bp (average 4,784,003 bp), and at least one plasmid was present in all strains. The study contributes to a more accurate assessment of the genetic diversity of E. albertii and the potential threat it poses to public health.},
}
% Joglekar et al. (2023), bioRxiv preprint -- PMID 37503282.
% NOTE(review): the source author list had an empty name between Kashaf and Kong
% ("... and , and ..."), which BibTeX parses as a nameless author. It is filled here with
% the group author believed to be listed for this PMID (NISC Comparative Sequencing
% Program), double-braced so it is treated as one indivisible name -- confirm against PubMed.
% No volume/issue/pages were present in the source record, so those fields are omitted.
@article{pmid37503282,
  author   = {Joglekar, P and Conlan, S and Lee-Lin, S. Q. and Deming, C and Kashaf, S. S. and {{NISC Comparative Sequencing Program}} and Kong, H. H. and Segre, J. A.},
  title    = {Integrated genomic and functional analyses of human skin-associated {Staphylococcus} reveals extensive inter- and intra-species diversity.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2023},
  doi      = {10.1101/2023.06.22.546190},
  pmid     = {37503282},
  abstract = {UNLABELLED: Human skin is stably colonized by a distinct microbiota that functions together with epidermal cells to maintain a protective physical barrier. Staphylococcus , a prominent genus of the skin microbiota, participates in colonization resistance, tissue repair, and host immune regulation in strain specific manners. To unlock the potential of engineering skin microbial communities, we aim to fully characterize the functional diversity of this genus within the context of the skin environment. We conducted metagenome and pan-genome analyses of isolates obtained from distinct body sites of healthy volunteers, providing a detailed biogeographic depiction of staphylococcal species that colonize our skin. S. epidermidis , S. capitis, and S. hominis were the most abundant species present in all volunteers and were detected at all body sites. Pan-genome analysis of these three species revealed that the genus-core was dominated by central metabolism genes. Species-specific core genes were enriched in host colonization functions. The majority (∼68%) of genes were detected only in a fraction of isolate genomes, underscoring the immense strain-specific gene diversity. Conspecific genomes grouped into phylogenetic clades, exhibiting body site preference. Each clade was enriched for distinct gene-sets that are potentially involved in site tropism. Finally, we conducted gene expression studies of select isolates showing variable growth phenotypes in skin-like medium.
In vitro expression revealed extensive intra- and inter-species gene expression variation, substantially expanding the functional diversification within each species. Our study provides an important resource for future ecological and translational studies to examine the role of shared and strain-specific staphylococcal genes within the skin environment.
SIGNIFICANCE: The bacterial genus Staphylococcus is a prominent member of the human skin microbiome, performing important and diverse functions such as tuning immunity, driving tissue repair, and preventing pathogen colonization. Each of these functions is carried out by a subset of staphylococcal strains, displaying differences in gene content and regulation. Delineating the genomic and functional diversity of Staphylococcus will enable researchers to unlock the potential of engineering skin communities to promote health. Here, we present a comprehensive multi-omics analysis to characterize the inter- and intra-species diversity present in human skin-associated staphylococci. Our study is the first to conduct a detailed pan-genome comparison between prominent skin staphylococcal species giving a valuable insight into gene sharing and provides an important resource.},
}
% Ahmed et al. (2023), bioRxiv preprint -- PMID 37502876.
% NOTE(review): the source author list had an empty name between Deming and Lemon
% ("... and , and ..."), which BibTeX parses as a nameless author. It is filled here with
% the group author believed to be listed for this PMID (NISC Comparative Sequencing
% Program), double-braced so it is treated as one indivisible name -- confirm against PubMed.
% No volume/issue/pages were present in the source record, so those fields are omitted.
% The abstract field is kept verbatim, including the long IMPORTANCE section with numbered citations.
@article{pmid37502876,
  author   = {Ahmed, N. M. and Joglekar, P and Deming, C and {{NISC Comparative Sequencing Program}} and Lemon, K. P. and Kong, H. H. and Segre, J. A. and Conlan, S},
  title    = {Genomic characterization of the {C.} tuberculostearicum species complex, a ubiquitous member of the human skin microbiome.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2023},
  doi      = {10.1101/2023.06.16.545375},
  pmid     = {37502876},
  abstract = {UNLABELLED: Corynebacterium is a predominant genus in the skin microbiome, yet its genetic diversity on skin is incompletely characterized and lacks a comprehensive set of reference genomes. Our work aims to investigate the distribution of Corynebacterium species on the skin, as well as to expand the existing genome reference catalog to enable more complete characterization of skin metagenomes. We used V1-V3 16S rRNA gene sequencing data from 14 body sites of 23 healthy volunteers to characterize Corynebacterium diversity and distribution across healthy human skin. Corynebacterium tuberculostearicum is the predominant species found on human skin and we identified two distinct C. tuberculostearicum ribotypes (A & B) that can be distinguished by variation in the 16S rRNA V1-V3 sequence. One is distributed across all body sites and the other found primarily on the feet. We performed whole genome sequencing of 40 C. tuberculostearicum isolates cultured from the skin of five healthy individuals across seven skin sites. We generated five closed genomes of diverse C. tuberculostearicum which revealed that C. tuberculostearicum isolates are largely syntenic and carry a diversity of methylation patterns, plasmids and CRISPR/Cas systems. The pangenome of C. tuberculostearicum is open with a core genome size of 1806 genes and a pangenome size of 5451 total genes. This expanded pangenome enabled the mapping of 24% more C. tuberculostearicum reads from shotgun metagenomic datasets derived from skin body sites.
Finally, while the genomes from this study all fall within a C. tuberculostearicum species complex, the ribotype B isolates may constitute a new species.
IMPORTANCE: Amplicon sequencing data combined with isolate whole genome sequencing has expanded our understanding of Corynebacterium on the skin. Human skin is characterized by a diverse collection of Corynebacterium species but C. tuberculostearicum predominates many sites. Our work supports the emerging idea that C. tuberculostearicum is a species complex encompassing several distinct species. We produced a collection of genomes that help define this complex including a potentially new species which we are calling C. hallux based on a preference for sites on the feet, whole-genome average nucleotide identity, pangenomics and growth in skin-like media. This isolate collection and high-quality genome resource sets the stage for developing engineered strains for both basic and translational clinical studies. Microbiomes are shaped by taxa that are both characteristic to those sites and functionally important to that community. The genus Corynebacterium is one such taxa for the human skin and nares. Foundational studies using 16S rRNA gene sequencing and shotgun metagenomics by our lab (1, 2) and others (3) have established Corynebacterium as common members of the skin microbiome. While Corynebacterium have been positively correlated with the resolution of dysbiosis associated with eczema flares (4), the importance of the Corynebacterium spp. is less defined for skin disease severity in primary immune deficient patients (5, 6). Corynebacterium spp. are predominant members of the human aerodigestive tract microbiome (nares, oral cavity and respiratory tract) (3) and participate in microbe-microbe interactions with members of nasal microbiome (7, 8). Corynebacterium have been shown to engage with the host immune system, specifically C. accolens -promoted IL23-dependent inflammation in mice on a high-fat diet (9). C. bovis and C.
mastiditis have been shown to predominate the microbiome of a ADAM10-deficient mouse model (10) as well as an ADAM17-deficient mouse model of eczema (11). Finally, C. tuberculostearicum has been shown to induce inflammation in human epidermal keratinocyte cell cultures (12). These studies establish Corynebacterium spp. as key members of the skin microbiome capable of both microbe-microbe and microbe-host interactions. A critical resource for understanding the biology of Corynebacterium on the skin is a robust collection of complete reference genomes, including isolates collected from a variety of individuals and body sites. Previously published genome collections from skin- or nares-resident species include Staphylococcus epidermidis (13), Cutibacterium acnes (14) and the recent comparative analysis of Dolosigranulum pigrum (15). Of note, while emerging bioinformatic methods and pipelines are now being employed to extract nearly-complete genomes (MAGs) from metagenomic assemblies of skin samples (16), MAGs are not yet a substitute for genomes from cultured isolates to understand strain level or pangenomic diversity. In addition to functional prediction, comparative genomics is increasingly being used to augment conventional microbiological methods to define or redefine taxonomic boundaries (17, 18), as well as describe the full extent of diversity within these boundaries (19). A pangenome, which encompasses the complete set of genes present within a set of genome sequences, enables the characterization of gene-level heterogeneity within a taxonomic group. The pangenome is commonly subdivided into the 'core' genome, referring to genes present in all strains, and the 'accessory' or 'dispensable' genome, referring to those present in only one or some isolates. (The accessory pangenome can be further subdivided to reflect a wider range of gene uniqueness, e.g. singletons.)
Thorough characterization of taxa is limited by the availability of representative and high-quality genome assemblies. Unfortunately, with the exceptions of clinically relevant Corynebacterium spp. (e.g. , C. diphtheriae , C. striatum and C. pseudotuberculosis), the genus is inadequately sequenced, with 75% of species having fewer than six genomes. This includes common skin-associated species like C. tuberculostearicum with just five unique isolate genomes, only two of which are from skin. This work seeks first to characterize the distribution of Corynebacterium across 14 skin sites from 23 healthy volunteers. The second goal of this work focuses on what we identify as the predominant skin Corynebacterium species, C. tuberculostearicum . We have sequenced 23 distinct C. tuberculostearicum strains (n=40 genomes before dereplication), a five-fold increase in the number of publicly available, unique genomes (n=5). In addition to short-read assemblies, we generated five complete genomes which, along with the type strain (DSM44922), demonstrate that C. tuberculostearicum genomes are largely syntenic and carry a number of methylation systems as well as a CRISPR/Cas system. Genes from the C. tuberculostearicum genomes in our collection fall into 5451 gene clusters comprising the species pangenome. This expanded pangenome, as compared to existing public references, improved the mapping of C. tuberculostearicum metagenomic reads from unrelated healthy volunteers. In addition, we have identified a distinct C. tuberculostearicum clade that is highly enriched on the feet that may represent a new species, tentatively designated Corynebacterium hallux .},
}
% Price and Russell (2023), Frontiers in Public Health 11, article 1181911 -- PMID 37497030.
% The tool and model names in the title are braced; without protection a sentence-casing
% style would render them as "Amand" and "deepsvdd". The source record had no issue number.
@article{pmid37497030,
  author   = {Price, C and Russell, J. A.},
  title    = {{AMAnD}: an automated metagenome anomaly detection methodology utilizing {DeepSVDD} neural networks.},
  journal  = {Frontiers in public health},
  year     = {2023},
  volume   = {11},
  pages    = {1181911},
  pmid     = {37497030},
  issn     = {2296-2565},
  abstract = {The composition of metagenomic communities within the human body often reflects localized medical conditions such as upper respiratory diseases and gastrointestinal diseases. Fast and accurate computational tools to flag anomalous metagenomic samples from typical samples are desirable to understand different phenotypes, especially in contexts where repeated, long-duration temporal sampling is done. Here, we present Automated Metagenome Anomaly Detection (AMAnD), which utilizes two types of Deep Support Vector Data Description (DeepSVDD) models; one trained on taxonomic feature space output by the Pan-Genomics for Infectious Agents (PanGIA) taxonomy classifier and one trained on kmer frequency counts. AMAnD's semi-supervised one-class approach makes no assumptions about what an anomaly may look like, allowing the flagging of potentially novel anomaly types. Three diverse datasets are profiled. The first dataset is hosted on the National Center for Biotechnology Information's (NCBI) Sequence Read Archive (SRA) and contains nasopharyngeal swabs from healthy and COVID-19-positive patients. The second dataset is also hosted on SRA and contains gut microbiome samples from normal controls and from patients with slow transit constipation (STC). AMAnD can learn a typical healthy nasopharyngeal or gut microbiome profile and reliably flag the anomalous COVID+ or STC samples in both feature spaces. The final dataset is a synthetic metagenome created by the Critical Assessment of Metagenome Annotation Simulator (CAMISIM). A control dataset of 50 well-characterized organisms was submitted to CAMISIM to generate 100 synthetic control class samples.
The experimental conditions included 12 different spiked-in contaminants that are taxonomically similar to organisms present in the laboratory blank sample ranging from one strain tree branch taxonomic distance away to one family tree branch taxonomic distance away. This experiment was repeated in triplicate at three different coverage levels to probe the dependence on sample coverage. AMAnD was again able to flag the contaminant inserts as anomalous. AMAnD's assumption-free flagging of metagenomic anomalies, the real-time model training update potential of the deep learning approach, and the strong performance even with lightweight models of low sample cardinality would make AMAnD well-suited to a wide array of applied metagenomics biosurveillance use-cases, from environmental to clinical utility.},
}
% Ma et al. (2023), Bioinformatics -- PMID 37494467.
% No volume/issue/pages were present in the source record, so those fields are omitted
% rather than stored as empty placeholders; the article is located by DOI.
% The structured abstract keeps one labelled section per line.
@article{pmid37494467,
  author   = {Ma, J and Cáceres, M and Salmela, L and Mäkinen, V and Tomescu, A. I.},
  title    = {Chaining for Accurate Alignment of Erroneous Long Reads to Acyclic Variation Graphs.},
  journal  = {Bioinformatics (Oxford, England)},
  year     = {2023},
  doi      = {10.1093/bioinformatics/btad460},
  pmid     = {37494467},
  issn     = {1367-4811},
  abstract = {MOTIVATION: Aligning reads to a variation graph is a standard task in pangenomics, with downstream applications such as improving variant calling. While the vg toolkit (Garrison et al., 2018) is a popular aligner of short reads, GraphAligner (Rautiainen and Marschall, 2020) is the state-of-the-art aligner of erroneous long reads. GraphAligner works by finding candidate read occurrences based on individually extending the best seeds of the read in the variation graph. However, a more principled approach recognized in the community is to co-linearly chain multiple seeds.
RESULTS: We present a new algorithm to co-linearly chain a set of seeds in a string labeled acyclic graph, together with the first efficient implementation of such a co-linear chaining algorithm into a new aligner of erroneous long reads to acyclic variation graphs, GraphChainer. We run experiments aligning real and simulated PacBio CLR reads with average error rates 15% and 5%. Compared to GraphAligner, GraphChainer aligns 12% to 17% more reads, and 21% to 28% more total read length, on real PacBio CLR reads from human chromosomes 1, 22 and the whole human pangenome. On both simulated and real data, GraphChainer aligns between 95% and 99% of all reads, and of total read length. We also show that minigraph (Li et al., 2020) and minichain (Chandra and Jain, 2023) obtain an accuracy of less than 60% on this setting.
AVAILABILITY: GraphChainer is freely available at https://github.com/algbio/GraphChainer. The datasets and evaluation pipeline can be reached from the previous address.
SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
}
% Frazer and Schork (2023), Cell Genomics 3(7), article 100360 -- PMID 37492100.
% Author initials are separated so that abbreviating styles keep both initials.
@article{pmid37492100,
  author   = {Frazer, K. A. and Schork, N. J.},
  title    = {The human pangenome reference anticipates equitable and fundamental genomic insights.},
  journal  = {Cell genomics},
  year     = {2023},
  volume   = {3},
  number   = {7},
  pages    = {100360},
  pmid     = {37492100},
  issn     = {2666-979X},
  abstract = {For the past few years, researchers in the Human Pangenome Reference Consortium (HPRC) have been working to catalog almost all human genomic diversity. Frazer and Schork preview an article recently published in Nature, "A draft human pangenome reference,"[1] which represents the initial release of 47 fully phased diploid assemblies of genomes of individuals with diverse ancestries.},
}
@article {pmid37491415, year = {2023}, author = {Matrishin, CB and Haase, EM and Dewhirst, FE and Mark Welch, JL and Miranda-Sanchez, F and Chen, T and MacFarland, DC and Kauffman, KM}, title = {Phages are unrecognized players in the ecology of the oral pathogen Porphyromonas gingivalis.}, journal = {Microbiome}, volume = {11}, number = {1}, pages = {161}, pmid = {37491415}, issn = {2049-2618}, support = {T32DE023526/DE/NIDCR NIH HHS/United States ; R01DE016937/DE/NIDCR NIH HHS/United States ; }, abstract = {BACKGROUND: Porphyromonas gingivalis (hereafter "Pg") is an oral pathogen that has been hypothesized to act as a keystone driver of inflammation and periodontal disease. Although Pg is most readily recovered from individuals with actively progressing periodontal disease, healthy individuals and those with stable non-progressing disease are also colonized by Pg. Insights into the factors shaping the striking strain-level variation in Pg, and its variable associations with disease, are needed to achieve a more mechanistic understanding of periodontal disease and its progression. One of the key forces often shaping strain-level diversity in microbial communities is infection of bacteria by their viral (phage) predators and symbionts. Surprisingly, although Pg has been the subject of study for over 40 years, essentially nothing is known of its phages, and the prevailing paradigm is that phages are not important in the ecology of Pg.
RESULTS: Here we systematically addressed the question of whether Pg are infected by phages-and we found that they are. We found that prophages are common in Pg, they are genomically diverse, and they encode genes that have the potential to alter Pg physiology and interactions. We found that phages represent unrecognized targets of the prevalent CRISPR-Cas defense systems in Pg, and that Pg strains encode numerous additional mechanistically diverse candidate anti-phage defense systems. We also found that phages and candidate anti-phage defense system elements together are major contributors to strain-level diversity and the species pangenome of this oral pathogen. Finally, we demonstrate that prophages harbored by a model Pg strain are active in culture, producing extracellular viral particles in broth cultures.
CONCLUSION: This work definitively establishes that phages are a major unrecognized force shaping the ecology and intra-species strain-level diversity of the well-studied oral pathogen Pg. The foundational phage sequence datasets and model systems that we establish here add to the rich context of all that is already known about Pg, and point to numerous avenues of future inquiry that promise to shed new light on fundamental features of phage impacts on human health and disease broadly. Video Abstract.}, }
@article {pmid37491393, year = {2023}, author = {Cho, MK and Fullerton, SM and Hammonds, EM and Lee, SS and Panofsky, A and Reardon, J}, title = {Pangenomics: prioritize diversity in collaborations.}, journal = {Nature}, volume = {619}, number = {7971}, pages = {698}, doi = {10.1038/d41586-023-02248-7}, pmid = {37491393}, issn = {1476-4687}, }
@article {pmid37490004, year = {2023}, author = {Wu, S and Sun, H and Gao, L and Branham, S and McGregor, C and Renner, SS and Xu, Y and Kousik, C and Wechter, WP and Levi, A and Fei, Z}, title = {A Citrullus genus super-pangenome reveals extensive variations in wild and cultivated watermelons and sheds light on watermelon evolution and domestication.}, journal = {Plant biotechnology journal}, volume = {}, number = {}, pages = {}, doi = {10.1111/pbi.14120}, pmid = {37490004}, issn = {1467-7652}, support = {2015-51181-24285//National Institute of Food and Agriculture/ ; 2020-51181-32139//National Institute of Food and Agriculture/ ; 1855585//National Science Foundation/ ; }, }
@article {pmid37487084, year = {2023}, author = {Bozan, I and Achakkagari, SR and Anglin, NL and Ellis, D and Tai, HH and Strömvik, MV}, title = {Pangenome analyses reveal impact of transposable elements and ploidy on the evolution of potato species.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {120}, number = {31}, pages = {e2211117120}, doi = {10.1073/pnas.2211117120}, pmid = {37487084}, issn = {1091-6490}, support = {GQ-AAC-2019-2//Génome Québec (GQ)/ ; J-002367//Gouvernement du Canada | Agriculture and Agri-Food Canada (AAFC)/ ; The Potato Genome Diversity Portal//Compute Canada (Calcul Canada)/ ; Structural variation analyses of complex plant genomes in search of climate smart adaptations//Compute Canada (Calcul Canada)/ ; }, abstract = {Potato (Solanum sp., family Solanaceae) is the most important noncereal food crop globally. It has over 100 wild relatives in the Solanum section Petota, which features species with both sexual and asexual reproduction and varying ploidy levels. A pangenome of Solanum section Petota composed of 296 accessions was constructed including diploids and polyploids compared via presence/absence variation (PAV). The Petota core (genes shared by at least 97% of the accessions) and shell genomes (shared by 3 to 97%) are enriched in basic molecular and cellular functions, while the cloud genome (genes present in less than 3% of the member accessions) showed enrichment in transposable elements (TEs). Comparison of PAV in domesticated vs. wild accessions was made, and a phylogenetic tree was constructed based on PAVs, grouping accessions into different clades, similar to previous phylogenies produced using DNA markers. A cladewise pangenome approach identified abiotic stress response among the core genes in clade 1+2 and clade 3, and flowering/tuberization among the core genes in clade 4. 
The TE content differed between the clades, with clade 1+2, which is composed of species from North and Central America with reproductive isolation from species in other clades, having much lower TE content compared to other clades. In contrast, accessions with in vitro propagation history were identified and found to have high levels of TEs. Results indicate a role for TEs in adaptation to new environments, both natural and artificial, for Solanum section Petota.}, }
@article {pmid37485508, year = {2023}, author = {Liu, W and Ou, P and Tian, F and Liao, J and Ma, Y and Wang, J and Jin, X}, title = {Anti-Vibrio parahaemolyticus compounds from Streptomyces parvus based on Pan-genome and subtractive proteomics.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1218176}, pmid = {37485508}, issn = {1664-302X}, abstract = {INTRODUCTION: Vibrio parahaemolyticus is a foodborne pathogen commonly found in seafood, and drug resistance poses significant challenges to its control. This study aimed to identify novel drug targets for antibacterial drug discovery.
METHODS: To identify drug targets, we performed a pan-genome analysis on 58 strains of V. parahaemolyticus genomes to obtain core genes. Subsequently, subtractive proteomics and physiochemical checks were conducted on the core proteins to identify potential therapeutic targets. Molecular docking was then employed to screen for anti-V. parahaemolyticus compounds using a in-house compound library of Streptomyces parvus, chosen based on binding energy. The anti-V. parahaemolyticus efficacy of the identified compounds was further validated through a series of experimental tests.
RESULTS AND DISCUSSION: Pangenome analysis of 58 V. parahaemolyticus genomes revealed that there were 1,392 core genes. After Subtractive proteomics and physiochemical checks, Flagellar motor switch protein FliN was selected as a therapeutic target against V. parahaemolyticus. FliN was modeled and docked with Streptomyces parvus source compounds, and Actinomycin D was identified as a potential anti-V. parahaemolyticus agent with a strong binding energy. Experimental verification confirmed its effectiveness in killing V. parahaemolyticus and significantly inhibiting biofilm formation and motility. This study is the first to use pan-genome and subtractive proteomics to identify new antimicrobial targets for V. parahaemolyticus and to identify the anti-V. parahaemolyticus effect of Actinomycin D. These findings suggest potential avenues for the development of new antibacterial drugs to control V. parahaemolyticus infections.}, }
@article {pmid37480395, year = {2023}, author = {Tanuku, SNR and Pinnaka, AK and Behera, S and Singh, A and Pydi, S and Vasudeva, G and Vaidya, B and Sharma, G and Ganta, SK and Garbhapu, NS}, title = {Marinobacterium lacunae sp. nov. isolated from estuarine sediment.}, journal = {Archives of microbiology}, volume = {205}, number = {8}, pages = {294}, pmid = {37480395}, issn = {1432-072X}, support = {GAP3195//Ministry of Earth Sciences/ ; }, abstract = {A novel motile bacterium was isolated from a sediment sample collected in Kochi backwaters, Kerala, India. This bacterium is Gram negative, rod shaped, 1.0-1.5 µm wide, and 2.0-3.0 µm long. It was designated as strain AK27[T]. Colonies were grown on marine agar displayed circular, off-white, shiny, moist, translucent, flat, margin entire, 1-2 mm in diameter. The major fatty acids identified in this strain were C18:1 ω7c, C16:0, and summed in feature 3. The composition of polar lipids in the strain AK27[T] included phosphatidylglycerol, phosphatidylethanolamine, diphosphatidylglycerol, one unidentified amino lipid, two unidentified aminophospholipids, two unidentified phospholipids, and six unidentified lipids. The genomic DNA of strain AK27[T] exhibited a G+C content of 56.4 mol%. Based on the analysis of 16S rRNA gene sequence, strain AK27[T] showed sequence similarity to M. ramblicola D7[T] and M. zhoushanense WM3[T] as 98.99% and 98.58%, respectively. Compared to other type strains of the Marinobacterium genus, strain AK27[T] exhibited sequence similarities ranging from 91.7% to 96.4%. When compared to Marinobacterium zhoushanense WM3[T] and Marinobacterium ramblicola D7[T], strain AK27[T] exhibited average nucleotide identity values of 80.25% and 79.97%, and dDDH values of 22.9% and 22.6%, respectively. The genome size of the strain AK27[T] was 4.55 Mb, with 4,229 coding sequences. 
Based on the observed phenotypic and chemotaxonomic features, and the results of phylogenetic and phylogenomic analysis, this study proposes the classification of strain AK27[T] as a novel species within the genus Marinobacterium. The proposed name for this novel species is Marinobacterium lacunae sp. nov.}, }
@article {pmid37477947, year = {2023}, author = {Lyu, X and Xia, Y and Wang, C and Zhang, K and Deng, G and Shen, Q and Gao, W and Zhang, M and Liao, N and Ling, J and Bo, Y and Hu, Z and Yang, J and Zhang, M}, title = {Pan-genome analysis sheds light on structural variation-based dissection of agronomic traits in melon crops.}, journal = {Plant physiology}, volume = {}, number = {}, pages = {}, doi = {10.1093/plphys/kiad405}, pmid = {37477947}, issn = {1532-2548}, abstract = {Sweetness and appearance of fresh fruits are key palatable and preference attributes for consumers and are often controlled by multiple genes. However, fine-mapping the key loci or genes of interest by single genome-based genetic analysis is challenging. Herein, we present the chromosome-level genome assembly of one landrace melon accession (Cucumis melo ssp. agrestis) with wild morphologic features and thus construct a melon pan-genome atlas via integrating sequenced melon genome datasets. Our comparative genomic analysis reveals a total of 3.4 million genetic variations, of which the presence/absence variations (PAVs) are mainly involved in regulating the function of genes for sucrose metabolism during melon domestication and improvement. We further resolved several loci that are accountable for sucrose contents, flesh color, rind stripe and suture using a structural variation (SV)-based genome-wide association study (GWAS). Furthermore, via BSA-seq and map-based cloning, we uncovered that a single gene, (CmPIRL6), determines the edible or inedible characteristics of melon fruit exocarp. These findings provide important melon pan-genome information and provide a powerful toolkit for future pan-genome-informed cultivar breeding of melon.}, }
@article {pmid37476668, year = {2023}, author = {Agarwal, V and Stubits, R and Nassrullah, Z and Dillon, MM}, title = {Pangenome insights into the diversification and disease specificity of worldwide Xanthomonas outbreaks.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1213261}, pmid = {37476668}, issn = {1664-302X}, abstract = {The bacterial genus Xanthomonas is responsible for disease outbreaks in several hundred plant species, many of them economically important crops. In the era of next-generation sequencing, thousands of strains from this genus have now been sequenced as part of isolated studies that focus on outbreak characterization, host range, diversity, and virulence factor identification. However, these data have not been synthesized and we lack a comprehensive phylogeny for the genus, with some species designations in public databases still relying on phenotypic similarities and representative sequence typing. The extent of genetic cohesiveness among Xanthomonas strains, the distribution of virulence factors across strains, and the impact of evolutionary history on host range across the genus are also poorly understood. In this study, we present a pangenome analysis of 1,910 diverse Xanthomonas genomes, highlighting their evolutionary relationships, the distribution of virulence-associated genes across strains, and rates of horizontal gene transfer. We find a number of broadly conserved classes of virulence factors and considerable diversity in the Type 3 Secretion Systems (T3SSs) and Type 3 Secreted Effector (T3SE) repertoires of different Xanthomonas species. We also use these data to re-assign incorrectly classified strains to phylogenetically informed species designations and find evidence of both monophyletic host specificity and convergent evolution of phylogenetically distant strains to the same host. 
Finally, we explore the role of recombination in maintaining genetic cohesion within the Xanthomonas genus as a result of both ancestral and recent recombination events. Understanding the evolutionary history of Xanthomonas species and the relationship of key virulence factors with host-specificity provides valuable insight into the mechanisms through which Xanthomonas species shift between hosts and will enable us to develop more robust resistance strategies against these highly virulent pathogens.}, }
@article {pmid37474912, year = {2023}, author = {Ortega-Sanz, I and Barbero-Aparicio, JA and Canepa-Oneto, A and Rovira, J and Melero, B}, title = {CamPype: an open-source workflow for automated bacterial whole-genome sequencing analysis focused on Campylobacter.}, journal = {BMC bioinformatics}, volume = {24}, number = {1}, pages = {291}, pmid = {37474912}, issn = {1471-2105}, support = {LCF/PR/PR18/51130007//"la Caixa" Foundation/ ; }, abstract = {BACKGROUND: The rapid expansion of Whole-Genome Sequencing has revolutionized the fields of clinical and food microbiology. However, its implementation as a routine laboratory technique remains challenging due to the growth of data at a faster rate than can be effectively analyzed and critical gaps in bioinformatics knowledge.
RESULTS: To address both issues, CamPype was developed as a new bioinformatics workflow for the genomics analysis of sequencing data of bacteria, especially Campylobacter, which is the main cause of gastroenteritis worldwide making a negative impact on the economy of the public health systems. CamPype allows fully customization of stages to run and tools to use, including read quality control filtering, read contamination, reads extension and assembly, bacterial typing, genome annotation, searching for antibiotic resistance genes, virulence genes and plasmids, pangenome construction and identification of nucleotide variants. All results are processed and resumed in an interactive HTML report for best data visualization and interpretation.
CONCLUSIONS: The minimal user intervention of CamPype makes of this workflow an attractive resource for microbiology laboratories with no expertise in bioinformatics as a first line method for bacterial typing and epidemiological analyses, that would help to reduce the costs of disease outbreaks, or for comparative genomic analyses. CamPype is publicly available at https://github.com/JoseBarbero/CamPype .}, }
@article {pmid37474911, year = {2023}, author = {Huff, M and Hulse-Kemp, AM and Scheffler, BE and Youngblood, RC and Simpson, SA and Babiker, E and Staton, M}, title = {Long-read, chromosome-scale assembly of Vitis rotundifolia cv. Carlos and its unique resistance to Xylella fastidiosa subsp. fastidiosa.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {409}, pmid = {37474911}, issn = {1471-2164}, support = {6062-21000-010-013//USDA-ARS/ ; 6062-21000-010-000-D//USDA-ARS/ ; }, abstract = {BACKGROUND: Muscadine grape (Vitis rotundifolia) is resistant to many of the pathogens that negatively impact the production of common grape (V. vinifera), including the bacterial pathogen Xylella fastidiosa subsp. fastidiosa (Xfsf), which causes Pierce's Disease (PD). Previous studies in common grape have indicated Xfsf delays host immune response with a complex O-chain antigen produced by the wzy gene. Muscadine cultivars range from tolerant to completely resistant to Xfsf, but the mechanism is unknown.
RESULTS: We assembled and annotated a new, long-read genome assembly for 'Carlos', a cultivar of muscadine that exhibits tolerance, to build upon the existing genetic resources available for muscadine. We used these resources to construct an initial pan-genome for three cultivars of muscadine and one cultivar of common grape. This pan-genome contains a total of 34,970 synteny-constrained entries containing genes of similar structure. Comparison of resistance gene content between the 'Carlos' and common grape genomes indicates an expansion of resistance (R) genes in 'Carlos.' We further identified genes involved in Xfsf response by transcriptome sequencing 'Carlos' plants inoculated with Xfsf. We observed 234 differentially expressed genes with functions related to lipid catabolism, oxidation-reduction signaling, and abscisic acid (ABA) signaling as well as seven R genes. Leveraging public data from previous experiments of common grape inoculated with Xfsf, we determined that most differentially expressed genes in the muscadine response were not found in common grape, and three of the R genes identified as differentially expressed in muscadine do not have an ortholog in the common grape genome.
CONCLUSIONS: Our results support the utility of a pan-genome approach to identify candidate genes for traits of interest, particularly disease resistance to Xfsf, within and between muscadine and common grape.}, }
@article {pmid37465028, year = {2023}, author = {Thieringer, PH and Boyd, ES and Templeton, AS and Spear, JR}, title = {Metapangenomic investigation provides insight into niche differentiation of methanogenic populations from the subsurface serpentinizing environment, Samail Ophiolite, Oman.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1205558}, pmid = {37465028}, issn = {1664-302X}, abstract = {Serpentinization reactions produce highly reduced waters that have hyperalkaline pH and that can have high concentrations of H2 and CH4. Putatively autotrophic methanogenic archaea have been identified in the subsurface waters of the Samail Ophiolite, Sultanate of Oman, though the strategies to overcome hyperalkaline pH and dissolved inorganic carbon limitation remain to be fully understood. Here, we recovered metagenome assembled genomes (MAGs) and applied a metapangenomic approach to three different Methanobacterium populations to assess habitat-specific functional gene distribution. A Type I population was identified in the fluids with neutral pH, while a Type II and "Mixed" population were identified in the most hyperalkaline fluids (pH 11.63). The core genome of all Methanobacterium populations highlighted potential DNA scavenging techniques to overcome phosphate or nitrogen limitation induced by environmental conditions. With particular emphasis on the Mixed and Type II population found in the most hyperalkaline fluids, the accessory genomes unique to each population reflected adaptation mechanisms suggesting lifestyles that minimize niche overlap. In addition to previously reported metabolic capability to utilize formate as an electron donor and generate intracellular CO2, the Type II population possessed genes relevant to defense against antimicrobials and assimilating potential osmoprotectants to provide cellular stability. 
The accessory genome of the Mixed population was enriched in genes for multiple glycosyltransferases suggesting reduced energetic costs by adhering to mineral surfaces or to other microorganisms, and fostering a non-motile lifestyle. These results highlight the niche differentiation of distinct Methanobacterium populations to circumvent the challenges of serpentinization impacted fluids through coexistence strategies, supporting our ability to understand controls on methanogenic lifestyles and adaptations within the serpentinizing subsurface fluids of the Samail Ophiolite.}, }
@article {pmid37464310, year = {2023}, author = {Singh, RP and Kumari, K and Sharma, PK and Ma, Y}, title = {Characterization and in-depth genome analysis of a halotolerant probiotic bacterium Paenibacillus sp. S-12, a multifarious bacterium isolated from Rauvolfia serpentina.}, journal = {BMC microbiology}, volume = {23}, number = {1}, pages = {192}, pmid = {37464310}, issn = {1471-2180}, support = {BT/RLF/2020-21//Department of Biotechnology, Ministry of Science and Technology, India/ ; }, abstract = {BACKGROUND: Members of Paenibacillus genus from diverse habitats have attracted great attention due to their multifarious properties. Considering that members of this genus are mostly free-living in soil, we characterized the genome of a halotolerant environmental isolate belonging to the genus Paenibacillus. The genome mining unravelled the presence of CAZymes, probiotic, and stress-protected genes that suggested strain S-12 for industrial and agricultural purposes.
RESULTS: Molecular identification by 16 S rRNA gene sequencing showed its closest match to other Paenibacillus species. The complete genome size of S-12 was 5.69 Mb, with a GC-content 46.5%. The genome analysis of S-12 unravelled the presence of an open reading frame (ORF) encoding the functions related to environmental stress tolerance, adhesion processes, multidrug efflux systems, and heavy metal resistance. Genome annotation identified the various genes for chemotaxis, flagellar motility, and biofilm production, illustrating its strong colonization ability.
CONCLUSION: The current findings provides the in-depth investigation of a probiotic Paenibacillus bacterium that possessed various genome features that enable the bacterium to survive under diverse conditions. The strain shows the strong ability for probiotic application purposes.}, }
@article {pmid37461539, year = {2023}, author = {Steenwyk, JL and Knowles, S and Bastos, RW and Balamurugan, C and Rinker, D and Mead, ME and Roberts, CD and Raja, HA and Li, Y and Colabardini, AC and de Castro, PA and Dos Reis, TF and Canóvas, D and Sanchez, RL and Lagrou, K and Torrado, E and Rodrigues, F and Oberlies, NH and Zhou, X and Goldman, GH and Rokas, A}, title = {Evolutionary origin, population diversity, and diagnostics for a cryptic hybrid pathogen.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.07.03.547508}, pmid = {37461539}, abstract = {Cryptic fungal pathogens pose significant identification and disease management challenges due to their morphological resemblance to known pathogenic species while harboring genetic and (often) infection-relevant trait differences. The cryptic fungal pathogen Aspergillus latus, an allodiploid hybrid originating from Aspergillus spinulosporus and an unknown close relative of Aspergillus quadrilineatus within section Nidulantes, remains poorly understood. The absence of accurate diagnostics for A. latus has led to misidentifications, hindering epidemiological studies and the design of effective treatment plans. We conducted an in-depth investigation of the genomes and phenotypes of 44 globally distributed isolates (41 clinical isolates and three type strains) from Aspergillus section Nidulantes. We found that 21 clinical isolates were A. latus; notably, standard methods of pathogen identification misidentified all A. latus isolates. The remaining isolates were identified as A. spinulosporus (8), A. quadrilineatus (1), or A. nidulans (11). Phylogenomic analyses shed light on the origin of A. latus, indicating one or two hybridization events gave rise to the species during the Miocene, approximately 15.4 to 8.8 million years ago.
Characterizing the A. latus pangenome uncovered substantial genetic diversity within gene families and biosynthetic gene clusters. Transcriptomic analysis revealed that both parental genomes are actively expressed in nearly equal proportions and respond to environmental stimuli. Further investigation into infection-relevant chemical and physiological traits, including drug resistance profiles, growth under oxidative stress conditions, and secondary metabolite biosynthesis, highlight distinct phenotypic profiles of the hybrid A. latus compared to its parental and closely related species. Leveraging our comprehensive genomic and phenotypic analyses, we propose five genomic and phenotypic markers as diagnostics for A. latus species identification. These findings provide valuable insights into the evolutionary origin, genomic outcome, and phenotypic implications of hybridization in a cryptic fungal pathogen, thus enhancing our understanding of the underlying processes contributing to fungal pathogenesis. Furthermore, our study underscores the effectiveness of extensive genomic and phenotypic analyses as a promising approach for developing diagnostics applicable to future investigations of cryptic and emerging pathogens.}, }
@article {pmid37460717, year = {2023}, author = {Kumari, K and Sharma, PK and Shikha, S and Singh, RP}, title = {Molecular characterization and in-depth genome analysis of Enterobacter sp. S-16.}, journal = {Functional \& integrative genomics}, volume = {23}, number = {3}, pages = {245}, pmid = {37460717}, issn = {1438-7948}, abstract = {Enterobacter species are considered to be an opportunistic human pathogen owing to the existence of antibiotic-resistant strains and drug resides; however, the detailed analysis of the antibiotic resistance and virulence features in environmental isolates is poorly characterized. Here, in the study, we characterized the biochemical characteristics, and genome, pan-genome, and comparative genome analyses of an environmental isolate Enterobacter sp. S-16. The strain was identified as Enterobacter spp. by using 16S rRNA gene sequencing. To unravel genomic features, whole genome of Enterobacter sp. S-16 was sequenced using a hybrid assembly approach and genome assembly was performed using the Unicycler tool. The assembled genome contained the single conting size 5.3 Mbp, GC content 55.43%, and 4500 protein-coding genes. The genome analysis revealed the various gene clusters associated with virulence, antibiotic resistance, type VI secretion system (T6SS), and many stress tolerant genes, which may provide important insight for adapting to changing environment conditions. Moreover, different metabolic pathways were identified that potentially contribute to environmental survival. Various hydrolytic enzymes and motility functions equipped the strain S-16 as an active colonizer. The genome analysis confirms the presence of carbohydrate-active enzymes (CAZymes), and non-enzymatic carbohydrate-binding modules (CBMs) involved in the hydrolysis of complex carbohydrate polymers. Moreover, the pan-genome analysis provides detailed information about the core genes and shared genes with the closest related Enterobacter species. 
The present study is the first report showing the presence of YdhE/NorM in Enterobacter spp. Thus, the elucidation of genome sequencing will increase our understanding of the pathogenic nature of environmental isolate, supporting the One Health Concept.}, }
@article {pmid37449094, year = {2023}, author = {Buzzanca, D and Kerkhof, PJ and Alessandria, V and Rantsiou, K and Houf, K}, title = {Arcobacteraceae comparative genome analysis demonstrates genome heterogeneity and reduction in species isolated from animals and associated with human illness.}, journal = {Heliyon}, volume = {9}, number = {7}, pages = {e17652}, doi = {10.1016/j.heliyon.2023.e17652}, pmid = {37449094}, issn = {2405-8440}, abstract = {The Arcobacteraceae family groups Gram-negative bacterial species previously included in the family Campylobacteraceae. These species of which some are considered foodborne pathogens, have been isolated from different environmental niches and hosts. They have been isolated from various types of foods, though predominantly from food of animal origin, as well as from stool of humans with enteritis. Their different abilities to survive in different hosts and environments suggest an evolutionary pressure with consequent variation in their genome content. Moreover, their different physiological and genomic characteristics led to the recent proposal to create new genera within this family, which is however criticized due to the lack of discriminatory features and biological and clinical relevance. Aims of the present study were to assess the Arcobacteraceae pangenome, and to characterize existing similarities and differences in 20 validly described species. For this, analysis has been conducted on the genomes of the corresponding type strains obtained by Illumina sequencing, applying several bioinformatic tools. Results of the present study do not support the proposed division into different genera and revealed the presence of pangenome partitions with numbers comparable to other Gram-negative bacteria genera, such as Campylobacter. Different gene class compositions in animal and human-associated species are present, including a higher percentage of virulence-related gene classes such as cell motility genes. 
The adaptation to environmental and/or host conditions of some species was identified by the presence of specific genes. Furthermore, a division into pathogenic and non-pathogenic species is suggested, which can support future research on food safety and public health.}, }
@article{pmid37446042,
  author   = {Arifuzzaman, M and Jost, M and Wang, M and Chen, X and Perovic, D and Park, RF and Rouse, M and Forrest, K and Hayden, M and Khan, GA and Dracatos, PM},
  title    = {Mining the {Australian Grains Gene Bank} for Rust Resistance in Barley.},
  journal  = {International journal of molecular sciences},
  year     = {2023},
  volume   = {24},
  number   = {13},
  doi      = {10.3390/ijms241310860},
  pmid     = {37446042},
  issn     = {1422-0067},
  abstract = {Global barley production is threatened by plant pathogens, especially the rusts. In this study we used a targeted genotype-by-sequencing (GBS) assisted GWAS approach to identify rust resistance alleles in a collection of 287 genetically distinct diverse barley landraces and historical cultivars available in the Australian Grains Genebank (AGG) and originally sourced from Eastern Europe. The accessions were challenged with seven US-derived cereal rust pathogen races including Puccinia hordei (Ph-leaf rust) race 17VA12C, P. coronata var. hordei (Pch-crown rust) race 91NE9305 and five pathogenically diverse races of P. striiformis f. sp. hordei (Psh-stripe rust) (PSH-33, PSH-48, PSH-54, PSH-72 and PSH-100) and phenotyped quantitatively at the seedling stage. Novel resistance factors were identified on chromosomes 1H, 2H, 4H and 5H in response to Pch, whereas a race-specific QTL on 7HS was identified that was effective only to Psh isolates PSH-72 and PSH-100. A major effect QTL on chromosome 5HL conferred resistance to all Psh races including PSH-72, which is virulent on all 12 stripe rust differential tester lines. The same major effect QTL was also identified in response to leaf rust (17VA12C) suggesting this locus contains several pathogen specific rust resistance genes or the same gene is responsible for both leaf rust and stripe rust resistance. Twelve accessions were highly resistant to both leaf and stripe rust diseases and also carried the 5HL QTL. We subsequently surveyed the physical region at the 5HL locus for across the barley pan genome variation in the presence of known resistance gene candidates and identified a rich source of high confidence protein kinase and antifungal genes in the QTL region.},
}
@article{pmid37435610,
  author   = {Deverka, P and Geary, J and Mathews, C and Cohen, M and Hooker, G and Majumder, M and Skvarkova, Z and Cook-Deegan, R},
  title    = {Payer reimbursement practices and incentives for improving interpretation of germline genetic testing.},
  journal  = {Journal of law and the biosciences},
  year     = {2023},
  volume   = {10},
  number   = {2},
  pages    = {lsad020},
  doi      = {10.1093/jlb/lsad020},
  pmid     = {37435610},
  issn     = {2053-9711},
  abstract = {Germline genetic testing for inherited cancer risk has shifted to multi-gene panel tests (MGPTs). While MGPTs detect more pathogenic variants, they also detect more variants of uncertain significance (VUSs) that increase the possibility of harms such as unnecessary surgery. Data sharing by laboratories is critical to addressing the VUS problem. However, barriers to sharing and an absence of incentives have limited laboratory contributions to the ClinVar database. Payers can play a crucial role in the expansion of knowledge and effectiveness of genetic testing. Current policies affecting MGPT reimbursement are complex and create perverse incentives. Trends in utilization and coverage for private payers and Medicare illustrate opportunities and challenges for data sharing to close knowledge gaps and improve clinical utility. Policy options include making data sharing (i) a condition of payment, and (ii) a metric of laboratory quality in payment contracts, yielding preferred coverage or enhanced reimbursement. Mandating data sharing sufficient to verify interpretations and resolve discordance among labs under Medicare and federal health programs is an option for the US Congress. Such policies can reduce the current waste of valuable data needed for precision oncology and improved patient outcomes, enabling a learning health system.},
}
@article{pmid37434713,
  author   = {Batarseh, TN and Batarseh, SN and Morales-Cruz, A and Gaut, BS},
  title    = {Comparative genomics of the {Liberibacter} genus reveals widespread diversity in genomic content and positive selection history.},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1206094},
  pmid     = {37434713},
  issn     = {1664-302X},
  abstract = {'Candidatus Liberibacter' is a group of bacterial species that are obligate intracellular plant pathogens and cause Huanglongbing disease of citrus trees and Zebra Chip in potatoes. Here, we examined the extent of intra- and interspecific genetic diversity across the genus using comparative genomics. Our approach examined a wide set of Liberibacter genome sequences including five pathogenic species and one species not known to cause disease. By performing comparative genomics analyses, we sought to understand the evolutionary history of this genus and to identify genes or genome regions that may affect pathogenicity. With a set of 52 genomes, we performed comparative genomics, measured genome rearrangement, and completed statistical tests of positive selection. We explored markers of genetic diversity across the genus, such as average nucleotide identity across the whole genome. These analyses revealed the highest intraspecific diversity amongst the 'Ca. Liberibacter solanacearum' species, which also has the largest plant host range. We identified sets of core and accessory genes across the genus and within each species and measured the ratio of nonsynonymous to synonymous mutations (dN/dS) across genes. We identified ten genes with evidence of a history of positive selection in the Liberibacter genus, including genes in the Tad complex, which have been previously implicated as being highly divergent in the 'Ca. L. capsica' species based on high values of dN.},
}
@article{pmid37433982,
  author   = {Attwaters, M},
  title    = {A diverse and inclusive human pangenome.},
  journal  = {Nature reviews. Genetics},
  year     = {2023},
  pmid     = {37433982},
  issn     = {1471-0064},
}
@article{pmid37431308,
  author   = {Amas, JC and Bayer, PE and Hong Tan, W and Tirnaz, S and Thomas, WJW and Edwards, D and Batley, J},
  title    = {Comparative pangenome analyses provide insights into the evolution of {Brassica} rapa resistance gene analogues ({RGAs}).},
  journal  = {Plant biotechnology journal},
  year     = {2023},
  doi      = {10.1111/pbi.14116},
  pmid     = {37431308},
  issn     = {1467-7652},
  support  = {DP200100762//Australian Research Council/ ; DP210100296//Australian Research Council/ ; UWA1905- 006RTX//Grains Research and Development Corporation/ ; },
  abstract = {Brassica rapa is grown worldwide as economically important vegetable and oilseed crop. However, its production is challenged by yield-limiting pathogens. The sustainable control of these pathogens mainly relies on the deployment of genetic resistance primarily driven by resistance gene analogues (RGAs). While several studies have identified RGAs in B. rapa, these were mainly based on a single genome reference and do not represent the full range of RGA diversity in B. rapa. In this study, we utilized the B. rapa pangenome, constructed from 71 lines encompassing 12 morphotypes, to describe a comprehensive repertoire of RGAs in B. rapa. We show that 309 RGAs were affected by presence-absence variation (PAV) and 223 RGAs were missing from the reference genome. The transmembrane leucine-rich repeat (TM-LRR) RGA class had more core gene types than variable genes, while the opposite was observed for nucleotide-binding site leucine-rich repeats (NLRs). Comparative analysis with the B. napus pangenome revealed significant RGA conservation (93%) between the two species. We identified 138 candidate RGAs located within known B. rapa disease resistance QTL, of which the majority were under negative selection. Using blackleg gene homologues, we demonstrated how these genes in B. napus were derived from B. rapa. This further clarifies the genetic relationship of these loci, which may be useful in narrowing-down candidate blackleg resistance genes. This study provides a novel genomic resource towards the identification of candidate genes for breeding disease resistance in B. rapa and its relatives.},
}
@article{pmid37430957,
  author   = {Rani, A and Dike, CC and Mantri, N and Ball, A},
  title    = {Point-of-Care Lateral Flow Detection of Viable {Escherichia} coli {O157:H7} Using an Improved Propidium Monoazide-Recombinase Polymerase Amplification Method.},
  journal  = {Foods (Basel, Switzerland)},
  year     = {2022},
  volume   = {11},
  number   = {20},
  doi      = {10.3390/foods11203207},
  pmid     = {37430957},
  issn     = {2304-8158},
  abstract = {The detection of both viable and viable but non-culturable (VBNC) Escherichia coli O157:H7 is a crucial part of food safety. Traditional culture-dependent methods are lengthy, expensive, laborious, and unable to detect VBNC. Hence, there is a need to develop a rapid, simple, and cost-effective detection method to differentiate between viable/dead E. coli O157:H7 and detect VBNC cells. In this work, recombinase polymerase amplification (RPA) was developed for the detection of viable E. coli O157:H7 through integration with propidium monoazide (PMAxx). Initially, two primer sets, targeting two different genes (rfbE and stx) were selected, and DNA amplification by RPA combined with PMAxx treatment and the lateral flow assay (LFA) was carried out. Subsequently, the rfbE gene target was found to be more effective in inhibiting the amplification from dead cells and detecting only viable E. coli O157:H7. The assay's detection limit was found to be $10^{2}$ CFU/mL for VBNC E. coli O157:H7 when applied to spiked commercial beverages including milk, apple juice, and drinking water. pH values from 3 to 11 showed no significant effect on the efficacy of the assay. The PMAxx-RPA-LFA was completed at 39 °C within 40 min. This study introduces a rapid, robust, reliable, and reproducible method for detecting viable bacterial counts. In conclusion, the optimised assay has the potential to be used by the food and beverage industry in quality assurance related to E. coli O157:H7.},
}
@article{pmid37429841,
  author   = {Tisza, MJ and Smith, DDN and Clark, AE and Youn, JH and Khil, PP and Dekker, JP},
  title    = {Roving methyltransferases generate a mosaic epigenetic landscape and influence evolution in {Bacteroides} fragilis group.},
  journal  = {Nature communications},
  year     = {2023},
  volume   = {14},
  number   = {1},
  pages    = {4082},
  pmid     = {37429841},
  issn     = {2041-1723},
  abstract = {Three types of DNA methyl modifications have been detected in bacterial genomes, and mechanistic studies have demonstrated roles for DNA methylation in physiological functions ranging from phage defense to transcriptional control of virulence and host-pathogen interactions. Despite the ubiquity of methyltransferases and the immense variety of possible methylation patterns, epigenomic diversity remains unexplored for most bacterial species. Members of the Bacteroides fragilis group (BFG) reside in the human gastrointestinal tract as key players in symbiotic communities but also can establish anaerobic infections that are increasingly multi-drug resistant. In this work, we utilize long-read sequencing technologies to perform pangenomic (n = 383) and panepigenomic (n = 268) analysis of clinical BFG isolates cultured from infections seen at the NIH Clinical Center over four decades. Our analysis reveals that single BFG species harbor hundreds of DNA methylation motifs, with most individual motif combinations occurring uniquely in single isolates, implying immense unsampled methylation diversity within BFG epigenomes. Mining of BFG genomes identified more than 6000 methyltransferase genes, approximately 1000 of which were associated with intact prophages. Network analysis revealed substantial gene flow among disparate phage genomes, implying a role for genetic exchange between BFG phages as one of the ultimate sources driving BFG epigenome diversity.},
}
@article{pmid37424551,
  author   = {Narayanan, S and Couger, B and Bates, H and Gupta, SK and Malayer, J and Ramachandran, A},
  title    = {Characterization of three {Francisella} tularensis genomes from {Oklahoma}, {USA}.},
  journal  = {Access microbiology},
  year     = {2023},
  volume   = {5},
  number   = {6},
  pages    = {acmi000451},
  pmid     = {37424551},
  issn     = {2516-8290},
  abstract = {Francisella tularensis, the causative agent for tularaemia, is a Tier 1 select agent, and a pan-species pathogen of global significance due to its zoonotic potential. Consistent genome characterization of the pathogen is essential to identify novel genes, virulence factors, antimicrobial resistance genes, for studying phylogenetics and other features of interest. This study was conducted to understand the genetic variations among genomes of F. tularensis isolated from two felines and one human source. Pan-genome analysis revealed that 97.7 % of genes were part of the core genome. All three F. tularensis isolates were assigned to sequence type A based on single nucleotide polymorphisms (SNPs) in sdhA. Most of the virulence genes were part of the core genome. An antibiotic resistance gene coding for class A beta-lactamase was detected in all three isolates. Phylogenetic analysis showed that these isolates clustered with other isolates reported from Central and South-Central USA. Assessment of large sets of the F. tularensis genome sequences is essential in understanding pathogen dynamics, geographical distribution and potential zoonotic implications.},
}
@article{pmid37423939,
  author   = {Priyamvada, P and Ramaiah, S},
  title    = {Pan-genome and reverse vaccinology approaches to design multi-epitope vaccine against {Epstein-Barr} virus associated with colorectal cancer.},
  journal  = {Immunologic research},
  year     = {2023},
  pmid     = {37423939},
  issn     = {1559-0755},
  abstract = {Epstein-Barr virus (EBV) is a global lymphotropic virus and has been associated with various malignancies, among which colorectal cancer (CRC) is the prevalent one causing mortality worldwide. In the recent past, numerous research efforts have been made to develop a potential vaccine against this virus; however, none is effective possibly due to their low throughput, laboriousness, and lack of sensitivity. In this study, we designed a multi-epitope subunit vaccine that targets latent membrane protein (LMP-2B) of EBV using pan-genome and reverse vaccinology approaches. Twenty-three major histocompatibility complex (MHC) epitopes (five class-I and eighteen class-II) and eight B-cell epitopes, which have been found to be antigenic, immunogenic, and non-toxic, were selected for the vaccine construction. Furthermore, 24 vaccine constructs (VCs) were designed from the predicted epitopes and out of which VC1 was selected and finalized based on its structural parameters. The functionality of VC1 was validated through molecular docking with different immune receptors (MHC class-I, MHC class-II, and TLRs). The binding affinity, molecular and immune simulation revealed that the VC1 had more stable interaction and is believed to elicit good immune responses against EBV. HIGHLIGHTS: Pan-genome and reverse vaccinology approaches were used to design a multi-epitope subunit vaccine against LMP-2B protein of EBV. Epitopes were selected based on the antigenic, immunogenic, and non-toxic properties. Twenty-four vaccine constructs (VCs) were designed from the predicted epitopes. Designed vaccine VC1 has shown good binding affinity and molecular and immune simulation. VC1 was validated using molecular docking with different immune receptors.},
}
@article{pmid37409285,
  author   = {Luo, M and Sarnowski, TJ and Libault, M and Ríos, G and Charron, JB and Mantri, N and Zhang, S},
  title    = {Editorial: New insights into mechanisms of epigenetic modifiers in plant growth and development, volume {II}.},
  journal  = {Frontiers in plant science},
  year     = {2023},
  volume   = {14},
  pages    = {1213511},
  pmid     = {37409285},
  issn     = {1664-462X},
}
@article{pmid37408640,
  author   = {Kumari, K and Rawat, V and Shadan, A and Sharma, PK and Deb, S and Singh, RP},
  title    = {In-depth genome and pan-genome analysis of a metal-resistant bacterium {Pseudomonas} parafulva {OS-1}.},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1140249},
  pmid     = {37408640},
  issn     = {1664-302X},
  abstract = {A metal-resistant bacterium Pseudomonas parafulva OS-1 was isolated from waste-contaminated soil in Ranchi City, India. The isolated strain OS-1 showed its growth at 25-45°C, pH 5.0-9.0, and in the presence of ZnSO4 (upto 5 mM). Phylogenetic analysis based on 16S rRNA gene sequences revealed that strain OS-1 belonged to the genus Pseudomonas and was most closely related to parafulva species. To unravel the genomic features, we sequenced the complete genome of P. parafulva OS-1 using Illumina HiSeq 4,000 sequencing platform. The results of average nucleotide identity (ANI) analysis indicated the closest similarity of OS-1 to P. parafulva PRS09-11288 and P. parafulva DTSP2. The metabolic potential of P. parafulva OS-1 based on Clusters of Othologous Genes (COG) and Kyoto Encyclopedia of Genes and Genomes (KEGG) indicated a high number of genes related to stress protection, metal resistance, and multiple drug-efflux, etc., which is relatively rare in P. parafulva strains. Compared with other parafulva strains, P. parafulva OS-1 was found to have the unique β-lactam resistance and type VI secretion system (T6SS) gene. Additionally, its genomes encode various CAZymes such as glycoside hydrolases and other genes associated with lignocellulose breakdown, suggesting that strain OS-1 have strong biomass degradation potential. The presence of genomic complexity in the OS-1 genome indicates that horizontal gene transfer (HGT) might happen during evolution. Therefore, genomic and comparative genome analysis of parafulva strains is valuable for further understanding the mechanism of resistance to metal stress and opens a perspective to exploit a newly isolated bacterium for biotechnological applications.},
}
@article{pmid37401440,
  author   = {Glick, L and Mayrose, I},
  title    = {The effect of methodological considerations on the construction of gene-based plant pan-genomes.},
  journal  = {Genome biology and evolution},
  year     = {2023},
  doi      = {10.1093/gbe/evad121},
  pmid     = {37401440},
  issn     = {1759-6653},
  abstract = {Pan-genomics is an emerging approach for studying the genetic diversity within plant populations. In contrast to common resequencing studies that compare whole genome sequencing data to a single reference genome, the construction of a pan-genome involves the direct comparison of multiple genomes to one another, thereby enabling the detection of genomic sequences and genes not present in the reference, as well as the analysis of gene content diversity. While multiple studies describing pan-genomes of various plant species have been published in recent years, a better understanding regarding the effect of the computational procedures used for pan-genome construction could guide researchers in making more informed methodological decisions. Here we examine the effect of several key methodological factors on the obtained gene pool and on gene presence-absence detections by constructing and comparing multiple pan-genomes of Arabidopsis thaliana and cultivated soybean, as well as conducting a meta-analysis on published pan-genomes. These factors include the construction method, the sequencing depth, and the extent of input data used for gene annotation. We observe substantial differences between pan-genomes constructed using three common procedures (De novo assembly and annotation, Map-to-pan, and Iterative assembly), and that results are dependent on the extent of the input data. Specifically, we report low agreement between the gene content inferred using different procedures and input data. Our results should increase the awareness of the community to the consequences of methodological decisions made during the process of pan-genome construction and emphasize the need for further investigation of commonly applied methodologies.},
}
@article{pmid37397999,
  author   = {Raghuram, V and Gunoskey, JJ and Hofstetter, KS and Jacko, NF and Shumaker, MJ and Hu, YJ and Read, TD and David, MZ},
  title    = {Comparison of genomic diversity between single and pooled {Staphylococcus} aureus colonies isolated from human colonisation cultures.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2023},
  doi      = {10.1101/2023.06.14.544959},
  pmid     = {37397999},
  abstract = {The most common approach to sampling the bacterial populations within an infected or colonised host is to sequence genomes from a single colony obtained from a culture plate. However, it is recognized that this method does not capture the genetic diversity in the population. An alternative is to sequence a mixture containing multiple colonies ("pool-seq"), but this has the disadvantage that it is a non-homogeneous sample, making it difficult to perform specific experiments. We compared differences in measures of genetic diversity between eight single-colony isolates (singles) and pool-seq on a set of 2286 S. aureus culture samples. The samples were obtained by swabbing three body sites on 85 human participants quarterly for a year, who initially presented with a methicillin-resistant S. aureus skin and soft-tissue infection (SSTI). We compared parameters such as sequence quality, contamination, allele frequency, nucleotide diversity and pangenome diversity in each pool to the corresponding singles. Comparing singles from the same culture plate, we found that 18% of sample collections contained mixtures of multiple Multilocus sequence types (MLSTs or STs). We showed that pool-seq data alone could predict the presence of multi-ST populations with 95% accuracy. We also showed that pool-seq could be used to estimate the number of polymorphic sites in the population. Additionally, we found that the pool may contain clinically relevant genes such as antimicrobial resistance markers that may be missed when only examining singles. These results highlight the potential advantage of analysing genome sequences of total populations obtained from clinical cultures rather than single colonies.},
}
@article{pmid37396358,
  author   = {Simpson, AC and Eedara, VVR and Singh, NK and Damle, N and Parker, CW and Karouia, F and Mason, CE and Venkateswaran, K},
  title    = {Comparative genomic analysis of {Cohnella} hashimotonis sp. nov. isolated from the {International Space Station}.},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1166013},
  pmid     = {37396358},
  issn     = {1664-302X},
  abstract = {A single strain from the family Paenibacillaceae was isolated from the wall behind the Waste Hygiene Compartment aboard the International Space Station (ISS) in April 2018, as part of the Microbial Tracking mission series. This strain was identified as a gram-positive, rod-shaped, oxidase-positive, catalase-negative motile bacterium in the genus Cohnella, designated as F6_2S_P_1[T]. The 16S sequence of the F6_2S_P_1[T] strain places it in a clade with C. rhizosphaerae and C. ginsengisoli, which were originally isolated from plant tissue or rhizosphere environments. The closest 16S and gyrB matches to strain F6_2S_P_1[T] are to C. rhizosphaerae with 98.84 and 93.99% sequence similarity, while a core single-copy gene phylogeny from all publicly available Cohnella genomes places it as more closely related to C. ginsengisoli. Average nucleotide identity (ANI) and digital DNA-DNA hybridization (dDDH) values to any described Cohnella species are <89 and <22%, respectively. The major fatty acids for strain F6_2S_P_1[T] are anteiso-C15:0 (51.7%), iso-C16:0 (23.1%), and iso-C15:0 (10.5%), and it is able to metabolize a wide range of carbon compounds. Given the results of the ANI and dDDH analyses, this ISS strain is a novel species within the genus Cohnella for which we propose the name Cohnella hashimotonis, with the type strain F6_2S_P_1[T] (=NRRL B-65657[T] and DSMZ 115098[T]). Because no closely related Cohnella genomes were available, this study generated the whole-genome sequences (WGSs) of the type strains for C. rhizosphaerae and C. ginsengisoli. Phylogenetic and pangenomic analysis reveals that F6_2S_P_1[T], C. rhizosphaerae, and C. ginsengisoli, along with two uncharacterized Cohnella strains, possess a shared set of 332 gene clusters which are not shared with any other WGS of Cohnella species, and form a distinct clade branching off from C. nanjingensis. Functional traits were predicted for the genomes of strain F6_2S_P_1[T] and other members of this clade.},
}
@article{pmid37395662,
  author   = {Moreno, E and Middlebrook, EA and Altamirano-Silva, P and Al Dahouk, S and Araj, GF and Arce-Gorvel, V and Arenas-Gamboa, Á and Ariza, J and Barquero-Calvo, E and Battelli, G and Bertu, WJ and Blasco, JM and Bosilkovski, M and Cadmus, S and Caswell, CC and Celli, J and Chacón-Díaz, C and Chaves-Olarte, E and Comerci, DJ and Conde-Álvarez, R and Cook, E and Cravero, S and Dadar, M and De Boelle, X and De Massis, F and Díaz, R and Escobar, GI and Fernández-Lago, L and Ficht, TA and Foster, JT and Garin-Bastuji, B and Godfroid, J and Gorvel, JP and Güler, L and Erdenliğ-Gürbilek, S and Gusi, AM and Guzmán-Verri, C and Hai, J and Hernández-Mora, G and Iriarte, M and Jacob, NR and Keriel, A and Khames, M and Köhler, S and Letesson, JJ and Loperena-Barber, M and López-Goñi, I and McGiven, J and Melzer, F and Mora-Cartin, R and Moran-Gilad, J and Muñoz, PM and Neubauer, H and O'Callaghan, D and Ocholi, R and Oñate, Á and Pandey, P and Pappas, G and Pembroke, JT and Roop, M and Ruiz-Villalonos, N and Ryan, MP and Salvador-Bescós, M and Sangari, FJ and de Lima Santos, R and Seimenis, A and Splitter, G and Suárez-Esquivel, M and Tabbaa, D and Trangoni, MD and Tsolis, RM and Vizcaíno, N and Wareth, G and Welburn, SC and Whatmore, A and Zúñiga-Ripa, A and Moriyón, I},
  title    = {If You're Not Confused, You're Not Paying Attention: {Ochrobactrum} Is Not {Brucella}.},
  journal  = {Journal of clinical microbiology},
  year     = {2023},
  pages    = {e0043823},
  doi      = {10.1128/jcm.00438-23},
  pmid     = {37395662},
  issn     = {1098-660X},
  abstract = {Bacteria of the genus Brucella are facultative intracellular parasites that cause brucellosis, a severe animal and human disease. Recently, a group of taxonomists merged the brucellae with the primarily free-living, phylogenetically related Ochrobactrum spp. in the genus Brucella. This change, founded only on global genomic analysis and the fortuitous isolation of some opportunistic Ochrobactrum spp. from medically compromised patients, has been automatically included in culture collections and databases. We argue that clinical and environmental microbiologists should not accept this nomenclature, and we advise against its use because (i) it was presented without in-depth phylogenetic analyses and did not consider alternative taxonomic solutions; (ii) it was launched without the input of experts in brucellosis or Ochrobactrum; (iii) it applies a non-consensus genus concept that disregards taxonomically relevant differences in structure, physiology, population structure, core-pangenome assemblies, genome structure, genomic traits, clinical features, treatment, prevention, diagnosis, genus description rules, and, above all, pathogenicity; and (iv) placing these two bacterial groups in the same genus creates risks for veterinarians, medical doctors, clinical laboratories, health authorities, and legislators who deal with brucellosis, a disease that is particularly relevant in low- and middle-income countries. Based on all this information, we urge microbiologists, bacterial collections, genomic databases, journals, and public health boards to keep the Brucella and Ochrobactrum genera separate to avoid further bewilderment and harm.},
}
@article{pmid37395647,
  author   = {Queiroz, VF and Carvalho, JVRP and de Souza, FG and Lima, MT and Santos, JD and Rocha, KLS and de Oliveira, DB and Araújo, JP and Ullmann, LS and Rodrigues, RAL and Abrahão, JS},
  title    = {Analysis of the Genomic Features and Evolutionary History of Pithovirus-Like Isolates Reveals Two Major Divergent Groups of Viruses.},
  journal  = {Journal of virology},
  year     = {2023},
  pages    = {e0041123},
  doi      = {10.1128/jvi.00411-23},
  pmid     = {37395647},
  issn     = {1098-5514},
  abstract = {New representatives of the phylum Nucleocytoviricota have been rapidly described in the last decade. Despite this, not all viruses of this phylum are allocated to recognized taxonomic families, as is the case for orpheovirus, pithovirus, and cedratvirus, which form the proposed family Pithoviridae. In this study, we performed comprehensive comparative genomic analyses of 8 pithovirus-like isolates, aiming to understand their common traits and evolutionary history. Structural and functional genome annotation was performed de novo for all the viruses, which served as a reference for pangenome construction. The synteny analysis showed substantial differences in genome organization between these viruses, with very few and short syntenic blocks shared between orpheovirus and its relatives. It was possible to observe an open pangenome with a significant increase in the slope when orpheovirus was added, alongside a decrease in the core genome. Network analysis placed orpheovirus as a distant and major hub with a large fraction of unique clusters of orthologs, indicating a distant relationship between this virus and its relatives, with only a few shared genes. Additionally, phylogenetic analyses of strict core genes shared with other viruses of the phylum reinforced the divergence of orpheovirus from pithoviruses and cedratviruses. Altogether, our results indicate that although pithovirus-like isolates share common features, this group of ovoid-shaped giant viruses presents substantial differences in gene contents, genomic architectures, and the phylogenetic history of several core genes. Our data indicate that orpheovirus is an evolutionarily divergent viral entity, suggesting its allocation to a different viral family, Orpheoviridae. IMPORTANCE Giant viruses that infect amoebae form a monophyletic group named the phylum Nucleocytoviricota. Despite being genomically and morphologically very diverse, the taxonomic categories of some clades that form this phylum are not yet well established. With advances in isolation techniques, the speed at which new giant viruses are described has increased, escalating the need to establish criteria to define the emerging viral taxa. In this work, we performed a comparative genomic analysis of representatives of the putative family Pithoviridae. Based on the dissimilarity of orpheovirus from the other viruses of this putative family, we propose that orpheovirus be considered a member of an independent family, Orpheoviridae, and suggest criteria to demarcate families consisting of ovoid-shaped giant viruses.},
}
@article{pmid37395521,
  author   = {Meng, PQ and Zhang, Q and Ding, Y and Lin, JX and Chen, F},
  title    = {Evolutionary and Pan-genome Analysis of Three Important Black-pigmented Periodontal Pathogens.},
  journal  = {The Chinese journal of dental research : the official journal of the Scientific Section of the Chinese Stomatological Association (CSA)},
  year     = {2023},
  volume   = {26},
  number   = {2},
  pages    = {93--104},
  doi      = {10.3290/j.cjdr.b4128023},
  pmid     = {37395521},
  issn     = {1867-5646},
  abstract = {OBJECTIVE: To analyse the pan-genome of three black-pigmented periodontal pathogens: Porphyromonas gingivalis, Prevotella intermedia and Prevotella nigrescens.
METHODS: Pan-genome analyses of 66, 33 and 5 publicly available whole-genome sequences of P. gingivalis, P. intermedia and P. nigrescens, respectively, were performed using Pan-genome Analysis Pipeline software (version 1.2.1; Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing, PR China). Phylogenetic trees were constructed based on the entire pan-genome and single nucleotide polymorphisms within the core genome. The distribution and abundance of virulence genes in the core and dispensable genomes were also compared in the three species.
RESULTS: All three species possess an open pan-genome. The core genome of P. gingivalis, P. intermedia and P. nigrescens included 1001, 1514 and 1745 orthologous groups, respectively, which were mainly related to basic cellular functions such as metabolism. The dispensable genome of P. gingivalis, P. intermedia and P. nigrescens was composed of 2814, 2689 and 906 orthologous groups, respectively, and it was enriched in genes involved in pathogenicity or with unknown functions. Phylogenetic trees presented a clear separation of P. gingivalis, P. intermedia and P. nigrescens, verifying the reclassification of the black-pigmented species. Furthermore, the three species shared almost the same virulence factors involved in adhesion, proteolysis and evasion of host defences. Some of these virulence genes were conserved across species whereas others belonged to the dispensable genome, which might be acquired through horizontal gene transfer.
CONCLUSION: This study highlighted the usefulness of pan-genome analysis to infer evolutionary cues for black-pigmented species, indicating their homology and phylogenomic diversity.},
}
@article{pmid37393724,
  author   = {He, Y and Pan, J and Huang, D and Sanford, RA and Peng, S and Wei, N and Sun, W and Shi, L and Jiang, Z and Jiang, Y and Hu, Y and Li, S and Li, Y and Li, M and Dong, Y},
  title    = {Distinct microbial structure and metabolic potential shaped by significant environmental gradient impacted by ferrous slag weathering.},
  journal  = {Environment international},
  year     = {2023},
  volume   = {178},
  number   = {},
  pages    = {108067},
  doi      = {10.1016/j.envint.2023.108067},
  pmid     = {37393724},
  issn     = {1873-6750},
  abstract = {Alkaline ferrous slags pose global environmental issues and long-term risks to ambient environments. To explore the under-investigated microbial structure and biogeochemistry in such unique ecosystems, combined geochemical, microbial, ecological and metagenomic analyses were performed in the areas adjacent to a ferrous slag disposal plant in Sichuan, China. Different levels of exposure to ultrabasic slag leachate had resulted in a significant geochemical gradient of pH (8.0-12.4), electric potential (-126.9 to 437.9 mV), total organic carbon (TOC, 1.5-17.3 mg/L), and total nitrogen (TN, 0.17-1.01 mg/L). Distinct microbial communities were observed depending on their exposure to the strongly alkaline leachate. High pH and Ca[2+] concentrations were associated with low microbial diversity and enrichment of bacterial classes Gamma-proteobacteria and Deinococci in the microbial communities exposed to the leachate. Combined metagenomic analyses of 4 leachate-unimpacted and 2-impacted microbial communities led to the assembly of one Serpentinomonas pangenome and 81 phylogenetically diversified metagenome assembled genomes (MAGs). The prevailing taxa in the leachate-impacted habitats (e.g., Serpentinomonas and Meiothermus spp.) were phylogenetically related to those in active serpentinizing ecosystems, suggesting the analogous processes between the man-made and natural systems. 
More importantly, they accounted for significant abundance of most functional genes associated with environmental adaptation and major element cycling. Their metabolic potential (e.g., cation/H[+] antiporters, carbon fixation on lithospheric carbon source, and respiration coupling sulfur oxidization and oxygen or nitrate reduction) may support these taxa to survive and prosper in these unique geochemical niches. This study provides fundamental understandings of the adaptive strategies of microorganisms in response to the strong environmental perturbation by alkali tailings. It also contributes to a better comprehension of how to remediate environments affected by alkaline industrial material.},
}
@article{pmid37389215,
  author   = {Conte, AL and Brunetti, F and Marazzato, M and Longhi, C and Maurizi, L and Raponi, G and Palamara, AT and Grassi, S and Conte, MP},
  title    = {Atopic dermatitis-derived {Staphylococcus} aureus strains: what makes them special in the interplay with the host.},
  journal  = {Frontiers in cellular and infection microbiology},
  year     = {2023},
  volume   = {13},
  number   = {},
  pages    = {1194254},
  doi      = {10.3389/fcimb.2023.1194254},
  pmid     = {37389215},
  issn     = {2235-2988},
  abstract = {BACKGROUND: Atopic dermatitis (AD) is a chronic inflammatory skin condition whose pathogenesis involves genetic predisposition, epidermal barrier dysfunction, alterations in the immune responses and microbial dysbiosis. Clinical studies have shown a link between Staphylococcus aureus and the pathogenesis of AD, although the origins and genetic diversity of S. aureus colonizing patients with AD is poorly understood. The aim of the study was to investigate if specific clones might be associated with the disease.
METHODS: WGS analyses were performed on 38 S. aureus strains, deriving from AD patients and healthy carriers. Genotypes (i.e. MLST, spa-, agr- and SCCmec-typing), genomic content (e.g. virulome and resistome), and the pan-genome structure of strains have been investigated. Phenotypic analyses were performed to determine the antibiotic susceptibility, the biofilm production and the invasiveness within the investigated S. aureus population.
RESULTS: Strains isolated from AD patients revealed a high degree of genetic heterogeneity and a shared set of virulence factors and antimicrobial resistance genes, suggesting that no genotype and genomic content are uniquely associated with AD. The same strains were characterized by a lower variability in terms of gene content, indicating that the inflammatory conditions could exert a selective pressure leading to the optimization of the gene repertoire. Furthermore, genes related to specific mechanisms, like post-translational modification, protein turnover and chaperones as well as intracellular trafficking, secretion and vesicular transport, were significantly more enriched in AD strains. Phenotypic analysis revealed that all of our AD strains were strong or moderate biofilm producers, while less than half showed invasive capabilities.
CONCLUSIONS: We conclude that in AD skin, the functional role played by S. aureus may depend on differential gene expression patterns and/or on post-translational modification mechanisms rather than being associated with peculiar genetic features.},
}
@article{pmid37386186,
  author   = {Ahsan, MU and Liu, Q and Perdomo, JE and Fang, L and Wang, K},
  title    = {A survey of algorithms for the detection of genomic structural variants from long-read sequencing data.},
  journal  = {Nature methods},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  pmid     = {37386186},
  issn     = {1548-7105},
  support  = {GM132713//U.S. Department of Health & Human Services | NIH | National Institute of General Medical Sciences (NIGMS)/ ; },
  abstract = {As long-read sequencing technologies are becoming increasingly popular, a number of methods have been developed for the discovery and analysis of structural variants (SVs) from long reads. Long reads enable detection of SVs that could not be previously detected from short-read sequencing, but computational methods must adapt to the unique challenges and opportunities presented by long-read sequencing. Here, we summarize over 50 long-read-based methods for SV detection, genotyping and visualization, and discuss how new telomere-to-telomere genome assemblies and pangenome efforts can improve the accuracy and drive the development of SV callers in the future.},
}
@article{pmid37382545,
  author   = {Chen, X and Zhang, H and Feng, J and Zhang, L and Zheng, M and Luo, H and Zhuo, H and Xu, N and Zhang, X and Chen, C and Qu, P and Li, Y},
  title    = {Comparative Genomic Analysis Reveals Genetic Diversity and Pathogenic Potential of {Haemophilus} seminalis and Emended Description of {Haemophilus} seminalis.},
  journal  = {Microbiology spectrum},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {e0477222},
  doi      = {10.1128/spectrum.04772-22},
  pmid     = {37382545},
  issn     = {2165-0497},
  abstract = {Haemophilus seminalis is a newly proposed species that is phylogenetically related to Haemophilus haemolyticus. The distribution of H. seminalis in the human population, its genomic diversity, and its pathogenic potential are still unclear. This study reports the finding of our comparative genomic analyses of four newly isolated Haemophilus strains (SZY H8, SZY H35, SZY H36, and SZY H68) from human sputum specimens (Guangzhou, China) along with the publicly available genomes of other phylogenetically related Haemophilus species. Based on pairwise comparisons of the 16S rRNA gene sequences, the four isolates showed <98.65% sequence identity to the type strains of all known Haemophilus species but were identified as belonging to H. seminalis, based on comparable phenotypic and genotypic features. Additionally, the four isolates showed high genome-genome relatedness indices (>95% ANI values) with 17 strains that were previously identified as either "Haemophilus intermedius" or hemin (X-factor)-independent H. haemolyticus and therefore required a more detailed classification study. Phylogenetically, these isolates, along with the two previously described H. seminalis isolates (a total of 23 isolates), shared a highly homologous lineage that is distinct from the clades of the main H. haemolyticus and Haemophilus influenzae strains. These isolates present an open pangenome with multiple virulence genes. 
Notably, all 23 isolates have a functional heme biosynthesis pathway that is similar to that of Haemophilus parainfluenzae. The phenotype of hemin (X-factor) independence and the analysis of the ispD, pepG, and moeA genes can be used to distinguish these isolates from H. haemolyticus and H. influenzae. Based on the above findings, we propose a reclassification for all "H. intermedius" and two H. haemolyticus isolates belonging to H. seminalis with an emended description of H. seminalis. This study provides a more accurate identification of Haemophilus isolates for use in the clinical laboratory and a better understanding of the clinical significance and genetic diversity in human environments. IMPORTANCE As a versatile opportunistic pathogen, the accurate identification of Haemophilus species is a challenge in clinical practice. In this study, we characterized the phenotypic and genotypic features of four H. seminalis strains that were isolated from human sputum specimens and propose the "H. intermedius" and hemin (X-factor)-independent H. haemolyticus isolates as belonging to H. seminalis. The prediction of virulence-related genes indicates that H. seminalis isolates carry several virulence genes that are likely to play an important role in its pathogenicity. In addition, we depict that the genes ispD, pepG, and moeA can be used as biomarkers for distinguishing H. seminalis from H. haemolyticus and H. influenzae. Our findings provide some insights into the identification, epidemiology, genetic diversity, pathogenic potential, and antimicrobial resistance of the newly proposed H. seminalis.},
}
@article{pmid37382302,
  author   = {Puente-Sánchez, F and Hoetzinger, M and Buck, M and Bertilsson, S},
  title    = {Exploring environmental intra-species diversity through non-redundant pangenome assemblies.},
  journal  = {Molecular ecology resources},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1111/1755-0998.13826},
  pmid     = {37382302},
  issn     = {1755-0998},
  support  = {892961//H2020 Marie Skłodowska-Curie Actions/ ; 2019-02336//Svenska Forskningsrådet Formas/ ; 2017-04422//Vetenskapsrådet/ ; 2018-05973//Vetenskapsrådet/ ; },
  abstract = {At the genome level, microorganisms are highly adaptable both in terms of allele and gene composition. Such heritable traits emerge in response to different environmental niches and can have a profound influence on microbial community dynamics. As a consequence, any individual genome or population will contain merely a fraction of the total genetic diversity of any operationally defined "species", whose ecological potential can thus be only fully understood by studying all of their genomes and the genes therein. This concept, known as the pangenome, is valuable for studying microbial ecology and evolution, as it partitions genomes into core (present in all the genomes from a species, and responsible for housekeeping and species-level niche adaptation among others) and accessory regions (present only in some, and responsible for intra-species differentiation). Here we present SuperPang, an algorithm producing pangenome assemblies from a set of input genomes of varying quality, including metagenome-assembled genomes (MAGs). SuperPang runs in linear time and its results are complete, non-redundant, preserve gene ordering and contain both coding and non-coding regions. Our approach provides a modular view of the pangenome, identifying operons and genomic islands, and allowing to track their prevalence in different populations. 
We illustrate this by analysing intra-species diversity in Polynucleobacter, a bacterial genus ubiquitous in freshwater ecosystems, characterized by their streamlined genomes and their ecological versatility. We show how SuperPang facilitates the simultaneous analysis of allelic and gene content variation under different environmental pressures, allowing us to study the drivers of microbial diversification at unprecedented resolution.},
}
@article{pmid37379037,
  author   = {Madhusoodanan, J},
  title    = {A More Diverse and Complete Reference Human Genome Is Poised to Change Medicine.},
  journal  = {JAMA},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1001/jama.2023.9498},
  pmid     = {37379037},
  issn     = {1538-3598},
}
@article{pmid37377491,
  author   = {Karanth, S and Patel, J and Shirmohammadi, A and Pradhan, AK},
  title    = {Machine learning to predict foodborne salmonellosis outbreaks based on genome characteristics and meteorological trends.},
  journal  = {Current research in food science},
  year     = {2023},
  volume   = {6},
  number   = {},
  pages    = {100525},
  pmid     = {37377491},
  issn     = {2665-9271},
  abstract = {Several studies have shown a correlation between outbreaks of Salmonella enterica and meteorological trends, especially related to temperature and precipitation. Additionally, current studies based on outbreaks are performed on data for the species Salmonella enterica, without considering its intra-species and genetic heterogeneity. In this study, we analyzed the effect of differential gene expression and a suite of meteorological factors on salmonellosis outbreak scale (typified by case numbers) using a combination of machine learning and count-based modeling methods. Elastic Net regularization model was used to identify significant genes from a Salmonella pan-genome, and a multi-variable Poisson regression developed to fit the individual and mixed effects data. The best-fit Elastic Net model (α = 0.50; λ = 2.18) identified 53 significant gene features. The final multi-variable Poisson regression model (χ[2] = 5748.22; pseudo R[2] = 0.669; probability > χ[2] = 0) identified 127 significant predictor terms (p < 0.10), comprising 45 gene-only predictors, average temperature, average precipitation, and average snowfall, and 79 gene-meteorological interaction terms. The significant genes ranged in functionality from cellular signaling and transport, virulence, metabolism, and stress response, and included gene variables not considered as significant by the baseline model. This study presents a holistic approach towards evaluating multiple data sources (such as genomic and environmental data) to predict outbreak scale, which could help in revising the estimates for human health risk.},
}
@article{pmid37375105,
  author   = {Myintzaw, P and Pennone, V and McAuliffe, O and Begley, M and Callanan, M},
  title    = {Association of Virulence, Biofilm, and Antimicrobial Resistance Genes with Specific Clonal Complex Types of {Listeria} monocytogenes.},
  journal  = {Microorganisms},
  year     = {2023},
  volume   = {11},
  number   = {6},
  pages    = {},
  doi      = {10.3390/microorganisms11061603},
  pmid     = {37375105},
  issn     = {2076-2607},
  support  = {15F604, 2019R495//Department of Agriculture Food and the Marine/ ; },
  abstract = {Precise classification of foodborne pathogen Listeria monocytogenes is a necessity in efficient foodborne disease surveillance, outbreak detection, and source tracking throughout the food chain. In this study, a total of 150 L. monocytogenes isolates from various food products, food processing environments, and clinical sources were investigated for variations in virulence, biofilm formation, and the presence of antimicrobial resistance genes based on their Whole-Genome Sequences. Clonal complex (CC) determination based on Multi-Locus Sequence Typing (MLST) revealed twenty-eight CC-types including eight isolates representing novel CC-types. The eight isolates comprising the novel CC-types share the majority of the known (cold and acid) stress tolerance genes and are all genetic lineage II, serogroup 1/2a-3a. Pan-genome-wide association analysis by Scoary using Fisher's exact test identified eleven genes specifically associated with clinical isolates. Screening for the presence of antimicrobial and virulence genes using the ABRicate tool uncovered variations in the presence of Listeria Pathogenicity Islands (LIPIs) and other known virulence genes. Specifically, the distributions of actA, ecbA, inlF, inlJ, lapB, LIPI-3, and vip genes across isolates were found to be significantly CC-dependent while the presence of ami, inlF, inlJ, and LIPI-3 was associated with clinical isolates specifically. 
In addition, Roary-derived phylogenetic grouping based on Antimicrobial-Resistant Genes (AMRs) revealed that the thiol transferase (FosX) gene was present in all lineage I isolates, and the presence of the lincomycin resistance ABC-F-type ribosomal protection protein (lmo0919_fam) was also genetic-lineage-dependent. More importantly, the genes found to be specific to CC-type were consistent when a validation analysis was performed with fully assembled, high-quality complete L. monocytogenes genome sequences (n = 247) extracted from the National Centre for Biotechnology Information (NCBI) microbial genomes database. This work highlights the usefulness of MLST-based CC typing using the Whole-Genome Sequence as a tool in classifying isolates.},
}
@article{pmid37374997,
  author   = {Negrete-Paz, AM and Vázquez-Marrufo, G and Gutiérrez-Moraga, A and Vázquez-Garcidueñas, MS},
  title    = {Pangenome Reconstruction of {Mycobacterium} tuberculosis as a Guide to Reveal Genomic Features Associated with Strain Clinical Phenotype.},
  journal  = {Microorganisms},
  year     = {2023},
  volume   = {11},
  number   = {6},
  pages    = {},
  doi      = {10.3390/microorganisms11061495},
  pmid     = {37374997},
  issn     = {2076-2607},
  support  = {PICIR-021//Instituto de Ciencia, Tecnología e Innovación de Michoacán/ ; },
  abstract = {Tuberculosis (TB) is one of the leading causes of human deaths worldwide caused by infectious diseases. TB infection by Mycobacterium tuberculosis can occur in the lungs, causing pulmonary tuberculosis (PTB), or in any other organ of the body, resulting in extrapulmonary tuberculosis (EPTB). There is no consensus on the genetic determinants of this pathogen that may contribute to EPTB. In this study, we constructed the M. tuberculosis pangenome and used it as a tool to seek genomic signatures associated with the clinical presentation of TB based on its accessory genome differences. The analysis carried out in the present study includes the raw reads of 490 M. tuberculosis genomes (PTB n = 245, EPTB n = 245) retrieved from public databases that were assembled, as well as ten genomes from Mexican strains (PTB n = 5, EPTB n = 5) that were sequenced and assembled. All genomes were annotated and then used to construct the pangenome with Roary and Panaroo. The pangenome obtained using Roary consisted of 2231 core genes and 3729 accessory genes. On the other hand, the pangenome resulting from Panaroo consisted of 2130 core genes and 5598 accessory genes. Associations between the distribution of accessory genes and the PTB/EPTB phenotypes were examined using the Scoary and Pyseer tools. Both tools found a significant association between the hspR, plcD, Rv2550c, pe_pgrs5, pe_pgrs25, and pe_pgrs57 genes and the PTB genotype. 
In contrast, the deletion of the aceA, esxR, plcA, and ppe50 genes was significantly associated with the EPTB phenotype. Rv1759c and Rv3740 were found to be associated with the PTB phenotype according to Scoary; however, these associations were not observed when using Pyseer. The robustness of the constructed pangenome and the gene-phenotype associations is supported by several factors, including the analysis of a large number of genomes, the inclusion of the same number of PTB/EPTB genomes, and the reproducibility of results thanks to the different bioinformatic tools used. Such characteristics surpass most of previous M. tuberculosis pangenomes. Thus, it can be inferred that the deletion of these genes can lead to changes in the processes involved in stress response and fatty acid metabolism, conferring phenotypic advantages associated with pulmonary or extrapulmonary presentation of TB. This study represents the first attempt to use the pangenome to seek gene-phenotype associations in M. tuberculosis.},
}
@article{pmid37374927,
  author   = {Uljanovas, D and Gölz, G and Fleischmann, S and Kudirkiene, E and Kasetiene, N and Grineviciene, A and Tamuleviciene, E and Aksomaitiene, J and Alter, T and Malakauskas, M},
  title    = {Genomic Characterization of {Arcobacter} butzleri Strains Isolated from Various Sources in {Lithuania}.},
  journal  = {Microorganisms},
  year     = {2023},
  volume   = {11},
  number   = {6},
  pages    = {},
  doi      = {10.3390/microorganisms11061425},
  pmid     = {37374927},
  issn     = {2076-2607},
  support  = {01KI1712//Federal Ministry of Education and Research/ ; },
  abstract = {Arcobacter (A.) butzleri, the most widespread species within the genus Arcobacter, is considered as an emerging pathogen causing gastroenteritis in humans. Here, we performed a comparative genome-wide analysis of 40 A. butzleri strains from Lithuania to determine the genetic relationship, pangenome structure, putative virulence, and potential antimicrobial- and heavy-metal-resistance genes. Core genome single nucleotide polymorphism (cgSNP) analysis revealed low within-group variability (≤4 SNPs) between three milk strains (RCM42, RCM65, RCM80) and one human strain (H19). Regardless of the type of input (i.e., cgSNPs, accessory genome, virulome, resistome), these strains showed a recurrent phylogenetic and hierarchical grouping pattern. A. butzleri demonstrated a relatively large and highly variable accessory genome (comprising of 6284 genes with around 50% of them identified as singletons) that only partially correlated to the isolation source. Downstream analysis of the genomes resulted in the detection of 115 putative antimicrobial- and heavy-metal-resistance genes and 136 potential virulence factors that are associated with the induction of infection in host (e.g., cadF, degP, iamA), survival and environmental adaptation (e.g., flagellar genes, CheA-CheY chemotaxis system, urease cluster). This study provides additional knowledge for a better A. 
butzleri-related risk assessment and highlights the need for further genomic epidemiology studies in Lithuania and other countries.},
}
@article{pmid37374141,
  author   = {Abondio, P and Cilli, E and Luiselli, D},
  title    = {Human Pangenomics: Promises and Challenges of a Distributed Genomic Reference.},
  journal  = {Life (Basel, Switzerland)},
  year     = {2023},
  volume   = {13},
  number   = {6},
  pages    = {},
  doi      = {10.3390/life13061360},
  pmid     = {37374141},
  issn     = {2075-1729},
  abstract = {A pangenome is a collection of the common and unique genomes that are present in a given species. It combines the genetic information of all the genomes sampled, resulting in a large and diverse range of genetic material. Pangenomic analysis offers several advantages compared to traditional genomic research. For example, a pangenome is not bound by the physical constraints of a single genome, so it can capture more genetic variability. Thanks to the introduction of the concept of pangenome, it is possible to use exceedingly detailed sequence data to study the evolutionary history of two different species, or how populations within a species differ genetically. In the wake of the Human Pangenome Project, this review aims at discussing the advantages of the pangenome around human genetic variation, which are then framed around how pangenomic data can inform population genetics, phylogenetics, and public health policy by providing insights into the genetic basis of diseases or determining personalized treatments, targeting the specific genetic profile of an individual. Moreover, technical limitations, ethical concerns, and legal considerations are discussed.},
}
@article{pmid37372961,
  author   = {Abdul Aziz, M and Masmoudi, K},
  title    = {Insights into the Transcriptomics of Crop Wild Relatives to Unravel the Salinity Stress Adaptive Mechanisms.},
  journal  = {International journal of molecular sciences},
  year     = {2023},
  volume   = {24},
  number   = {12},
  pages    = {},
  doi      = {10.3390/ijms24129813},
  pmid     = {37372961},
  issn     = {1422-0067},
  support  = {12F041//United Arab Emirates University/ ; },
  abstract = {The narrow genomic diversity of modern cultivars is a major bottleneck for enhancing the crop's salinity stress tolerance. The close relatives of modern cultivated plants, crop wild relatives (CWRs), can be a promising and sustainable resource to broaden the diversity of crops. Advances in transcriptomic technologies have revealed the untapped genetic diversity of CWRs that represents a practical gene pool for improving the plant's adaptability to salt stress. Thus, the present study emphasizes the transcriptomics of CWRs for salinity stress tolerance. In this review, the impacts of salt stress on the plant's physiological processes and development are overviewed, and the transcription factors (TFs) regulation of salinity stress tolerance is investigated. In addition to the molecular regulation, a brief discussion on the phytomorphological adaptation of plants under saline environments is provided. The study further highlights the availability and use of transcriptomic resources of CWR and their contribution to pangenome construction. Moreover, the utilization of CWRs' genetic resources in the molecular breeding of crops for salinity stress tolerance is explored. Several studies have shown that cytoplasmic components such as calcium and kinases, and ion transporter genes such as Salt Overly Sensitive 1 (SOS1) and High-affinity Potassium Transporters (HKTs) are involved in the signaling of salt stress, and in mediating the distribution of excess Na[+] ions within the plant cells. 
Recent comparative analyses of transcriptomic profiling through RNA sequencing (RNA-Seq) between the crops and their wild relatives have unraveled several TFs, stress-responsive genes, and regulatory proteins for generating salinity stress tolerance. This review specifies that the use of CWRs transcriptomics in combination with modern breeding experimental approaches such as genomic editing, de novo domestication, and speed breeding can accelerate the CWRs utilization in the breeding programs for enhancing the crop's adaptability to saline conditions. The transcriptomic approaches optimize the crop genomes with the accumulation of favorable alleles that will be indispensable for designing salt-resilient crops.},
}
@article{pmid37369325,
  author   = {Thorwall, S and Trivedi, V and Ottum, E and Wheeldon, I},
  title    = {Population genomics-guided engineering of phenazine biosynthesis in {Pseudomonas} chlororaphis.},
  journal  = {Metabolic engineering},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1016/j.ymben.2023.06.008},
  pmid     = {37369325},
  issn     = {1096-7184},
  abstract = {The emergence of next-generation sequencing (NGS) technologies has made it possible to not only sequence entire genomes, but also identify metabolic engineering targets across the pangenome of a microbial population. This study leverages NGS data as well as existing molecular biology and bioinformatics tools to identify and validate genomic signatures for improving phenazine biosynthesis in Pseudomonas chlororaphis. We sequenced a diverse collection of 34 Pseudomonas isolates using short- and long-read sequencing techniques and assembled whole genomes using the NGS reads. In addition, we assayed three industrially relevant phenotypes (phenazine production, biofilm formation, and growth temperature) for these isolates in two different media conditions. We then provided the whole genomes and phenazine production data to a unitig-based microbial genome-wide association study (mGWAS) tool to identify novel genomic signatures responsible for phenazine production in P. chlororaphis. Post-processing of the mGWAS analysis results yielded 330 significant hits influencing the biosynthesis of one or more phenazine compounds. Based on a quantitative metric (called the phenotype score), we elucidated the most influential hits for phenazine production and experimentally validated them in vivo in the most optimal phenazine producing strain. Two genes significantly increased phenazine-1-carboxamide (PCN) production: a histidine transporter (ProY_1), and a putative carboxypeptidase (PS__04251). A putative MarR-family transcriptional regulator decreased PCN titer when overexpressed in a high PCN producing isolate. 
Overall, this work seeks to demonstrate the utility of a population genomics approach as an effective strategy in enabling identification of targets for metabolic engineering of bioproduction hosts.},
}
@article{pmid37365340,
  author   = {Chin, CS and Behera, S and Khalak, A and Sedlazeck, FJ and Sudmant, PH and Wagner, J and Zook, JM},
  title    = {Multiscale analysis of pangenomes enables improved representation of genomic diversity for repetitive and clinically relevant genes.},
  journal  = {Nature methods},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  pmid     = {37365340},
  issn     = {1548-7105},
  support  = {1U01HG011758-01//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; 1U01HG011758-01//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; R35GM142916//U.S. Department of Health & Human Services | NIH | National Institute of General Medical Sciences (NIGMS)/ ; },
  abstract = {Advancements in sequencing technologies and assembly methods enable the regular production of high-quality genome assemblies characterizing complex regions. However, challenges remain in efficiently interpreting variation at various scales, from smaller tandem repeats to megabase rearrangements, across many human genomes. We present a PanGenome Research Tool Kit (PGR-TK) enabling analyses of complex pangenome structural and haplotype variation at multiple scales. We apply the graph decomposition methods in PGR-TK to the class II major histocompatibility complex demonstrating the importance of the human pangenome for analyzing complicated regions. Moreover, we investigate the Y-chromosome genes, DAZ1/DAZ2/DAZ3/DAZ4, of which structural variants have been linked to male infertility, and X-chromosome genes OPN1LW and OPN1MW linked to eye disorders. We further showcase PGR-TK across 395 complex repetitive medically important genes. This highlights the power of PGR-TK to resolve complex variation in regions of the genome that were previously too complex to analyze.},
}
@article{pmid37364097,
  year     = {2023},
  author   = {Fayyaz, A and Robinson, G and Chang, PL and Bekele, D and Yimer, S and Carrasquilla-Garcia, N and Negash, K and Surendrarao, A and von Wettberg, EJB and Kemal, SA and Tesfaye, K and Fikre, A and Farmer, AD and Cook, DR},
  title    = {Hiding in plain sight: Genome-wide recombination and a dynamic accessory genome drive diversity in {Fusarium} oxysporum f.sp. ciceris.},
  journal  = {Proceedings of the National Academy of Sciences of the United States of America},
  volume   = {120},
  number   = {27},
  pages    = {e2220570120},
  doi      = {10.1073/pnas.2220570120},
  pmid     = {37364097},
  issn     = {1091-6490},
  support  = {AID-OAA-A-14-00008//Bureau for Economic Growth, Education, and Environment, United States Agency for International Development (E3)/ ; IOS-1339346//National Science Foundation (NSF)/ ; NA//Two Blades Foundation/ ; },
  abstract = {Understanding the origins of variation in agricultural pathogens is of fundamental interest and practical importance, especially for diseases that threaten food security. Fusarium oxysporum is among the most important of soil-borne pathogens, with a global distribution and an extensive host range. The pathogen is considered to be asexual, with horizontal transfer of chromosomes providing an analog of assortment by meiotic recombination. Here, we challenge those assumptions based on the results of population genomic analyses, describing the pathogen's diversity and inferring its origins and functional consequences in the context of a single, long-standing agricultural system. We identify simultaneously low nucleotide distance among strains, and unexpectedly high levels of genetic and genomic variability. We determine that these features arise from a combination of genome-scale recombination, best explained by widespread sexual reproduction, and presence-absence variation consistent with chromosomal rearrangement. Pangenome analyses document an accessory genome more than twice the size of the core genome, with contrasting evolutionary dynamics. The core genome is stable, with low diversity and high genetic differentiation across geographic space, while the accessory genome is paradoxically more diverse and unstable but with lower genetic differentiation and hallmarks of contemporary gene flow at local scales. We suggest a model in which episodic sexual reproduction generates haplotypes that are selected and then maintained through clone-like dynamics, followed by contemporary genomic rearrangements that reassort the accessory genome among sympatric strains. Taken together, these processes contribute unique genome content, including reassortment of virulence determinants that may explain observed variation in pathogenic potential.},
}
@article{pmid37361319,
  year     = {2023},
  author   = {Torres-Morales, J and Mark Welch, JL and Dewhirst, FE and Borisy, GG},
  title    = {Site-specialization of human oral {Gemella} species.},
  journal  = {Journal of oral microbiology},
  volume   = {15},
  number   = {1},
  pages    = {2225261},
  doi      = {10.1080/20002297.2023.2225261},
  pmid     = {37361319},
  issn     = {2000-2297},
  abstract = {Gemella species are core members of the human oral microbiome in healthy subjects and are regarded as commensals, although they can cause opportunistic infections. Our objective was to evaluate the site-specialization of Gemella species among various habitats within the mouth by combining pangenomics and metagenomics. With pangenomics, we identified genome relationships and categorized genes as core and accessory to each species. With metagenomics, we identified the primary oral habitat of individual genomes. Our results establish that the genomes of three species, G. haemolysans, G. sanguinis and G. morbillorum, are abundant and prevalent in human mouths at different oral sites: G. haemolysans on buccal mucosa and keratinized gingiva; G. sanguinis on tongue dorsum, throat, and tonsils; and G. morbillorum in dental plaque. The gene-level basis of site-specificity was investigated by identifying genes that were core to Gemella genomes at a specific oral site but absent from other Gemella genomes. The riboflavin biosynthesis pathway was present in G. haemolysans genomes associated with buccal mucosa but absent from the rest of the genomes. Overall, metapangenomics show that Gemella species have clear ecological preferences in the oral cavity of healthy humans and provides an approach to identifying gene-level drivers of site specificity.},
}
@article{pmid37359562,
  year     = {2023},
  author   = {Touray, BJB and Hanafy, M and Phanse, Y and Hildebrand, R and Talaat, AM},
  title    = {Protective {RNA} nanovaccines against {Mycobacterium} avium subspecies hominissuis.},
  journal  = {Frontiers in immunology},
  volume   = {14},
  pages    = {1188754},
  doi      = {10.3389/fimmu.2023.1188754},
  pmid     = {37359562},
  issn     = {1664-3224},
  abstract = {The induction of an effective immune response is critical for the success of mRNA-based therapeutics. Here, we developed a nanoadjuvant system compromised of Quil-A and DOTAP (dioleoyl 3 trimethylammonium propane), hence named QTAP, for the efficient delivery of mRNA vaccine constructs into cells. Electron microscopy indicated that the complexation of mRNA with QTAP forms nanoparticles with an average size of 75 nm and which have {\textasciitilde}90\% encapsulation efficiency. The incorporation of pseudouridine-modified mRNA resulted in higher transfection efficiency and protein translation with low cytotoxicity than unmodified mRNA. When QTAP-mRNA or QTAP alone transfected macrophages, pro-inflammatory pathways (e.g., NLRP3, NF-kb, and MyD88) were upregulated, an indication of macrophage activation. In C57Bl/6 mice, QTAP nanovaccines encoding Ag85B and Hsp70 transcripts (QTAP-85B+H70) were able to elicit robust IgG antibody and IFN-γ, TNF-α, IL-2, and IL-17 cytokines responses. Following aerosol challenge with a clinical isolate of M. avium ss. hominissuis (M.ah), a significant reduction of mycobacterial counts was observed in lungs and spleens of only immunized animals at both 4- and 8-weeks post-challenge. As expected, reduced levels of M. ah were associated with diminished histological lesions and robust cell-mediated immunity. Interestingly, polyfunctional T-cells expressing IFN-γ, IL-2, and TNF-α were detected at 8 but not 4 weeks post-challenge. Overall, our analysis indicated that QTAP is a highly efficient transfection agent and could improve the immunogenicity of mRNA vaccines against pulmonary M. ah, an infection of significant public health importance, especially to the elderly and to those who are immune compromised.},
}
@article{pmid37358412,
  year     = {2023},
  author   = {Li, T and Huang, J and Yang, S and Chen, J and Yao, Z and Zhong, M and Zhong, X and Ye, X},
  title    = {Pan-Genome-Wide Association Study of Serotype {19A} Pneumococci Identifies Disease-Associated Genes.},
  journal  = {Microbiology spectrum},
  pages    = {e0407322},
  doi      = {10.1128/spectrum.04073-22},
  pmid     = {37358412},
  issn     = {2165-0497},
  abstract = {Despite the widespread implementation of pneumococcal vaccines, hypervirulent Streptococcus pneumoniae serotype 19A is endemic worldwide. It is still unclear whether specific genetic elements contribute to complex pathogenicity of serotype 19A isolates. We performed a large-scale pan-genome-wide association study (pan-GWAS) of 1,292 serotype 19A isolates sampled from patients with invasive disease and asymptomatic carriers. To address the underlying disease-associated genotypes, a comprehensive analysis using three methods (Scoary, a linear mixed model, and random forest) was performed to compare disease and carriage isolates to identify genes consistently associated with disease phenotype. By using three pan-GWAS methods, we found consensus on statistically significant associations between genotypes and disease phenotypes (disease or carriage), with a subset of 30 consistently significant disease-associated genes. The results of functional annotation revealed that these disease-associated genes had diverse predicted functions, including those that participated in mobile genetic elements, antibiotic resistance, virulence, and cellular metabolism. Our findings suggest the multifactorial pathogenicity nature of this hypervirulent serotype and provide important evidence for the design of novel protein-based vaccines to prevent and control pneumococcal disease. IMPORTANCE It is important to understand the genetic and pathogenic characteristics of S. pneumoniae serotype 19A, which may provide important information for the prevention and treatment of pneumococcal disease. This global large-sample pan-GWAS study has identified a subset of 30 consistently significant disease-associated genes that are involved in mobile genetic elements, antibiotic resistance, virulence, and cellular metabolism. These findings suggest the multifactorial pathogenicity nature of hypervirulent S. pneumoniae serotype 19A isolates and provide implications for the design of novel protein-based vaccines.},
}
@article{pmid37356834,
  year     = {2023},
  author   = {Prakash, JAJ and Jacob, JJ and Rachel, T and Vasudevan, K and Amladi, A and Iyadurai, R and Manesh, A and Veeraraghavan, B},
  title    = {Genomic analysis of {Brucella} melitensis reveals new insights into phylogeny and evolutionary divergence.},
  journal  = {Indian journal of medical microbiology},
  volume   = {44},
  pages    = {100360},
  doi      = {10.1016/j.ijmmb.2023.02.003},
  pmid     = {37356834},
  issn     = {1998-3646},
  abstract = {PURPOSE: Brucellosis is a bacterial zoonotic disease caused by genus Brucella. The disease is often transmitted to humans by direct or indirect contact with infected livestock or from laboratory exposure. In this study two clinical isolates of Brucella melitensis were subjected to whole genome sequencing (WGS) using Ion Torrent PGM and Oxford Nanopore MinIon platform.
METHODS: The two hybrid complete genomes were subjected to core gene SNP analysis to identify the relative evolutionary position. To distinguish between the various lineages of B. melitensis, Pangenome analysis was carried out.
RESULTS: Phylogenetic analysis revealed that both the study isolates (ST8) clustered along the other Asian isolates that formed genotype II. Genome wide analyses of 326 B melitensis isolates suggests 2171 gene clusters were shared across all the genomes while 3552 gene clusters were considered as accessory genes.
CONCLUSION: Here we attempted to provide the gain and loss of six unique genes that defined the phylogenetic lineages and complex evolutionary process. As the severity and prevalence of human brucellosis is increasing a better understanding of Brucella genomics and transmission dynamics is needed.},
}
@article{pmid37356030,
  year     = {2023},
  author   = {Gupta, RK and Tikariha, H and Purohit, HJ and Khardenavis, AA},
  title    = {Pangenome-driven insights into nitrogen metabolic characteristics of {Citrobacter} portucalensis strain {AAK\_AS5} associated with wastewater nitrogen removal.},
  journal  = {Archives of microbiology},
  volume   = {205},
  number   = {7},
  pages    = {270},
  pmid     = {37356030},
  issn     = {1432-072X},
  abstract = {Nitrogen metabolism in the genus Citrobacter is very poorly studied despite its several implications in wastewater treatment. In the current study, Citrobacter portucalensis strain AAK\_AS5 was assessed for remediation of simulated wastewater supplemented with different inorganic nitrogen sources. Combination of (NH4)2SO4 with KNO3 was the most preferred for achieving high growth density followed by (NH4)2SO4 and KNO3 alone. This was in agreement with highest ammonical nitrogen removal of 92.9\% in the presence of combined nitrogen sources and the corresponding nitrate nitrogen removal of 93\% in the presence of KNO3. Furthermore, these removal capacities were validated by investigating the uniqueness and the spread of metabolic features through pan-genomic approach that revealed the largest number of unique genes (2097) and accessory genes (705) in strain AAK\_AS5. Of the total 44 different types of nitrogen metabolism-related genes, 39 genes were associated with the core genome, while 5 genes such as gltI, nasA, nasR, nrtA, and ntrC uniquely belonged to the accessory genome. Strain AAK\_AS5 possessed three major nitrate removal pathways viz., assimilatory and dissimilatory nitrate reduction to ammonia (ANRA \& DNRA), and denitrification; however, the absence of nitrification was compensated by ammonia assimilation catalyzed by gene products of the GDH and GS-GOGAT pathways. narGHIJ encoding the respiratory nitrate reductase was commonly identified in all the studied genomes, while genes such as nirK, norB, and nosZ were uniquely present in the strain AAK\_AS5 only. A markedly different genetic content and metabolic diversity between the strains reflected their adaptive evolution in the environment thus highlighting the significance of C. portucalensis AAK\_AS5 for potential application in nitrogen removal from wastewater.},
}
@article{pmid37354526,
  year     = {2023},
  author   = {Masutani, B and Suzuki, Y and Suzuki, Y and Morishita, S},
  title    = {{JTK}: targeted diploid genome assembler.},
  journal  = {Bioinformatics (Oxford, England)},
  doi      = {10.1093/bioinformatics/btad398},
  pmid     = {37354526},
  issn     = {1367-4811},
  abstract = {MOTIVATION: Diploid assembly, or determining sequences of homologous chromosomes separately, is essential to elucidate genetic differences between haplotypes. One approach is to call and phase single nucleotide variants (SNVs) on a reference sequence. However, this approach becomes unstable on large segmental duplications (SDs) or structural variations (SVs) because the alignments of reads deriving from these regions tend to be unreliable. Another approach is to use highly accurate PacBio HiFi reads to output diploid assembly directly. Nonetheless, HiFi reads cannot phase homozygous regions longer than their length and require oxford nanopore technology (ONT) reads or Hi-C to produce a fully phased assembly. Is a single long-read sequencing technology sufficient to create an accurate diploid assembly?
RESULTS: Here, we present JTK, a megabase-scale diploid genome assembler. It first randomly samples kilobase-scale sequences (called "chunks") from the long reads, phases variants found on them, and produces two haplotypes. The novel idea of JTK is to utilize chunks to capture SNVs and SVs simultaneously. From 60-fold ONT reads on the HG002 and a Japanese sample, it fully assembled two haplotypes with approximately 99.9\% accuracy on the histocompatibility complex (MHC) and the leukocyte receptor complex (LRC) regions, which was impossible by the reference-based approach. In addition, in the LRC region on a Japanese sample, JTK output an assembly of better contiguity than those built from high-coverage HiFi + Hi-C. In the coming age of pan-genomics, JTK would complement the reference-based phasing method to assemble the difficult-to-assemble but medically important regions.
CODE AVAILABILITY: JTK is available at https://github.com/ban-m/jtk, and the datasets are available at https://doi.org/10.5281/zenodo.7790310 or JGAS000580 in DDBJ.
SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
}
@article{pmid37353434,
  year    = {2023},
  author  = {Wang, B and Dang, N and Yang, X and Xu, S and Ye, K},
  title   = {The human pangenome reference: the beginning of a new era for genomics.},
  journal = {Science bulletin},
  doi     = {10.1016/j.scib.2023.06.014},
  pmid    = {37353434},
  issn    = {2095-9281},
}
@article{pmid37349950,
  year     = {2023},
  author   = {Pei, Z and Liu, Y and Yi, Z and Liao, J and Wang, H and Zhang, H and Chen, W and Lu, W},
  title    = {Diversity within the species {Clostridium} butyricum: pan-genome, phylogeny, prophage, carbohydrate utilization, and antibiotic resistance.},
  journal  = {Journal of applied microbiology},
  doi      = {10.1093/jambio/lxad127},
  pmid     = {37349950},
  issn     = {1365-2672},
  abstract = {AIMS: Clostridium butyricum has been recognized as a strong candidate for the "next generation of probiotics" due to its beneficial roles on humans. Owing to our current understanding of this species is limited, it is imperative to unveil the genetic variety and biological properties of C. butyricum on sufficient strains.
METHODS AND RESULTS: We isolated 53 C. butyricum strains and collected 25 publicly available genomes to comprehensively assess the genomic and phenotypic diversity of this species. Average nucleotide identity and phylogeny suggested that multiple C. butyricum strains might share the same niche. C. butyricum genomes were replete with prophage elements, but the CRISPR-positive strain efficiently inhibited prophage integration. C. butyricum utilizes cellulose, alginate, and soluble starch universally, and shows general resistance to aminoglycoside antibiotics.
CONCLUSIONS: C. butyricum exhibited a broad genetic diversity from the extraordinarily open pan-genome, extremely convergent core genome, and ubiquitous prophages. In carbohydrate utilization and antibiotic resistance, partial genotypes have a certain guiding significance for phenotypes.},
}
@article{pmid37349608,
  year     = {2023},
  author   = {Manivannan, A and Cheeran Amal, T},
  title    = {Deciphering the complex cotton genome for improving fiber traits and abiotic stress resilience in sustainable agriculture.},
  journal  = {Molecular biology reports},
  pmid     = {37349608},
  issn     = {1573-4978},
  abstract = {BACKGROUND: Understanding the complex cotton genome is of paramount importance in devising a strategy for sustainable agriculture. Cotton is probably the most economically important cash crop known for its cellulose-rich fiber content. The cotton genome has become an ideal model for deciphering polyploidization due to its polyploidy, setting it apart from other major crops. However, the main challenge in understanding the functional and regulatory functions of many genes in cotton is still the complex cotton polyploidy genome, which is not limited to a single role. Cotton production is vulnerable to the sensitive effects of climate change, which can alter or aggravate soil, pests, and diseases. Thus, conventional plant breeding coupled with advanced technologies has led to substantial progress being made in cotton production.
GENOMICS APPROACHES IN COTTON: In the frontier areas of genomics research, cotton genomics has gained momentum accomplished by robust high-throughput sequencing platforms combined with novel computational tools to make the cotton genome more tractable. Advances in long-read sequencing have allowed for the generation of the complete set of cotton gene transcripts giving incisive scientific knowledge in cotton improvement. In contrast, the integration of the latest sequencing platforms has been used to generate multiple high-quality reference genomes in diploid and tetraploid cotton. While pan-genome and 3D genomic studies are still in the early stages in cotton, it is anticipated that rapid advances in sequencing, assembly algorithms, and analysis pipelines will have a greater impact on advanced cotton research.
CONCLUSIONS: This review article briefly compiles substantial contributions in different areas of the cotton genome, which include genome sequencing, genes, and their molecular regulatory networks in fiber development and stress tolerance mechanism. This will greatly help us in understanding the robust genomic organization which in turn will help unearth candidate genes for functionally important agronomic traits.},
}
@article{pmid37341494,
  year     = {2023},
  author   = {Potter, RF and Zhang, K and Reimler, B and Marino, J and Muenks, CE and Alvarado, K and Wallace, MA and Westblade, LF and McElvania, E and Yarbrough, ML and Hunstad, DA and Dantas, G and Burnham, CD},
  title    = {Uncharacterized and lineage-specific accessory genes within the {Proteus} mirabilis pan-genome landscape.},
  journal  = {mSystems},
  pages    = {e0015923},
  doi      = {10.1128/msystems.00159-23},
  pmid     = {37341494},
  issn     = {2379-5077},
  abstract = {Proteus mirabilis is a Gram-negative bacterium recognized for its unique swarming motility and urease activity. A previous proteomic report on four strains hypothesized that, unlike other Gram-negative bacteria, P. mirabilis may not exhibit significant intraspecies variation in gene content. However, there has not been a comprehensive analysis of large numbers of P. mirabilis genomes from various sources to support or refute this hypothesis. We performed comparative genomic analysis on 2,060 Proteus genomes. We sequenced the genomes of 893 isolates recovered from clinical specimens from three large US academic medical centers, combined with 1,006 genomes from NCBI Assembly and 161 genomes assembled from Illumina reads in the public domain. We used average nucleotide identity (ANI) to delineate species and subspecies, core genome phylogenetic analysis to identify clusters of highly related P. mirabilis genomes, and pan-genome annotation to identify genes of interest not present in the model P. mirabilis strain HI4320. Within our cohort, Proteus is composed of 10 named species and 5 uncharacterized genomospecies. P. mirabilis can be subdivided into three subspecies; subspecies 1 represented 96.7\% (1,822/1,883) of all genomes. The P. mirabilis pan-genome includes 15,399 genes outside of HI4320, and 34.3\% (5,282/15,399) of these genes have no putative assigned function. Subspecies 1 is composed of several highly related clonal groups. Prophages and gene clusters encoding putatively extracellular-facing proteins are associated with clonal groups. Uncharacterized genes not present in the model strain P. mirabilis HI4320 but with homology to known virulence-associated operons can be identified within the pan-genome. IMPORTANCE Gram-negative bacteria use a variety of extracellular facing factors to interact with eukaryotic hosts. Due to intraspecies genetic variability, these factors may not be present in the model strain for a given organism, potentially providing incomplete understanding of host-microbial interactions. In contrast to previous reports on P. mirabilis, but similar to other Gram-negative bacteria, P. mirabilis has a mosaic genome with a linkage between phylogenetic position and accessory genome content. P. mirabilis encodes a variety of genes that may impact host-microbe dynamics beyond what is represented in the model strain HI4320. The diverse, whole-genome characterized strain bank from this work can be used in conjunction with reverse genetic and infection models to better understand the impact of accessory genome content on bacterial physiology and pathogenesis of infection.},
}
@comment{pmid37337218: the export left an empty final author ("and , "). It is replaced by the collective author, wrapped in its own braces so BibTeX keeps it as one corporate name.}
@article{pmid37337218,
  year     = {2023},
  author   = {Smith, TPL and Bickhart, DM and Boichard, D and Chamberlain, AJ and Djikeng, A and Jiang, Y and Low, WY and Pausch, H and Demyda-Peyrás, S and Prendergast, J and Schnabel, RD and Rosen, BD and {Bovine Pangenome Consortium}},
  title    = {The {Bovine Pangenome Consortium}: democratizing production and accessibility of genome assemblies for global cattle breeds and other bovine species.},
  journal  = {Genome biology},
  volume   = {24},
  number   = {1},
  pages    = {139},
  pmid     = {37337218},
  issn     = {1474-760X},
  abstract = {The Bovine Pangenome Consortium (BPC) is an international collaboration dedicated to the assembly of cattle genomes to develop a more complete representation of cattle genomic diversity. The goal of the BPC is to provide genome assemblies and a community-agreed pangenome representation to replace breed-specific reference assemblies for cattle genomics. The BPC invites partners sharing our vision to participate in the production of these assemblies and the development of a common, community-approved, pangenome reference as a public resource for the research community (https://bovinepangenome.github.io/). This community-driven resource will provide the context for comparison between studies and the future foundation for cattle genomic selection.},
}
@article{pmid37333201,
  year     = {2023},
  author   = {Tran, TH and Roberts, AQ and F Escapa, I and Gao, W and Segre, JA and Kong, HH and Conlan, S and Kelly, MS and Lemon, KP},
  title    = {Metabolic capabilities are highly conserved among human nasal-associated {Corynebacterium} species in pangenomic analyses.},
  journal  = {bioRxiv : the preprint server for biology},
  doi      = {10.1101/2023.06.05.543719},
  pmid     = {37333201},
  abstract = {UNLABELLED: Corynebacterium species are globally ubiquitous in human nasal microbiota across the lifespan. Moreover, nasal microbiota profiles typified by higher relative abundances of Corynebacterium are often positively associated with health. Among the most common human nasal Corynebacterium species are C. propinquum, C. pseudodiphtheriticum, C. accolens, and C. tuberculostearicum. Based on the prevalence of these species, at least two likely coexist in the nasal microbiota of 82\% of adults. To gain insight into the functions of these four species, we identified genomic, phylogenomic, and pangenomic properties and estimated the functional protein repertoire and metabolic capabilities of 87 distinct human nasal Corynebacterium strain genomes: 31 from Botswana and 56 from the U.S. C. pseudodiphtheriticum had geographically distinct clades consistent with localized strain circulation, whereas some strains from the other species had wide geographic distribution across Africa and North America. All four species had similar genomic and pangenomic structures. Gene clusters assigned to all COG metabolic categories were overrepresented in the persistent (core) compared to the accessory genome of each species indicating limited strain-level variability in metabolic capacity. Moreover, core metabolic capabilities were highly conserved among the four species indicating limited species-level metabolic variation. Strikingly, strains in the U.S. clade of C. pseudodiphtheriticum lacked genes for assimilatory sulfate reduction present in the Botswanan clade and in the other studied species, indicating a recent, geographically related loss of assimilatory sulfate reduction. Overall, the minimal species and strain variability in metabolic capacity implies coexisting strains might have limited ability to occupy distinct metabolic niches.
IMPORTANCE: Pangenomic analysis with estimation of functional capabilities facilitates our understanding of the full biologic diversity of bacterial species. We performed systematic genomic, phylogenomic, and pangenomic analyses with qualitative estimation of the metabolic capabilities of four common human nasal Corynebacterium species generating a foundational resource. The prevalence of each species in human nasal microbiota is consistent with the common coexistence of at least two species. We identified a notably high level of metabolic conservation within and among species indicating limited options for species to occupy distinct metabolic niches and pointing to the importance of investigating interactions among nasal Corynebacterium species. Comparing strains from two continents, C. pseudodiphtheriticum had restricted geographic strain distribution characterized by an evolutionarily recent loss of assimilatory sulfate reduction in North American strains. Our findings contribute to understanding the functions of Corynebacterium within human nasal microbiota and to evaluating their potential for future use as biotherapeutics.},
}
@article{pmid37323942,
  year     = {2023},
  author   = {Awori, RM and Waturu, CN and Pidot, SJ and Amugune, NO and Bode, HB},
  title    = {Draft genomes, phylogenomic reconstruction and comparative genome analysis of three {Xenorhabdus} strains isolated from soil-dwelling nematodes in {Kenya}.},
  journal  = {Access microbiology},
  volume   = {5},
  number   = {5},
  pmid     = {37323942},
  issn     = {2516-8290},
  abstract = {As a proven source of potent and selective antimicrobials, Xenorhabdus bacteria are important to an age plagued with difficult-to-treat microbial infections. Yet, only 27 species have been described to date. In this study, a novel Xenorhabdus species was discovered through genomic studies on three isolates from Kenyan soils. Soils in Western Kenya were surveyed for steinernematids and Steinernema isolates VH1 and BG5 were recovered from red volcanic loam soils from cultivated land in Vihiga and clay soils from riverine land in Bungoma respectively. From the two nematode isolates, Xenorhabdus sp. BG5 and Xenorhabdus sp. VH1 were isolated. The genomes of these two, plus that of X. griffiniae XN45 - this was previously isolated from Steinernema sp. scarpo that also originated from Kenyan soils - were sequenced and assembled. Nascent genome assemblies of the three isolates were of good quality with over 70 \% of their proteome having known functions. These three isolates formed the X. griffiniae clade in a phylogenomic reconstruction of the genus. Their species were delineated using three overall genome relatedness indices: an unnamed species of the genus, Xenorhabdus sp. BG5, X. griffiniae VH1 and X. griffiniae XN45. A pangenome analysis of this clade revealed that over 70 \% of species-specific genes encoded unknown functions. Transposases were linked to genomic islands in Xenorhabdus sp. BG5. Thus, overall genome-related indices sufficiently delineated species of two new Xenorhabdus isolates from Kenya, both of which were closely related to X. griffiniae. The functions encoded by most species-specific genes in the X. griffiniae clade remain unknown.},
}
@article{pmid37323913,
  year     = {2023},
  author   = {Huang, W and Hu, S and Zhu, Y and Liu, S and Zhou, X and Fang, Y and Lu, Y and Wang, R},
  title    = {Metagenomic surveillance and comparative genomic analysis of {Chlamydia} psittaci in patients with pneumonia.},
  journal  = {Frontiers in microbiology},
  volume   = {14},
  pages    = {1157888},
  pmid     = {37323913},
  issn     = {1664-302X},
  abstract = {Chlamydia psittaci, a strictly intracellular bacterium, is an underestimated etiologic agent leading to infections in a broad range of animals and mild illness or pneumonia in humans. In this study, the metagenomes of bronchoalveolar lavage fluids from the patients with pneumonia were sequenced and highly abundant C. psittaci was found. The target-enriched metagenomic reads were recruited to reconstruct draft genomes with more than 99\% completeness. Two C. psittaci strains from novel sequence types were detected and these were closely related to the animal-borne isolates derived from the lineages of ST43 and ST28, indicating the zoonotic transmissions of C. psittaci would benefit its prevalence worldwide. Comparative genomic analysis combined with public isolate genomes revealed that the pan-genome of C. psittaci possessed a more stable gene repertoire than those of other extracellular bacteria, with {\textasciitilde}90\% of the genes per genome being conserved core genes. Furthermore, the evidence for significantly positive selection was identified in 20 virulence-associated gene products, particularly bacterial membrane-embedded proteins and type three secretion machines, which may play important roles in the pathogen-host interactions. This survey uncovered novel strains of C. psittaci causing pneumonia and the evolutionary analysis characterized prominent gene candidates involved in bacterial adaptation to immune pressures. The metagenomic approach is of significance to the surveillance of difficult-to-culture intracellular pathogens and the research into molecular epidemiology and evolutionary biology of C. psittaci.},
}
@article {pmid37323667, year = {2023}, author = {Yang, MR and Su, SF and Wu, YW}, title = {Using bacterial pan-genome-based feature selection approach to improve the prediction of minimum inhibitory concentration ({MIC}).}, journal = {Frontiers in genetics}, volume = {14}, number = {}, pages = {1054032}, pmid = {37323667}, issn = {1664-8021}, abstract = {Background: Predicting the resistance profiles of antimicrobial resistance (AMR) pathogens is becoming more and more important in treating infectious diseases. Various attempts have been made to build machine learning models to classify resistant or susceptible pathogens based on either known antimicrobial resistance genes or the entire gene set. However, the phenotypic annotations are translated from minimum inhibitory concentration (MIC), which is the lowest concentration of antibiotic drugs in inhibiting certain pathogenic strains. Since the MIC breakpoints that classify a strain to be resistant or susceptible to specific antibiotic drug may be revised by governing institutes, we refrained from translating these MIC values into the categories "susceptible" or "resistant" but instead attempted to predict the MIC values using machine learning approaches. Results: By applying a machine learning feature selection approach on a Salmonella enterica pan-genome, in which the protein sequences were clustered to identify highly similar gene families, we showed that the selected features (genes) performed better than known AMR genes, and that models built on the selected genes achieved very accurate MIC prediction. 
Functional analysis revealed that about half of the selected genes were annotated as hypothetical proteins (i.e., with unknown functional roles), and that only a small portion of known AMR genes were among the selected genes, indicating that applying feature selection on the entire gene set has the potential of uncovering novel genes that may be associated with and may contribute to pathogenic antimicrobial resistances. Conclusion: The application of the pan-genome-based machine learning approach was indeed capable of predicting MIC values with very high accuracy. The feature selection process may also identify novel AMR genes for inferring bacterial antimicrobial resistance phenotypes.}, }
@article {pmid37322453, year = {2023}, author = {Sun, Y and Kou, DR and Li, Y and Ni, JP and Wang, J and Zhang, YM and Wang, QN and Jiang, B and Wang, X and Sun, YX and Xu, XT and Tan, XJ and Zhang, YJ and Kong, XD}, title = {Pan-genome of {Citrullus} genus highlights the extent of presence/absence variation during domestication and selection.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {332}, pmid = {37322453}, issn = {1471-2164}, abstract = {The rich genetic diversity in Citrullus lanatus and the other six species in the Citrullus genus provides important sources in watermelon breeding. Here, we present the Citrullus genus pan-genome based on the 400 Citrullus genus resequencing data, showing that 477 Mb contigs and 6249 protein-coding genes were absent in the Citrullus lanatus reference genome. In the Citrullus genus pan-genome, there are a total of 8795 (30.5%) genes that exhibit presence/absence variations (PAVs). Presence/absence variation (PAV) analysis showed that a lot of gene PAV were selected during the domestication and improvement, such as 53 favorable genes and 40 unfavorable genes were identified during the C. mucosospermus to C. lanatus landrace domestication. We also identified 661 resistance gene analogs (RGAs) in the Citrullus genus pan-genome, which contains 90 RGAs (89 variable and 1 core gene) located on the pangenome additional contigs. By gene PAV-based GWAS, 8 gene presence/absence variations were found associated with flesh color. Finally, based on the results of gene PAV selection analysis between watermelon populations with different fruit colors, we identified four non-reference candidate genes associated with carotenoid accumulation, which had a significantly higher frequency in the white flesh. These results will provide an important source for watermelon breeding.}, }
@article {pmid37318846, year = {2023}, author = {Zang, X and Lv, H and Huang, P and Sun, Z and Gu, C and Ding, W and Jiao, X and Huang, J}, title = {Genomic Insights into Pangenome and Antimicrobial Resistance in {Campylobacter} spp. Isolated from Chickens at Specific Growth Stages.}, journal = {Foodborne pathogens and disease}, volume = {}, number = {}, pages = {}, doi = {10.1089/fpd.2023.0008}, pmid = {37318846}, issn = {1556-7125}, abstract = {Improved understanding of the genetic basis of Campylobacter spp. colonization of poultry at specific growth stage is the key to developing a farm-based strategy to prevent flock colonization. In this study, 39 Campylobacter spp. strains (chicken isolates, n = 29; environmental isolates, n = 10) were collected from six marked chickens at the growth stage from week 7 to week 13. Then, we use comparative genomics techniques to analyze the temporal genomic characteristics of Campylobacter spp. in individual chickens across a production cycle. Genotype, average nucleotide identity (ANI), and phylogenetic trees all indicated the evolutionary relationships between the strains from different sampling weeks. The clustering of isolates was not dependent on sampling time and sample source, indicating that strains could persist over several weeks in a flock. Notably, 10 antimicrobial resistance (AMR) genes were identified in the genome of Campylobacter coli isolates, and the genomes of isolates sampled at week 11 harbored fewer AMR genes and insertion sequences (IS) than the isolates from other weeks. Consistent with this, pangenome-wide association analysis demonstrated that gene acquisition and loss could happen at week 11 and week 13. These genes were mainly associated with cell membrane biogenesis, ion metabolism, and DNA replication, suggesting that genomic change may be related to Campylobacter adaptive response. This is a novel study focused on the genetic changes occurring in Campylobacter spp. 
isolates in a particular space and time; it highlights that accessory genes and AMR genes were overall stable at chicken farm, which will help us understand the survival and the transmission route of Campylobacter spp. better, and have the potential to inform the strategy on the safety control of market-ready chickens.}, }
@article {pmid37317256, year = {2023}, author = {Stone, NE and McDonough, RF and Hamond, C and LeCount, K and Busch, JD and Dirsmith, KL and Rivera-Garcia, S and Soltero, F and Arnold, LM and Weiner, Z and Galloway, RL and Schlater, LK and Nally, JE and Sahl, JW and Wagner, DM}, title = {{DNA} Capture and Enrichment: A Culture-Independent Approach for Characterizing the Genomic Diversity of Pathogenic {Leptospira} Species.}, journal = {Microorganisms}, volume = {11}, number = {5}, pages = {}, doi = {10.3390/microorganisms11051282}, pmid = {37317256}, issn = {2076-2607}, abstract = {Because they are difficult to culture, obtaining genomic information from Leptospira spp. is challenging, hindering the overall understanding of leptospirosis. We designed and validated a culture-independent DNA capture and enrichment system for obtaining Leptospira genomic information from complex human and animal samples. It can be utilized with a variety of complex sample types and diverse species as it was designed using the pan-genome of all known pathogenic Leptospira spp. This system significantly increases the proportion of Leptospira DNA contained within DNA extracts obtained from complex samples, oftentimes reaching >95% even when some estimated starting proportions were <1%. Sequencing enriched extracts results in genomic coverage similar to sequenced isolates, thereby enabling enriched complex extracts to be analyzed together with whole genome sequences from isolates, which facilitates robust species identification and high-resolution genotyping. The system is flexible and can be readily updated when new genomic information becomes available. Implementation of this DNA capture and enrichment system will improve efforts to obtain genomic data from unculturable Leptospira-positive human and animal samples. This, in turn, will lead to a better understanding of the overall genomic diversity and gene content of Leptospira spp. 
that cause leptospirosis, aiding epidemiology and the development of improved diagnostics and vaccines.}, }
@article{pmid37316739,
  author  = {Fudge, JB},
  title   = {Combining 47 human genomes into a single pangenome.},
  journal = {Nature biotechnology},
  year    = {2023},
  volume  = {41},
  number  = {6},
  pages   = {766},
  doi     = {10.1038/s41587-023-01842-4},
  pmid    = {37316739},
  issn    = {1546-1696},
}
@comment{pmid37316654: the exported author list held an empty name between "Hu, Z" and "Lu, Y", which BibTeX cannot parse; it has been dropped. NOTE(review): the empty slot presumably stood for a collective author (the abstract names the Chinese Pangenome Consortium) -- confirm against the PubMed record and, if so, add it as a double-braced corporate author.}
@article {pmid37316654, year = {2023}, author = {Gao, Y and Yang, X and Chen, H and Tan, X and Yang, Z and Deng, L and Wang, B and Kong, S and Li, S and Cui, Y and Lei, C and Wang, Y and Pan, Y and Ma, S and Sun, H and Zhao, X and Shi, Y and Yang, Z and Wu, D and Wu, S and Zhao, X and Shi, B and Jin, L and Hu, Z and Lu, Y and Chu, J and Ye, K and Xu, S}, title = {A pangenome reference of 36 {Chinese} populations.}, journal = {Nature}, volume = {}, number = {}, pages = {}, pmid = {37316654}, issn = {1476-4687}, abstract = {Human genomics is witnessing an ongoing paradigm shift from a single reference sequence to a pangenome form, but populations of Asian ancestry are underrepresented. Here we present data from the first phase of the Chinese Pangenome Consortium, including a collection of 116 high-quality and haplotype-phased de novo assemblies based on 58 core samples representing 36 minority Chinese ethnic groups. With an average 30.65× high-fidelity long-read sequence coverage, an average contiguity N50 of more than 35.63 megabases and an average total size of 3.01 gigabases, the CPC core assemblies add 189 million base pairs of euchromatic polymorphic sequences and 1,367 protein-coding gene duplications to GRCh38. We identified 15.9 million small variants and 78,072 structural variants, of which 5.9 million small variants and 34,223 structural variants were not reported in a recently released pangenome reference[1]. The Chinese Pangenome Consortium data demonstrate a remarkable increase in the discovery of novel and missing sequences when individuals are included from underrepresented minority ethnic groups. 
The missing reference sequences were enriched with archaic-derived alleles and genes that confer essential functions related to keratinization, response to ultraviolet radiation, DNA repair, immunological responses and lifespan, implying great potential for shedding new light on human evolution and recovering missing heritability in complex disease mapping.}, }
@comment{pmid37316594: the exported record carries no author, so the required author field is empty and BibTeX will warn. NOTE(review): confirm against PubMed whether the item is genuinely unsigned; if so, consider adding a key field for sorting.}
@article {pmid37316594, year = {2023}, author = {}, title = {A pangenome reference representative of 36 minority {Chinese} ethnic groups.}, journal = {Nature}, volume = {}, number = {}, pages = {}, pmid = {37316594}, issn = {1476-4687}, }
@article {pmid37313509, year = {2022}, author = {Reeves, PA and Richards, CM}, title = {A pan-genome data structure induced by pooled sequencing facilitates variant mining in heterogeneous germplasm.}, journal = {Molecular breeding : new strategies in plant improvement}, volume = {42}, number = {7}, pages = {36}, pmid = {37313509}, issn = {1572-9788}, abstract = {Valuable genetic variation lies unused in gene banks due to the difficulty of exploiting heterogeneous germplasm accessions. Advances in molecular breeding, including transgenics and genome editing, present the opportunity to exploit hidden sequence variation directly. Here we describe the pan-genome data structure induced by whole-genome sequencing of pooled individuals from wild populations of Patellifolia spp., a source of disease resistance genes for the related crop species sugar beet (Beta vulgaris). We represent the pan-genome as a map of reads from pooled sequencing of a heterogeneous population sample to a reference genome, plus a BLAST data base of the mapped reads. We show that this basic data structure can be queried by reference genome position or homology to identify sequence variants present in the wild relative, at genes of agronomic interest in the crop, a process known as allele or variant mining. Further we demonstrate the possibility of cataloging variants in all Patellifolia genomic regions that have corresponding single copy orthologous regions in sugar beet. The data structure, termed a "pooled read archive," can be produced, altered, and queried using standard tools to facilitate discovery of agronomically-important sequence variation.
SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s11032-022-01308-6.}, }
@article {pmid37313015, year = {2022}, author = {Seyum, EG and Bille, NH and Abtew, WG and Munyengwa, N and Bell, JM and Cros, D}, title = {Genomic selection in tropical perennial crops and plantation trees: a review.}, journal = {Molecular breeding : new strategies in plant improvement}, volume = {42}, number = {10}, pages = {58}, pmid = {37313015}, issn = {1572-9788}, abstract = {To overcome the multiple challenges currently faced by agriculture, such as climate change and soil deterioration, more efficient plant breeding strategies are required. Genomic selection (GS) is crucial for the genetic improvement of quantitative traits, as it can increase selection intensity, shorten the generation interval, and improve selection accuracy for traits that are difficult to phenotype. Tropical perennial crops and plantation trees are of major economic importance and have consequently been the subject of many GS articles. In this review, we discuss the factors that affect GS accuracy (statistical models, linkage disequilibrium, information concerning markers, relatedness between training and target populations, the size of the training population, and trait heritability) and the genetic gain expected in these species. The impact of GS will be particularly strong in tropical perennial crops and plantation trees as they have long breeding cycles and constrained selection intensity. Future GS prospects are also discussed. High-throughput phenotyping will allow constructing of large training populations and implementing of phenomic selection. Optimized modeling is needed for longitudinal traits and multi-environment trials. The use of multi-omics, haploblocks, and structural variants will enable going beyond single-locus genotype data. Innovative statistical approaches, like artificial neural networks, are expected to efficiently handle the increasing amounts of heterogeneous multi-scale data. 
Targeted recombinations on sites identified from profiles of marker effects have the potential to further increase genetic gain. GS can also aid re-domestication and introgression breeding. Finally, GS consortia will play an important role in making the best of these opportunities.
SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s11032-022-01326-4.}, }
@article {pmid37310928, year = {2023}, author = {Li, R and Gong, M and Zhang, X and Wang, F and Liu, Z and Zhang, L and Yang, Q and Xu, Y and Xu, M and Zhang, H and Zhang, Y and Dai, X and Gao, Y and Zhang, Z and Fang, W and Yang, Y and Fu, W and Cao, C and Yang, P and Ghanatsaman, ZA and Negari, NJ and Nanaei, HA and Yue, X and Song, Y and Lan, X and Deng, W and Wang, X and Pan, C and Xiang, R and Ibeagha-Awemu, EM and Heslop-Harrison, PJS and Rosen, BD and Lenstra, JA and Gan, S and Jiang, Y}, title = {A sheep pangenome reveals the spectrum of structural variations and their effects on tail phenotypes.}, journal = {Genome research}, volume = {33}, number = {3}, pages = {463--477}, doi = {10.1101/gr.277372.122}, pmid = {37310928}, issn = {1549-5469}, abstract = {Structural variations (SVs) are a major contributor to genetic diversity and phenotypic variations, but their prevalence and functions in domestic animals are largely unexplored. Here we generated high-quality genome assemblies for 15 individuals from genetically diverse sheep breeds using Pacific Biosciences (PacBio) high-fidelity sequencing, discovering 130.3 Mb nonreference sequences, from which 588 genes were annotated. A total of 149,158 biallelic insertions/deletions, 6531 divergent alleles, and 14,707 multiallelic variations with precise breakpoints were discovered. The SV spectrum is characterized by an excess of derived insertions compared to deletions (94,422 vs. 33,571), suggesting recent active LINE expansions in sheep. Nearly half of the SVs display low to moderate linkage disequilibrium with surrounding single-nucleotide polymorphisms (SNPs) and most SVs cannot be tagged by SNP probes from the widely used ovine 50K SNP chip. We identified 865 population-stratified SVs including 122 SVs possibly derived in the domestication process among 690 individuals from sheep breeds worldwide. 
A novel 168-bp insertion in the 5' untranslated region (5' UTR) of HOXB13 is found at high frequency in long-tailed sheep. Further genome-wide association study and gene expression analyses suggest that this mutation is causative for the long-tail trait. In summary, we have developed a panel of high-quality de novo assemblies and present a catalog of structural variations in sheep. Our data capture abundant candidate functional variations that were previously unexplored and provide a fundamental resource for understanding trait biology in sheep.}, }
@article {pmid37303795, year = {2023}, author = {Zhang, Z and Cui, M and Chen, P and Li, J and Mao, Z and Mao, Y and Li, Z and Guo, Q and Wang, C and Liao, X and Liu, H}, title = {Insight into the phylogeny and metabolic divergence of {Monascus} species ({M.} pilosus, {M.} ruber, and {M.} purpureus) at the genome level.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1199144}, pmid = {37303795}, issn = {1664-302X}, abstract = {BACKGROUND: Species of the genus Monascus are economically important and widely used in the production of food colorants and monacolin K. However, they have also been known to produce the mycotoxin citrinin. Currently, taxonomic knowledge of this species at the genome level is insufficient.
METHODS: This study presents genomic similarity analyses through the analysis of the average nucleic acid identity of the genomic sequence and the whole genome alignment. Subsequently, the study constructed a pangenome of Monascus by reannotating all the genomes and identifying a total of 9,539 orthologous gene families. Two phylogenetic trees were constructed based on 4,589 single copy orthologous protein sequences and all the 5,565 orthologous proteins, respectively. In addition, carbohydrate active enzymes, secretome, allergic proteins, as well as secondary metabolite gene clusters were compared among the included 15 Monascus strains.
RESULTS: The results clearly revealed a high homology between M. pilosus and M. ruber, and their distant relationship with M. purpureus. Accordingly, all the included 15 Monascus strains should be classified into two distinctly evolutionary clades, namely the M. purpureus clade and the M. pilosus-M. ruber clade. Moreover, gene ontology enrichment showed that the M. pilosus-M. ruber clade had more orthologous genes involved with environmental adaptation than the M. purpureus clade. Compared to Aspergillus oryzae, all the Monascus species had a substantial gene loss of carbohydrate active enzymes. Potential allergenic and fungal virulence factor proteins were also found in the secretome of Monascus. Furthermore, this study identified the pigment synthesis gene clusters present in all included genomes, but with multiple nonessential genes inserted in the gene cluster of M. pilosus and M. ruber compared to M. purpureus. The citrinin gene cluster was found to be intact and highly conserved only among M. purpureus genomes. The monacolin K gene cluster was found only in the genomes of M. pilosus and M. ruber, but the sequence was more conserved in M. ruber.
CONCLUSION: This study provides a paradigm for phylogenetic analysis of the genus Monascus, and it is believed that this report will lead to a better understanding of these food microorganisms in terms of classification, metabolic differentiation, and safety.}, }
@article {pmid37298462, year = {2023}, author = {Wekesa, C and Kiprotich, K and Okoth, P and Asudi, GO and Muoma, JO and Furch, ACU and Oelmüller, R}, title = {Molecular Characterization of Indigenous Rhizobia from {Kenyan} Soils Nodulating with Common Beans.}, journal = {International journal of molecular sciences}, volume = {24}, number = {11}, pages = {}, doi = {10.3390/ijms24119509}, pmid = {37298462}, issn = {1422-0067}, abstract = {Kenya is the seventh most prominent producer of common beans globally and the second leading producer in East Africa. However, the annual national productivity is low due to insufficient quantities of vital nutrients and nitrogen in the soils. Rhizobia are symbiotic bacteria that fix nitrogen through their interaction with leguminous plants. Nevertheless, inoculating beans with commercial rhizobia inoculants results in sparse nodulation and low nitrogen supply to the host plants because these strains are poorly adapted to the local soils. Several studies describe native rhizobia with much better symbiotic capabilities than commercial strains, but only a few have conducted field studies. This study aimed to test the competence of new rhizobia strains that we isolated from Western Kenya soils and for which the symbiotic efficiency was successfully determined in greenhouse experiments. Furthermore, we present and analyze the whole-genome sequence for a promising candidate for agricultural application, which has high nitrogen fixation features and promotes common bean yields in field studies. Plants inoculated with the rhizobial isolate S3 or with a consortium of local isolates (COMB), including S3, produced a significantly higher number of seeds and seed dry weight when compared to uninoculated control plants at two study sites. The performance of plants inoculated with commercial isolate CIAT899 was not significantly different from uninoculated plants (p > 0.05), indicating tight competition from native rhizobia for nodule occupancy. 
Pangenome analysis and the overall genome-related indices showed that S3 is a member of R. phaseoli. However, synteny analysis revealed significant differences in the gene order, orientation, and copy numbers between S3 and the reference R. phaseoli. Isolate S3 is phylogenomically similar to R. phaseoli. However, it has undergone significant genome rearrangements (global mutagenesis) to adapt to harsh conditions in Kenyan soils. Its high nitrogen fixation ability shows optimal adaptation to Kenyan soils, and the strain can potentially replace nitrogenous fertilizer application. We recommend that extensive fieldwork in other parts of the country over a period of five years be performed on S3 to check on how the yield changes with varying weather conditions.}, }
@article{pmid37296461,
  author   = {Schmidt, S and Khan, S and Alanko, JN and Pibiri, GE and Tomescu, AI},
  title    = {Matchtigs: minimum plain text representation of k-mer sets.},
  journal  = {Genome biology},
  year     = {2023},
  volume   = {24},
  number   = {1},
  pages    = {136},
  pmid     = {37296461},
  issn     = {1474-760X},
  abstract = {We propose a polynomial algorithm computing a minimum plain-text representation of k-mer sets, as well as an efficient near-minimum greedy heuristic. When compressing read sets of large model organisms or bacterial pangenomes, with only a minor runtime increase, we shrink the representation by up to 59% over unitigs and 26% over previous work. Additionally, the number of strings is decreased by up to 97% over unitigs and 90% over previous work. Finally, a small representation has advantages in downstream applications, as it speeds up SSHash-Lite queries by up to 4.26× over unitigs and 2.10× over previous work.},
}
@article {pmid37291196, year = {2023}, author = {He, Q and Tang, S and Zhi, H and Chen, J and Zhang, J and Liang, H and Alam, O and Li, H and Zhang, H and Xing, L and Li, X and Zhang, W and Wang, H and Shi, J and Du, H and Wu, H and Wang, L and Yang, P and Xing, L and Yan, H and Song, Z and Liu, J and Wang, H and Tian, X and Qiao, Z and Feng, G and Guo, R and Zhu, W and Ren, Y and Hao, H and Li, M and Zhang, A and Guo, E and Yan, F and Li, Q and Liu, Y and Tian, B and Zhao, X and Jia, R and Feng, B and Zhang, J and Wei, J and Lai, J and Jia, G and Purugganan, M and Diao, X}, title = {A graph-based genome and pan-genome variation of the model plant {Setaria}.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {37291196}, issn = {1546-1718}, abstract = {Setaria italica (foxtail millet), a founder crop of East Asian agriculture, is a model plant for C4 photosynthesis and developing approaches to adaptive breeding across multiple climates. Here we established the Setaria pan-genome by assembling 110 representative genomes from a worldwide collection. The pan-genome is composed of 73,528 gene families, of which 23.8%, 42.9%, 29.4% and 3.9% are core, soft core, dispensable and private genes, respectively; 202,884 nonredundant structural variants were also detected. The characterization of pan-genomic variants suggests their importance during foxtail millet domestication and improvement, as exemplified by the identification of the yield gene SiGW3, where a 366-bp presence/absence promoter variant accompanies gene expression variation. We developed a graph-based genome and performed large-scale genetic studies for 68 traits across 13 environments, identifying potential genes for millet improvement at different geographic sites. These can be used in marker-assisted breeding, genomic selection and genome editing to accelerate crop improvement under different climatic conditions.}, }
@article {pmid37291142, year = {2023}, author = {Eché, C and Iampietro, C and Birbes, C and Dréau, A and Kuchly, C and Di Franco, A and Klopp, C and Faraut, T and Djebali, S and Castinel, A and Zytnicki, M and Denis, E and Boussaha, M and Grohs, C and Boichard, D and Gaspin, C and Milan, D and Donnadieu, C}, title = {A {Bos} taurus sequencing methods benchmark for assembly, haplotyping, and variant calling.}, journal = {Scientific data}, volume = {10}, number = {1}, pages = {369}, pmid = {37291142}, issn = {2052-4463}, abstract = {Inspired by the production of reference data sets in the Genome in a Bottle project, we sequenced one Charolais heifer with different technologies: Illumina paired-end, Oxford Nanopore, Pacific Biosciences (HiFi and CLR), 10X Genomics linked-reads, and Hi-C. In order to generate haplotypic assemblies, we also sequenced both parents with short reads. From these data, we built two haplotyped trio high quality reference genomes and a consensus assembly, using up-to-date software packages. The assemblies obtained using PacBio HiFi reaches a size of 3.2 Gb, which is significantly larger than the 2.7 Gb ARS-UCD1.2 reference. The BUSCO score of the consensus assembly reaches a completeness of 95.8%, among highly conserved mammal genes. We also identified 35,866 structural variants larger than 50 base pairs. This assembly is a contribution to the bovine pangenome for the "Charolais" breed. These datasets will prove to be useful resources enabling the community to gain additional insight on sequencing technologies for applications such as SNP, indel or structural variant calling, and de novo assembly.}, }
@article {pmid37289488, year = {2023}, author = {Mossop, M and Robinson, L and Jiang, JH and Peleg, AY and Blakeway, LV and Macesic, N and Perry, A and Bourke, S and Ulhuq, FR and Palmer, T}, title = {Characterisation of key genotypic and phenotypic traits of clinical cystic fibrosis {Staphylococcus} aureus isolates.}, journal = {Journal of medical microbiology}, volume = {72}, number = {6}, pages = {}, doi = {10.1099/jmm.0.001703}, pmid = {37289488}, issn = {1473-5644}, abstract = {Introduction. One third of people with CF in the UK are co-infected by both Staphylococcus aureus and Pseudomonas aeruginosa. Chronic bacterial infection in CF contributes to the gradual destruction of lung tissue, and eventually respiratory failure in this group. Gap Statement. The contribution of S. aureus to cystic fibrosis (CF) lung decline in the presence or absence of P. aeruginosa is unclear. Defining the molecular and phenotypic characteristics of a range of S. aureus clinical isolates will help further understand its pathogenic capabilities. Aim. Our objective was to use molecular and phenotypic tools to characterise twenty-five clinical S. aureus isolates collected from mono- and coinfection with P. aeruginosa from people with CF at the Royal Victoria Infirmary, Newcastle upon Tyne. Methodology. Genomic DNA was extracted and sequenced. Multilocus sequence typing was used to construct phylogeny from the seven housekeeping genes. A pangenome was calculated using Roary, and cluster of Orthologous groups were assigned using eggNOG-mapper which were used to determine differences within core, accessory, and unique genomes. Characterisation of sequence type, clonal complex, agr and spa types was carried out using PubMLST, eBURST, AgrVATE and spaTyper, respectively. Antibiotic resistance was determined using Kirby-Bauer disc diffusion tests. Phenotypic testing of haemolysis was carried out using ovine red blood cell agar plates and mucoid phenotypes visualised using Congo red agar. Results. 
Clinical strains clustered closely based on agr type, sequence type and clonal complex. COG analysis revealed statistically significant enrichment of COG families between core, accessory and unique pangenome groups. The unique genome was significantly enriched for replication, recombination and repair, and defence mechanisms. The presence of known virulence genes and toxins were high within this group, and unique genes were identified in 11 strains. Strains which were isolated from the same patient all surpassed average nucleotide identity thresholds, however, differed in phenotypic traits. Antimicrobial resistance to macrolides was significantly higher in the coinfection group. Conclusion. There is huge variation in genetic and phenotypic capabilities of S. aureus strains. Further studies on how these may differ in relation to other species in the CF lung may give insight into inter-species interactions.}, }
@article {pmid37285390, year = {2023}, author = {Rubin, JD and Vogel, NA and Gopalakrishnan, S and Sackett, PW and Renaud, G}, title = {{HaploCart}: Human {mtDNA} haplogroup classification using a pangenomic reference graph human {mtDNA} haplogroup inference.}, journal = {PLoS computational biology}, volume = {19}, number = {6}, pages = {e1011148}, doi = {10.1371/journal.pcbi.1011148}, pmid = {37285390}, issn = {1553-7358}, abstract = {Current mitochondrial DNA (mtDNA) haplogroup classification tools map reads to a single reference genome and perform inference based on the detected mutations to this reference. This approach biases haplogroup assignments towards the reference and prohibits accurate calculations of the uncertainty in assignment. We present HaploCart, a probabilistic mtDNA haplogroup classifier which uses a pangenomic reference graph framework together with principles of Bayesian inference. We demonstrate that our approach significantly outperforms available tools by being more robust to lower coverage or incomplete consensus sequences and producing phylogenetically-aware confidence scores that are unbiased towards any haplogroup. HaploCart is available both as a command-line tool and through a user-friendly web interface. The C++ program accepts as input consensus FASTA, FASTQ, or GAM files, and outputs a text file with the haplogroup assignments of the samples along with the level of confidence in the assignments. Our work considerably reduces the amount of data required to obtain a confident mitochondrial haplogroup assignment.}, }
@article{pmid37285209,
  author   = {Liu, R and Ma, L and Wang, H and Liu, D and Lu, X and Huang, X and Huang, S and Liu, X},
  title    = {Comparative genomics reveals intraspecific divergence of {Acidithiobacillus} ferrooxidans: insights from evolutionary adaptation},
  journal  = {Microbial genomics},
  year     = {2023},
  volume   = {9},
  number   = {6},
  doi      = {10.1099/mgen.0.001038},
  pmid     = {37285209},
  issn     = {2057-5858},
  abstract = {Acidithiobacillus ferrooxidans serves as a model chemolithoautotrophic organism in extremely acidic environments, which has attracted much attention due to its unique metabolism and strong adaptability. However, little was known about the divergences along the evolutionary process based on whole genomes. Herein, we isolated six strains of A. ferrooxidans from mining areas in China and Zambia, and used comparative genomics to investigate the intra-species divergences. The results indicated that A. ferrooxidans diverged into three groups from a common ancestor, and the pan-genome is 'open'. The ancestral reconstruction of A. ferrooxidans indicated that genome sizes experienced a trend of increase in the very earliest days before a decreasing tendency during the evolutionary process, suggesting that both gene gain and gene loss played crucial roles in A. ferrooxidans genome flexibility. Meanwhile, 23 single-copy orthologous groups (OGs) were under positive selection. The differences of rusticyanin (Rus) sequences (the key protein in the iron oxidation pathway) and type IV secretion system (T4SS) composition in the A. ferrooxidans were both related to their group divergences, which contributed to their intraspecific diversity. This study improved our understanding of the divergent evolution and environmental adaptation of A. ferrooxidans at the genome level in extreme conditions, which provided theoretical support for the survival mechanism of living creatures at the extreme.},
}
@article{pmid37278719,
  author   = {Noll, N and Molari, M and Shaw, LP and Neher, RA},
  title    = {{PanGraph}: scalable bacterial pan-genome graph construction},
  journal  = {Microbial genomics},
  year     = {2023},
  volume   = {9},
  number   = {6},
  doi      = {10.1099/mgen.0.001034},
  pmid     = {37278719},
  issn     = {2057-5858},
  abstract = {The genomic diversity of microbes is commonly parameterized as SNPs relative to a reference genome of a well-characterized, but arbitrary, isolate. However, any reference genome contains only a fraction of the microbial pangenome, the total set of genes observed in a given species. Reference-based approaches are thus blind to the dynamics of the accessory genome, as well as variation within gene order and copy number. With the widespread usage of long-read sequencing, the number of high-quality, complete genome assemblies has increased dramatically. In addition to pangenomic approaches that focus on the variation in the sets of genes present in different genomes, complete assemblies allow investigations of the evolution of genome structure and gene order. This latter problem, however, is computationally demanding with few tools available that shed light on these dynamics. Here, we present PanGraph, a Julia-based library and command line interface for aligning whole genomes into a graph. Each genome is represented as a path along vertices, which in turn encapsulate homologous multiple sequence alignments. The resultant data structure succinctly summarizes population-level nucleotide and structural polymorphisms and can be exported into several common formats for either downstream analysis or immediate visualization.},
}
@article{pmid37275147,
  author   = {Salvà-Serra, F and Pérez-Pantoja, D and Donoso, RA and Jaén-Luchoro, D and Fernández-Juárez, V and Engström-Jakobsson, H and Moore, ERB and Lalucat, J and Bennasar-Figueras, A},
  title    = {Comparative genomics of {Stutzerimonas} balearica ({Pseudomonas} balearica): diversity, habitats, and biodegradation of aromatic compounds},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1159176},
  doi      = {10.3389/fmicb.2023.1159176},
  pmid     = {37275147},
  issn     = {1664-302X},
  abstract = {Stutzerimonas balearica (Pseudomonas balearica) has been found principally in oil-polluted environments. The capability of S. balearica to thrive from the degradation of pollutant compounds makes it a species of interest for potential bioremediation applications. However, little has been reported about the diversity of S. balearica. In this study, genome sequences of S. balearica strains from different origins were analyzed, revealing that it is a diverse species with an open pan-genome that will continue revealing new genes and functionalities as the genomes of more strains are sequenced. The nucleotide signatures and intra- and inter-species variation of the 16S rRNA genes of S. balearica were reevaluated. A strategy of screening 16S rRNA gene sequences in public databases enabled the detection of 158 additional strains, of which only 23% were described as S. balearica. The species was detected from a wide range of environments, although mostly from aquatic and polluted environments, predominantly related to petroleum oil. Genomic and phenotypic analyses confirmed that S. balearica possesses varied inherent capabilities for aromatic compounds degradation. This study increases the knowledge of the biology and diversity of S. balearica and will serve as a basis for future work with the species.},
}
@article{pmid37274318,
  author   = {Lopez, MES and Gontijo, MTP and Cardoso, RR and Batalha, LS and Eller, MR and Bazzolli, DMS and Vidigal, PMP and Mendonça, RCS},
  title    = {Complete genome analysis of {Tequatrovirus} ufvareg1, a {Tequatrovirus} species inhibiting {Escherichia} coli {O157:H7}},
  journal  = {Frontiers in cellular and infection microbiology},
  year     = {2023},
  volume   = {13},
  pages    = {1178248},
  pmid     = {37274318},
  issn     = {2235-2988},
  abstract = {INTRODUCTION: Bacteriophages infecting human pathogens have been considered potential biocontrol agents, and studying their genetic content is essential to their safe use in the food industry. Tequatrovirus ufvareg1 is a bacteriophage named UFV-AREG1, isolated from cowshed wastewater and previously tested for its ability to inhibit Escherichia coli O157:H7.
METHODS: T. ufvareg1 was previously isolated using E. coli O157:H7 (ATCC 43895) as a bacterial host. The same strain was used for bacteriophage propagation and the one-step growth curve. The genome of the T. ufvareg1 was sequenced using 305 Illumina HiSeq, and the genome comparison was calculated by VIRIDIC and VIPTree.
RESULTS: Here, we characterize its genome and compare it to other Tequatrovirus. T. ufvareg1 virions have an icosahedral head (114 x 86 nm) and a contracted tail (117 x 23 nm), with a latent period of 25 min, and an average burst size was 18 phage particles per infected E. coli cell. The genome of the bacteriophage T. ufvareg1 contains 268 coding DNA sequences (CDS) and ten tRNA genes distributed in both negative and positive strains. T. ufvareg1 genome also contains 40 promoters on its regulatory regions and two rho-independent terminators. T. ufvareg1 shares an average intergenomic similarity (VIRIDC) of 88.77% and an average genomic similarity score (VipTree) of 88.91% with eight four reference genomes for Tequatrovirus available in the NCBI RefSeq database. The pan-genomic analysis confirmed the high conservation of Tequatrovirus genomes. Among all CDS annotated in the T. ufvareg1 genome, there are 123 core genes, 38 softcore genes, 94 shell genes, and 13 cloud genes. None of 268 CDS was classified as being exclusive of T. ufvareg1.
CONCLUSION: The results in this paper, combined with other previously published findings, indicate that T. ufvareg1 bacteriophage is a potential candidate for food protection against E. coli O157:H7 in foods.},
}
@article{pmid37267130,
  author   = {Brandt, AVD and Jonkheer, EM and van Workum, DM and van de Wetering, H and Smit, S and Vilanova, A},
  title    = {{PanVA}: Pangenomic Variant Analysis},
  journal  = {IEEE transactions on visualization and computer graphics},
  year     = {2023},
  volume   = {PP},
  doi      = {10.1109/TVCG.2023.3282364},
  pmid     = {37267130},
  issn     = {1941-0506},
  abstract = {Genomics researchers increasingly use multiple reference genomes to comprehensively explore genetic variants underlying differences in detectable characteristics between organisms. Pangenomes allow for an efficient data representation of multiple related genomes and their associated metadata. However, current visual analysis approaches for exploring these complex genotype-phenotype relationships are often based on single reference approaches or lack adequate support for interpreting the variants in the genomic context with heterogeneous (meta)data. This design study introduces PanVA, a visual analytics design for pangenomic variant analysis developed with the active participation of genomics researchers. The design uniquely combines tailored visual representations with interactions such as sorting, grouping, and aggregation, allowing users to navigate and explore different perspectives on complex genotype-phenotype relations. Through evaluation in the context of plants and pathogen research, we show that PanVA helps researchers explore variants in genes and generate hypotheses about their role in phenotypic variation.},
}
@article{pmid37261234,
  author   = {Fatima, S and Ishaq, Z and Irfan, M and AlAsmari, AF and Achakzai, JK and Zaheer, T and Ali, A and Akbar, A},
  title    = {Whole-genome sequencing of multidrug resistance {Salmonella} {Typhi} clinical strains isolated from {Balochistan}, {Pakistan}},
  journal  = {Frontiers in public health},
  year     = {2023},
  volume   = {11},
  pages    = {1151805},
  pmid     = {37261234},
  issn     = {2296-2565},
  abstract = {INTRODUCTION: Salmonella enterica serovar Typhi (S. Typhi) is a major cause of morbidity and mortality in developing countries, contributing significantly to the global disease burden.
METHODS: In this study, S. Typhi strains were isolated from 100 patients exhibiting symptoms of typhoid fever at a tertiary care hospital in Pakistan. Antimicrobial testing of all isolates was performed to determine the sensitivity and resistance pattern. Three MDR strains, namely QS194, QS430, and QS468, were subjected to whole genome sequencing for genomic characterization.
RESULTS AND DISCUSSION: MLST analysis showed that QS194, belonged to ST19, which is commonly associated with Salmonella enterica serovar typhimurium. In contrast, QS430 and QS468, belonged to ST1, a sequence type frequently associated with S. Typhi. PlasmidFinder identified the presence of IncFIB(S) and IncFII(S) plasmids in QS194, while IncQ1 was found in QS468. No plasmid was detected in QS430. CARD-based analysis showed that the strains were largely resistant to a variety of antibiotics and disinfecting agents/antiseptics, including fluoroquinolones, cephalosporins, monobactams, cephamycins, penams, phenicols, tetracyclines, rifamycins, aminoglycosides, etc. The S. Typhi strains possessed various virulence factors, such as Vi antigen, Agf/Csg, Bcf, Fim, Pef, etc. The sequencing data indicated that the strains had antibiotic resistance determinants and shared common virulence factors. Pangenome analysis of the selected S. Typhi strains identified 13,237 genes, with 3,611 being core genes, 2,093 shell genes, and 7,533 cloud genes. Genome-based typing and horizontal gene transfer analysis revealed that the strains had different evolutionary origins and may have adapted to distinct environments or host organisms. These findings provide important insights into the genetic characteristics of S. Typhi strains and their potential association with various ecological niches and host organisms.},
}
@article{pmid37258301,
  author   = {Ahmed, O and Rossi, M and Boucher, C and Langmead, B},
  title    = {Efficient taxa identification using a pangenome index},
  journal  = {Genome research},
  year     = {2023},
  doi      = {10.1101/gr.277642.123},
  pmid     = {37258301},
  issn     = {1549-5469},
  abstract = {Tools that classify sequencing reads against a database of reference sequences require efficient index data structures. The r-index is a compressed full-text index that answers substring presence/absence, count and locate queries in space proportional to the amount of distinct sequence in the database: O(r) space where r is the number of Burrows-Wheeler runs. To date, the r-index has lacked the ability to quickly classify matches according to which reference sequences (or sequence groupings, i.e.~taxa) a match overlaps. We present new algorithms and methods for solving this problem. Specifically, given a collection D of d documents D = {T_1, T_2, ..., T_d } over an alphabet of size sigma, we extend the r-index with O(rd) additional words to support document listing queries for a pattern S[1..m] that occurs in ndoc documents in D in O(m log log_w(sigma + n/r) + ndoc) time and O(rd) space, where w is the machine word size. Applied in a bacterial mock community experiment, our method is up to 3 times faster than a comparable method that uses the standard r-index locate queries. We show that our method classifies both simulated and real nanopore reads at the strain level with higher accuracy compared to other approaches. Finally, we present strategies for compacting this structure in applications where read lengths or match lengths can be bounded.},
}
@article{pmid37256057,
  author   = {Zhao, W and Zeng, W and Pang, B and Luo, M and Peng, Y and Xu, J and Kan, B and Li, Z and Lu, X},
  title    = {Oxford nanopore long-read sequencing enables the generation of complete bacterial and plasmid genomes without short-read sequencing},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1179966},
  pmid     = {37256057},
  issn     = {1664-302X},
  abstract = {INTRODUCTION: Genome-based analysis is crucial in monitoring antibiotic-resistant bacteria (ARB)and antibiotic-resistance genes (ARGs). Short-read sequencing is typically used to obtain incomplete draft genomes, while long-read sequencing can obtain genomes of multidrug resistance (MDR) plasmids and track the transmission of plasmid-borne antimicrobial resistance genes in bacteria. However, long-read sequencing suffers from low-accuracy base calling, and short-read sequencing is often required to improve genome accuracy. This increases costs and turnaround time.
METHODS: In this study, a novel ONT sequencing method is described, which uses the latest ONT chemistry with improved accuracy to assemble genomes of MDR strains and plasmids from long-read sequencing data only. Three strains of Salmonella carrying MDR plasmids were sequenced using the ONT SQK-LSK114 kit with flow cell R10.4.1, and de novo genome assembly was performed with average read accuracy (Q > 10) of 98.9%.
RESULTS AND DISCUSSION: For a 5-Mb-long bacterial genome, finished genome sequences with accuracy of >99.99% could be obtained at 75× sequencing coverage depth using Flye and Medaka software. Thus, this new ONT method greatly improves base-calling accuracy, allowing for the de novo assembly of high-quality finished bacterial or plasmid genomes without the need for short-read sequencing. This saves both money and time and supports the application of ONT data in critical genome-based epidemiological analyses. The novel ONT approach described in this study can take the place of traditional combination genome assembly based on short- and long-read sequencing, enabling pangenomic analyses based on high-quality complete bacterial and plasmid genomes to monitor the spread of antibiotic-resistant bacteria and antibiotic resistance genes.},
}
@article{pmid37250090,
  author   = {Zhang, JX and Xu, JH and Yuan, B and Wang, XD and Mao, XH and Wang, JL and Zhang, XL and Yuan, Y},
  title    = {Detection of {Burkholderia} pseudomallei with {CRISPR-Cas12a} based on specific sequence tags},
  journal  = {Frontiers in public health},
  year     = {2023},
  volume   = {11},
  pages    = {1153352},
  pmid     = {37250090},
  issn     = {2296-2565},
  abstract = {Melioidosis is a bacterial infection caused by Burkholderia pseudomallei (B. pseudomallei), posing a significant threat to public health. Rapid and accurate detection of B. pseudomallei is crucial for preventing and controlling melioidosis. However, identifying B. pseudomallei is challenging due to its high similarity to other species in the same genus. To address this issue, this study proposed a dual-target method that can specifically identify B. pseudomallei in less than 40 min. We analyzed 1722 B. pseudomallei genomes to construct large-scale pan-genomes and selected specific sequence tags in their core genomes that effectively distinguish B. pseudomallei from its closely related species. Specifically, we selected two specific tags, LC1 and LC2, which we combined with the Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR)-CRISPR associated proteins (Cas12a) system and recombinase polymerase amplification (RPA) pre-amplification. Our analysis showed that the dual-target RPA-CRISPR/Cas12a assay has a sensitivity of approximately 0.2 copies/reaction and 10 fg genomic DNA for LC1, and 2 copies/reaction and 20 fg genomic DNA for LC2. Additionally, our method can accurately and rapidly detect B. pseudomallei in human blood and moist soil samples using the specific sequence tags mentioned above. In conclusion, the dual-target RPA-CRISPR/Cas12a method is a valuable tool for the rapid and accurate identification of B. pseudomallei in clinical and environmental samples, aiding in the prevention and control of melioidosis.},
}
@article{pmid37249320,
  title    = {New Genomic Sequencing Resource Could Improve Care},
  journal  = {Cancer discovery},
  year     = {2023},
  pages    = {OF1},
  doi      = {10.1158/2159-8290.CD-NB2023-0042},
  pmid     = {37249320},
  issn     = {2159-8290},
  abstract = {The first draft of a human pangenomic reference, which includes 47 individuals selected to maximize biogeographic diversity, offers a path to more accurate and effective screening for disease. This broader and more complete view of genetic diversity could lead to new targets for cancer therapies.},
}
@article{pmid37249052,
  author   = {Chen, Y and Guo, Y and Xie, X and Wang, Z and Miao, L and Yang, Z and Jiao, Y and Xie, C and Liu, J and Hu, Z and Xin, M and Yao, Y and Ni, Z and Sun, Q and Peng, H and Guo, W},
  title    = {Pangenome-based trajectories of intracellular gene transfers in {Poaceae} unveil high cumulation in {Triticeae}},
  journal  = {Plant physiology},
  year     = {2023},
  doi      = {10.1093/plphys/kiad319},
  pmid     = {37249052},
  issn     = {1532-2548},
  abstract = {Intracellular gene transfers (IGTs) between the nucleus and organelles, including plastids and mitochondria, constantly reshape the nuclear genome during evolution. Despite the substantial contribution of IGTs to genome variation, the dynamic trajectories of IGTs at the pangenomic level remain elusive. Here, we developed an approach, IGTminer, that maps the evolutionary trajectories of IGTs using collinearity and gene reannotation across multiple genome assemblies. We applied IGTminer to create a nuclear organellar gene (NOG) map across 67 genomes covering 15 Poaceae species, including important crops. The resulting NOGs were verified by experiments and sequencing datasets. Our analysis revealed that most NOGs were recently transferred and lineage-specific and that Triticeae species tended to have more NOGs than other Poaceae species. Wheat (Triticum aestivum) had a higher retention rate of NOGs than maize (Zea mays) and rice (Oryza sativa), and the retained NOGs were likely involved in photosynthesis and translation pathways. Large numbers of NOG clusters were aggregated in hexaploid wheat during two rounds of polyploidization, contributing to the genetic diversity among modern wheat accessions. We implemented an interactive web server to facilitate the exploration of NOGs in Poaceae. In summary, this study provides resources and insights into the roles of IGTs in shaping inter- and intraspecies genome variation and driving plant genome evolution.},
}
@article{pmid37246787,
  author   = {Qian, C and Xu, M and Huang, Z and Tan, M and Fu, C and Zhou, T and Cao, J and Zhou, C},
  title    = {Complete genome sequence of the emerging pathogen {Cysteiniphilum} spp. and comparative genomic analysis with genus {Francisella}: Insights into its genetic diversity and potential virulence traits},
  journal  = {Virulence},
  year     = {2023},
  volume   = {14},
  number   = {1},
  pages    = {2214416},
  doi      = {10.1080/21505594.2023.2214416},
  pmid     = {37246787},
  issn     = {2150-5608},
  abstract = {Cysteiniphilum is a newly discovered genus in 2017 and is phylogenetically closely related to highly pathogenic Francisella tularensis. Recently, it has become an emerging pathogen in humans. However, the complete genome sequence of genus Cysteiniphilum is lacking, and the genomic characteristics of genetic diversity, evolutionary dynamics, and pathogenicity have not been characterized. In this study, the complete genome of the first reported clinical isolate QT6929 of genus Cysteiniphilum was sequenced, and comparative genomics analyses to Francisella genus were conducted to unveil the genomic landscape and diversity of the genus Cysteiniphilum. Our results showed that the complete genome of QT6929 consists of one 2.61 Mb chromosome and a 76,819 bp plasmid. The calculated average nucleotide identity and DNA-DNA hybridization values revealed that two clinical isolates QT6929 and JM-1 should be reclassified as two novel species in genus Cysteiniphilum. Pan-genome analysis revealed genomic diversity within the genus Cysteiniphilum and an open pan-genome state. Genomic plasticity analysis exhibited abundant mobile genetic elements including genome islands, insertion sequences, prophages, and plasmids on Cysteiniphilum genomes, which facilitated the broad exchange of genetic material between Cysteiniphilum and other genera like Francisella and Legionella. 
Several potential virulence genes associated with lipopolysaccharide/lipooligosaccharide, capsule, and haem biosynthesis specific to clinical isolates were predicted and might contribute to their pathogenicity in humans. Incomplete Francisella pathogenicity island was identified in most Cysteiniphilum genomes. Overall, our study provides an updated phylogenomic relationship of members of the genus Cysteiniphilum and comprehensive genomic insights into this rare emerging pathogen.},
}
@article{pmid37243202,
  author   = {Lobb, B and Shapter, A and Doxey, AC and Nissimov, JI},
  title    = {Functional Profiling and Evolutionary Analysis of a Marine Microalgal Virus Pangenome},
  journal  = {Viruses},
  year     = {2023},
  volume   = {15},
  number   = {5},
  doi      = {10.3390/v15051116},
  pmid     = {37243202},
  issn     = {1999-4915},
  abstract = {Phycodnaviridae are large double-stranded DNA viruses, which facilitate studies of host-virus interactions and co-evolution due to their prominence in algal infection and their role in the life cycle of algal blooms. However, the genomic interpretation of these viruses is hampered by a lack of functional information, stemming from the surprising number of hypothetical genes of unknown function. It is also unclear how many of these genes are widely shared within the clade. Using one of the most extensively characterized genera, Coccolithovirus, as a case study, we combined pangenome analysis, multiple functional annotation tools, AlphaFold structural modeling, and literature analysis to compare the core and accessory pangenome and assess support for novel functional predictions. We determined that the Coccolithovirus pangenome shares 30% of its genes with all 14 strains, making up the core. Notably, 34% of its genes were found in at most three strains. Core genes were enriched in early expression based on a transcriptomic dataset of Coccolithovirus EhV-201 algal infection, were more likely to be similar to host proteins than the non-core set, and were more likely to be involved in vital functions such as replication, recombination, and repair. In addition, we generated and collated annotations for the EhV representative EhV-86 from 12 different annotation sources, building up information for 142 previously hypothetical and putative membrane proteins. AlphaFold was further able to predict structures for 204 EhV-86 proteins with a modelling accuracy of good-high. 
These functional clues, combined with generated AlphaFold structures, provide a foundational framework for the future characterization of this model genus (and other giant viruses) and a further look into the evolution of the Coccolithovirus proteome.},
}
@article{pmid37240287,
  author   = {Xia, L and Wang, H and Zhao, X and Obel, HO and Yu, X and Lou, Q and Chen, J and Cheng, C},
  title    = {Chloroplast Pan-Genomes and Comparative Transcriptomics Reveal Genetic Variation and Temperature Adaptation in the Cucumber},
  journal  = {International journal of molecular sciences},
  year     = {2023},
  volume   = {24},
  number   = {10},
  doi      = {10.3390/ijms24108943},
  pmid     = {37240287},
  issn     = {1422-0067},
  abstract = {Although whole genome sequencing, genetic variation mapping, and pan-genome studies have been done on a large group of cucumber nuclear genomes, organelle genome information is largely unclear. As an important component of the organelle genome, the chloroplast genome is highly conserved, which makes it a useful tool for studying plant phylogeny, crop domestication, and species adaptation. Here, we have constructed the first cucumber chloroplast pan-genome based on 121 cucumber germplasms, and investigated the genetic variations of the cucumber chloroplast genome through comparative genomic, phylogenetic, haplotype, and population genetic structure analysis. Meanwhile, we explored the changes in expression of cucumber chloroplast genes under high- and low-temperature stimulation via transcriptome analysis. As a result, a total of 50 complete chloroplast genomes were successfully assembled from 121 cucumber resequencing data, ranging in size from 156,616-157,641 bp. The 50 cucumber chloroplast genomes have typical quadripartite structures, consisting of a large single copy (LSC, 86,339-86,883 bp), a small single copy (SSC, 18,069-18,363 bp), and two inverted repeats (IRs, 25,166-25,797 bp). Comparative genomic, haplotype, and population genetic structure results showed that there is more genetic variation in Indian ecotype cucumbers compared to other cucumber cultivars, which means that many genetic resources remain to be explored in Indian ecotype cucumbers. 
Phylogenetic analysis showed that the 50 cucumber germplasms could be classified into 3 types: East Asian, Eurasian + Indian, and Xishuangbanna + Indian. The transcriptomic analysis showed that matK were significantly up-regulated under high- and low-temperature stresses, further demonstrating that cucumber chloroplasts respond to temperature adversity by regulating lipid metabolism and ribosome metabolism. Further, accD has higher editing efficiency under high-temperature stress, which may contribute to the heat tolerance. These studies provide useful insight into genetic variation in the chloroplast genome, and established the foundation for exploring the mechanisms of temperature-stimulated chloroplast adaptation.},
}
@article{pmid37239397,
  author   = {Dey, S and Gaur, M and Sykes, EME and Prusty, M and Elangovan, S and Dixit, S and Pati, S and Kumar, A and Subudhi, E},
  title    = {Unravelling the Evolutionary Dynamics of High-Risk {Klebsiella} pneumoniae {ST147} Clones: Insights from Comparative Pangenome Analysis},
  journal  = {Genes},
  year     = {2023},
  volume   = {14},
  number   = {5},
  doi      = {10.3390/genes14051037},
  pmid     = {37239397},
  issn     = {2073-4425},
  abstract = {BACKGROUND: The high prevalence and rapid emergence of antibiotic resistance in high-risk Klebsiella pneumoniae (KP) ST147 clones is a global health concern and warrants molecular surveillance.
METHODS: A pangenome analysis was performed using publicly available ST147 complete genomes. The characteristics and evolutionary relationships among ST147 members were investigated through a Bayesian phylogenetic analysis.
RESULTS: The large number of accessory genes in the pangenome indicates genome plasticity and openness. Seventy-two antibiotic resistance genes were found to be linked with antibiotic inactivation, efflux, and target alteration. The exclusive detection of the blaOXA-232 gene within the ColKp3 plasmid of KP_SDL79 suggests its acquisition through horizontal gene transfer. The association of seventy-six virulence genes with the acrAB efflux pump, T6SS system and type I secretion system describes its pathogenicity. The presence of Tn6170, a putative Tn7-like transposon in KP_SDL79 with an insertion at the flanking region of the tnsB gene, establishes its transmission ability. The Bayesian phylogenetic analysis estimates ST147's initial divergence in 1951 and the most recent common ancestor for the entire KP population in 1621.
CONCLUSIONS: Present study highlights the genetic diversity and evolutionary dynamics of high-risk clones of K. pneumoniae. Further inter-clonal diversity studies will help us understand its outbreak more precisely and pave the way for therapeutic interventions.},
}
@article{pmid37229109,
  author   = {Jha, UC and Nayyar, H and Chattopadhyay, A and Beena, R and Lone, AA and Naik, YD and Thudi, M and Prasad, PVV and Gupta, S and Dixit, GP and Siddique, KHM},
  title    = {Major viral diseases in grain legumes: designing disease resistant legumes from plant breeding and {OMICS} integration},
  journal  = {Frontiers in plant science},
  year     = {2023},
  volume   = {14},
  pages    = {1183505},
  doi      = {10.3389/fpls.2023.1183505},
  pmid     = {37229109},
  issn     = {1664-462X},
  abstract = {Grain legumes play a crucial role in human nutrition and as a staple crop for low-income farmers in developing and underdeveloped nations, contributing to overall food security and agroecosystem services. Viral diseases are major biotic stresses that severely challenge global grain legume production. In this review, we discuss how exploring naturally resistant grain legume genotypes within germplasm, landraces, and crop wild relatives could be used as promising, economically viable, and eco-environmentally friendly solution to reduce yield losses. Studies based on Mendelian and classical genetics have enhanced our understanding of key genetic determinants that govern resistance to various viral diseases in grain legumes. Recent advances in molecular marker technology and genomic resources have enabled us to identify genomic regions controlling viral disease resistance in various grain legumes using techniques such as QTL mapping, genome-wide association studies, whole-genome resequencing, pangenome and 'omics' approaches. These comprehensive genomic resources have expedited the adoption of genomics-assisted breeding for developing virus-resistant grain legumes. Concurrently, progress in functional genomics, especially transcriptomics, has helped unravel underlying candidate gene(s) and their roles in viral disease resistance in legumes. 
This review also examines the progress in genetic engineering-based strategies, including RNA interference, and the potential of synthetic biology techniques, such as synthetic promoters and synthetic transcription factors, for creating viral-resistant grain legumes. It also elaborates on the prospects and limitations of cutting-edge breeding technologies and emerging biotechnological tools (e.g., genomic selection, rapid generation advances, and CRISPR/Cas9-based genome editing tool) in developing virus-disease-resistant grain legumes to ensure global food security.},
}
@article{pmid37228750,
  author   = {Groza, C and Chen, X and Pacis, A and Simon, MM and Pramatarova, A and Aracena, KA and Pastinen, T and Barreiro, LB and Bourque, G},
  title    = {Genome graphs detect human polymorphisms in active epigenomic state during influenza infection},
  journal  = {Cell genomics},
  year     = {2023},
  volume   = {3},
  number   = {5},
  pages    = {100294},
  doi      = {10.1016/j.xgen.2023.100294},
  pmid     = {37228750},
  issn     = {2666-979X},
  abstract = {Genetic variants, including mobile element insertions (MEIs), are known to impact the epigenome. We hypothesized that genome graphs, which encapsulate genetic diversity, could reveal missing epigenomic signals. To test this, we sequenced the epigenome of monocyte-derived macrophages from 35 ancestrally diverse individuals before and after influenza infection, allowing us to investigate the role of MEIs in immunity. We characterized genetic variants and MEIs using linked reads and built a genome graph. Mapping epigenetic data revealed 2.3%-3% novel peaks for H3K4me1, H3K27ac chromatin immunoprecipitation sequencing (ChIP-seq), and ATAC-seq. Additionally, the use of a genome graph modified some quantitative trait loci estimates and revealed 375 polymorphic MEIs in an active epigenomic state. Among these is an AluYh3 polymorphism whose chromatin state changed after infection and was associated with the expression of TRIM25, a gene that restricts influenza RNA synthesis. Our results demonstrate that graph genomes can reveal regulatory regions that would have been overlooked by other approaches.},
}
@article{pmid37227251,
  author   = {Tonkin-Hill, G and Corander, J and Parkhill, J},
  title    = {Challenges in prokaryote pangenomics.},
  journal  = {Microbial genomics},
  year     = {2023},
  volume   = {9},
  number   = {5},
  pages    = {},
  doi      = {10.1099/mgen.0.001021},
  pmid     = {37227251},
  issn     = {2057-5858},
  abstract = {Horizontal gene transfer (HGT) and the resulting patterns of gene gain and loss are a fundamental part of bacterial evolution. Investigating these patterns can help us to understand the role of selection in the evolution of bacterial pangenomes and how bacteria adapt to a new niche. Predicting the presence or absence of genes can be a highly error-prone process that can confound efforts to understand the dynamics of horizontal gene transfer. This review discusses both the challenges in accurately constructing a pangenome and the potential consequences errors can have on downstream analyses. We hope that by summarizing these issues researchers will be able to avoid potential pitfalls, leading to improved bacterial pangenome analyses.},
}
@article{pmid37224809,
  author   = {Wisecaver, JH and Auber, RP and Pendleton, AL and Watervoort, NF and Fallon, TR and Riedling, OL and Manning, SR and Moore, BS and Driscoll, WW},
  title    = {Extreme genome diversity and cryptic speciation in a harmful algal-bloom-forming eukaryote.},
  journal  = {Current biology : CB},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1016/j.cub.2023.05.003},
  pmid     = {37224809},
  issn     = {1879-0445},
  abstract = {Harmful algal blooms of the toxic haptophyte Prymnesium parvum are a recurrent problem in many inland and estuarine waters around the world. Strains of P. parvum vary in the toxins they produce and in other physiological traits associated with harmful algal blooms, but the genetic basis for this variation is unknown. To investigate genome diversity in this morphospecies, we generated genome assemblies for 15 phylogenetically and geographically diverse strains of P. parvum, including Hi-C guided, near-chromosome-level assemblies for two strains. Comparative analysis revealed considerable DNA content variation between strains, ranging from 115 to 845 Mbp. Strains included haploids, diploids, and polyploids, but not all differences in DNA content were due to variation in genome copy number. Haploid genome size between strains of different chemotypes differed by as much as 243 Mbp. Syntenic and phylogenetic analyses indicate that UTEX 2797, a common laboratory strain from Texas, is a hybrid that retains two phylogenetically distinct haplotypes. Investigation of gene families variably present across the strains identified several functional categories associated with metabolic and genome size variation in P. parvum, including genes for the biosynthesis of toxic metabolites and proliferation of transposable elements. Together, our results indicate that P. parvum comprises multiple cryptic species. 
These genomes provide a robust phylogenetic and genomic framework for investigations into the eco-physiological consequences of the intra- and inter-specific genetic variation present in P. parvum and demonstrate the need for similar resources for other harmful algal-bloom-forming morphospecies.},
}
@article{pmid37222600,
  author   = {Tchan, BGO and Ngazoa-Kakou, S and Aka, N and Apia, NKB and Hammoudi, N and Drancourt, M and Saad, J},
  title    = {{PPE} Barcoding Identifies Biclonal {Mycobacterium} ulcerans {Buruli} Ulcer, {Côte} {d'Ivoire}.},
  journal  = {Microbiology spectrum},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {e0034223},
  doi      = {10.1128/spectrum.00342-23},
  pmid     = {37222600},
  issn     = {2165-0497},
  abstract = {Mycobacterium ulcerans, an environmental opportunistic pathogen, causes necrotic cutaneous and subcutaneous lesions, named Buruli ulcers, in tropical countries. PCR-derived tests used to detect M. ulcerans in environmental and clinical samples do not allow one-shot detection, identification, and typing of M. ulcerans among closely related Mycobacterium marinum complex mycobacteria. We established a 385-member M. marinum/M. ulcerans complex whole-genome sequence database by assembling and annotating 341 M. marinum/M. ulcerans complex genomes and added 44 M. marinum/M. ulcerans complex whole-genome sequences already deposited in the NCBI database. Pangenome, core genome, and single-nucleotide polymorphism (SNP) distance-based comparisons sorted the 385 strains into 10 M. ulcerans taxa and 13 M. marinum taxa, correlating with the geographic origin of strains. Aligning conserved genes identified one PPE (proline-proline-glutamate) gene sequence to be species and intraspecies specific, thereby genotyping the 23 M. marinum/M. ulcerans complex taxa. PCR sequencing of the PPE gene correctly genotyped nine M. marinum/M. ulcerans complex isolates among one M. marinum taxon and three M. ulcerans taxa in the African taxon (T2.4). Further, successful PPE gene PCR sequencing in 15/21 (71.4%) swabs collected from suspected Buruli ulcer lesions in Côte d'Ivoire exhibited positive M. ulcerans IS2404 real-time PCR and identified the M. ulcerans T2.4.1 genotype in eight swabs and M. ulcerans T2.4.1/T2.4.2 mixed genotypes in seven swabs. 
PPE gene sequencing could be used as a proxy for whole-genome sequencing for the one-shot detection, identification, and typing of clinical M. ulcerans strains, offering an unprecedented tool for identifying M. ulcerans mixed infections. IMPORTANCE We describe a new targeted sequencing approach that characterizes the PPE gene to disclose the simultaneous presence of different variants of a single pathogenic microorganism. This approach has direct implications on the understanding of pathogen diversity and natural history and potential therapeutic implications when dealing with obligate and opportunistic pathogens, such as Mycobacterium ulcerans presented here as a prototype.},
}
@article{pmid37221394,
  author   = {Drott, MT and Park, SC and Wang, YW and Harrow, L and Keller, NP and Pringle, A},
  title    = {Pangenomics of the death cap mushroom {Amanita} phalloides, and of {Agaricales}, reveals dynamic evolution of toxin genes in an invasive range.},
  journal  = {The ISME journal},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  pmid     = {37221394},
  issn     = {1751-7370},
  abstract = {The poisonous European mushroom Amanita phalloides (the "death cap") is invading California. Whether the death caps' toxic secondary metabolites are evolving as it invades is unknown. We developed a bioinformatic pipeline to identify the MSDIN genes underpinning toxicity and probed 88 death cap genomes from an invasive Californian population and from the European range, discovering a previously unsuspected diversity of MSDINs made up of both core and accessory elements. Each death cap individual possesses a unique suite of MSDINs, and toxin genes are significantly differentiated between Californian and European samples. MSDIN genes are maintained by strong natural selection, and chemical profiling confirms MSDIN genes are expressed and result in distinct phenotypes; our chemical profiling also identified a new MSDIN peptide. Toxin genes are physically clustered within genomes. We contextualize our discoveries by probing for MSDINs in genomes from across the order Agaricales, revealing MSDIN diversity originated in independent gene family expansions among genera. We also report the discovery of an MSDIN in an Amanita outside the "lethal Amanitas" clade. Finally, the identification of an MSDIN gene and its associated processing gene (POPB) in Clavaria fumosa suggest the origin of MSDINs is older than previously suspected. The dynamic evolution of MSDINs underscores their potential to mediate ecological interactions, implicating MSDINs in the ongoing invasion. 
Our data change the understanding of the evolutionary history of poisonous mushrooms, emphasizing striking parallels to convergently evolved animal toxins. Our pipeline provides a roadmap for exploring secondary metabolites in other basidiomycetes and will enable drug prospecting.},
}
@article{pmid37217946,
  author   = {Leonard, AS and Crysnanto, D and Mapel, XM and Bhati, M and Pausch, H},
  title    = {Graph construction method impacts variation representation and analyses in a bovine super-pangenome.},
  journal  = {Genome biology},
  year     = {2023},
  volume   = {24},
  number   = {1},
  pages    = {124},
  pmid     = {37217946},
  issn     = {1474-760X},
  abstract = {BACKGROUND: Several models and algorithms have been proposed to build pangenomes from multiple input assemblies, but their impact on variant representation, and consequently downstream analyses, is largely unknown.
RESULTS: We create multi-species super-pangenomes using pggb, cactus, and minigraph with the Bos taurus taurus reference sequence and eleven haplotype-resolved assemblies from taurine and indicine cattle, bison, yak, and gaur. We recover 221 k nonredundant structural variations (SVs) from the pangenomes, of which 135 k (61%) are common to all three. SVs derived from assembly-based calling show high agreement with the consensus calls from the pangenomes (96%), but validate only a small proportion of variations private to each graph. Pggb and cactus, which also incorporate base-level variation, have approximately 95% exact matches with assembly-derived small variant calls, which significantly improves the edit rate when realigning assemblies compared to minigraph. We use the three pangenomes to investigate 9566 variable number tandem repeats (VNTRs), finding 63% have identical predicted repeat counts in the three graphs, while minigraph can over or underestimate the count given its approximate coordinate system. We examine a highly variable VNTR locus and show that repeat unit copy number impacts the expression of proximal genes and non-coding RNA.
CONCLUSIONS: Our findings indicate good consensus between the three pangenome methods but also show their individual strengths and weaknesses that need to be considered when analysing different types of variants from multiple input assemblies.},
}
@article{pmid37217755,
  author   = {},
  title    = {Combining reference genomes into a pangenome graph improves accuracy and reduces bias.},
  journal  = {Nature biotechnology},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  pmid     = {37217755},
  issn     = {1546-1696},
}
@article{pmid37216590,
  author   = {Geoffroy, V and Lamouche, JB and Guignard, T and Nicaise, S and Kress, A and Scheidecker, S and Le Béchec, A and Muller, J},
  title    = {The {AnnotSV} webserver in 2023: updated visualization and ranking.},
  journal  = {Nucleic acids research},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1093/nar/gkad426},
  pmid     = {37216590},
  issn     = {1362-4962},
  abstract = {Much of the human genetics variant repertoire is composed of single nucleotide variants (SNV) and small insertion/deletions (indel) but structural variants (SV) remain a major part of our modified DNA. SV detection has often been a complex question to answer either because of the necessity to use different technologies (array CGH, SNP array, Karyotype, Optical Genome Mapping…) to detect each category of SV or to get an appropriate resolution (Whole Genome Sequencing). Thanks to the deluge of pangenomic analysis, Human geneticists are accumulating SV and their interpretation remains time consuming and challenging. The AnnotSV webserver (https://www.lbgi.fr/AnnotSV/) aims at being an efficient tool to (i) annotate and interpret SV potential pathogenicity in the context of human diseases, (ii) recognize potential false positive variants from all the SV identified and (iii) visualize the patient variants repertoire. The most recent developments in the AnnotSV webserver are: (i) updated annotations sources and ranking, (ii) three novel output formats to allow diverse utilization (analysis, pipelines), as well as (iii) two novel user interfaces including an interactive circos view.},
}
@article{pmid37214944,
  author   = {Fan, J and Singh, NP and Khan, J and Pibiri, GE and Patro, R},
  title    = {Fulgor: A fast and compact k-mer index for large-scale matching and color queries.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1101/2023.05.09.539895},
  pmid     = {37214944},
  abstract = {UNLABELLED: The problem of sequence identification or matching - determining the subset of references from a given collection that are likely to contain a query nucleotide sequence - is relevant for many important tasks in Computational Biology, such as metagenomics and pan-genome analysis. Due to the complex nature of such analyses and the large scale of the reference collections a resource-efficient solution to this problem is of utmost importance. The reference collection should therefore be pre-processed into an index for fast queries. This poses the threefold challenge of designing an index that is efficient to query, has light memory usage, and scales well to large collections. To solve this problem, we describe how recent advancements in associative, order-preserving, k-mer dictionaries can be combined with a compressed inverted index to implement a fast and compact colored de Bruijn graph data structure. This index takes full advantage of the fact that unitigs in the colored de Bruijn graph are monochromatic (all k-mers in a unitig have the same set of references of origin, or "color"), leveraging the order-preserving property of its dictionary. In fact, k-mers are kept in unitig order by the dictionary, thereby allowing for the encoding of the map from k-mers to their inverted lists in as little as 1 + o(1) bits per unitig. Hence, one inverted list per unitig is stored in the index with almost no space/time overhead. By combining this property with simple but effective compression methods for inverted lists, the index achieves very small space. 
We implement these methods in a tool called Fulgor. Compared to Themisto, the prior state of the art, Fulgor indexes a heterogeneous collection of 30,691 bacterial genomes in 3.8 × less space, a collection of 150,000 Salmonella enterica genomes in approximately 2 × less space, and is at least twice as fast for color queries.
Applied computing → Bioinformatics.},
}
@article{pmid37214799,
  author   = {Ferrero-Serrano, Á and Chakravorty, D and Kirven, KJ and Assmann, SM},
  title    = {Oryza {CLIMtools}: An Online Portal for Investigating Genome-Environment Associations in Rice.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1101/2023.05.10.540241},
  pmid     = {37214799},
  abstract = {Elite crop varieties display an evident mismatch between their current distributions and the suitability of the local climate for their productivity. To this end, we present Oryza CLIMtools, (https://gramene.org/CLIMtools/oryza_v1.0/) the first pan-genome prediction of climate-associated genetic variants in a crop species. This resource consists of interactive web-based databases that allow the user to: i) explore the local environment and its interaction with natural existing genetic variation in local rice varieties (landraces) in South-Eastern Asia, and; ii) investigate the environment × genome associations for 658 Indica and 283 Japonica rice landrace accessions included in the 3K Rice Genomes Project and previously collected from their geo-referenced local environments. We exemplify the value of these resources, identifying an interplay between flowering time and temperature in the local environment that is facilitated by adaptive natural variation in OsHD2 and disrupted by maladaptive variation in OsSOC1. Prior QTL analysis has suggested the importance of heterotrimeric G proteins in the control of agronomic traits. Accordingly, we analyzed the climate associations of the different heterotrimeric G protein subunits. We identified a coordinated role of G proteins in adaptation to the prevailing Potential Evapotranspiration gradient and their regulation of key agronomic traits including plant height, seed, and panicle length. We conclude by highlighting the prospect of targeting heterotrimeric G proteins to produce crops that are climate-change-ready.},
}
@article{pmid37213867,
  author   = {Zachariasen, T and Petersen, AØ and Brejnrod, A and Vestergaard, GA and Eklund, A and Nielsen, HB},
  title    = {Identification of representative species-specific genes for abundance measurements.},
  journal  = {Bioinformatics advances},
  year     = {2023},
  volume   = {3},
  number   = {1},
  pages    = {vbad060},
  pmid     = {37213867},
  issn     = {2635-0041},
  abstract = {MOTIVATION: Metagenomic binning facilitates the reconstruction of genomes and identification of Metagenomic Species Pan-genomes or Metagenomic Assembled Genomes. We propose a method for identifying a set of de novo representative genes, termed signature genes, which can be used to measure the relative abundance and used as markers of each metagenomic species with high accuracy.
RESULTS: An initial set of the 100 genes that correlate with the median gene abundance profile of the entity is selected. A variant of the coupon collector's problem was utilized to evaluate the probability of identifying a certain number of unique genes in a sample. This allows us to reject the abundance measurements of strains exhibiting a significantly skewed gene representation. A rank-based negative binomial model is employed to assess the performance of different gene sets across a large set of samples, facilitating identification of an optimal signature gene set for the entity. When benchmarked the method on a synthetic gene catalog, our optimized signature gene sets estimate relative abundance significantly closer to the true relative abundance compared to the starting gene sets extracted from the metagenomic species. The method was able to replicate results from a study with real data and identify around three times as many metagenomic entities.
The code used for the analysis is available on GitHub: https://github.com/trinezac/SG_optimization.
SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics Advances online.},
}
@article{pmid37213168,
  author   = {Youngblom, MA and Shockey, AC and Callaghan, MM and Dillard, JP and Pepperell, CS},
  title    = {The {Gonococcal} {Genetic} {Island} defines distinct sub-populations of {Neisseria} gonorrhoeae.},
  journal  = {Microbial genomics},
  year     = {2023},
  volume   = {9},
  number   = {5},
  pages    = {},
  doi      = {10.1099/mgen.0.000985},
  pmid     = {37213168},
  issn     = {2057-5858},
  abstract = {The incidence of gonorrhoea is increasing at an alarming pace, and therapeutic options continue to narrow as a result of worsening drug resistance. Neisseria gonorrhoeae is naturally competent, allowing the organism to adapt rapidly to selection pressures including antibiotics. A sub-population of N. gonorrhoeae carries the Gonococcal Genetic Island (GGI), which encodes a type IV secretion system (T4SS) that secretes chromosomal DNA. Previous research has shown that the GGI increases transformation efficiency in vitro, but the extent to which it contributes to horizontal gene transfer (HGT) during infection is unknown. Here we analysed genomic data from clinical isolates of N. gonorrhoeae to better characterize GGI+ and GGI- sub-populations and to delineate patterns of variation at the locus itself. We found the element segregating at an intermediate frequency (61%), and it appears to act as a mobile genetic element with examples of gain, loss, exchange and intra-locus recombination within our sample. We further found evidence suggesting that GGI+ and GGI- sub-populations preferentially inhabit distinct niches with different opportunities for HGT. Previously, GGI+ isolates were reported to be associated with more severe clinical infections, and our results suggest this could be related to metal-ion trafficking and biofilm formation. The co-segregation of GGI+ and GGI- isolates despite mobility of the element suggests that both niches inhabited by N. 
gonorrhoeae remain important to its overall persistence as has been demonstrated previously for cervical- and urethral-adapted sub-populations. These data emphasize the complex population structure of N. gonorrhoeae and its capacity to adapt to diverse niches.},
}
@article{pmid37207930,
  author   = {Qanmber, G and You, Q and Yang, Z and Fang, L and Zhang, Z and Chai, M and Gao, B and Li, F and Yang, Z},
  title    = {Transcriptional and translational landscape fine-tune genome annotation and explores translation control in cotton.},
  journal  = {Journal of advanced research},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1016/j.jare.2023.05.004},
  pmid     = {37207930},
  issn     = {2090-1224},
  abstract = {INTRODUCTION: The unavailability of intergenic region annotation in whole genome sequencing and pan-genomics hinders efforts to enhance crop improvement.
OBJECTIVES: Despite advances in research, the impact of post-transcriptional regulation on fiber development and translatome profiling at different stages of fiber growth in cotton (G. hirsutum) remains unexplored.
METHODS: We utilized a combination of reference-guided de novo transcriptome assembly and ribosome profiling techniques to uncover the hidden mechanisms of translational control in eight distinct tissues of upland cotton.
RESULTS: Our study identified P-site distribution at three-nucleotide periodicity and dominant ribosome footprint at 27 nucleotides. Specifically, we have detected 1,589 small open reading frames (sORFs), including 1,376 upstream ORFs (uORFs) and 213 downstream ORFs (dORFs), as well as 552 long non-coding RNAs (lncRNAs) with potential coding functions, which fine-tune the annotation of the cotton genome. Further, we have identified novel genes and lncRNAs with strong translation efficiency (TE), while sORFs were found to affect mRNA transcription levels during fiber elongation. The reliability of these findings was confirmed by the high consistency in correlation and synergetic fold change between RNA-sequencing (RNA-seq) and Ribosome-sequencing (Ribo-seq) analyses. Additionally, integrated omics analysis of the normal fiber ZM24 and short fiber pag1 cotton mutant revealed several differentially expressed genes (DEGs), and fiber-specific expressed (high/low) genes associated with sORFs (uORFs and dORFs). These findings were further supported by the overexpression and knockdown of GhKCS6, a gene associated with sORFs in cotton, and demonstrated the potential regulation of the mechanism governing fiber elongation on both the transcriptional and post-transcriptional levels.
CONCLUSION: Reference-guided transcriptome assembly and the identification of novel transcripts fine-tune the annotation of the cotton genome and predicted the landscape of fiber development. Our approach provided a high-throughput method, based on multi-omics, for discovering unannotated ORFs, hidden translational control, and complex regulatory mechanisms in crop plants.},
}
@article{pmid37202927,
  author   = {Zhang, B and Huang, H and Tibbs-Cortes, LE and Vanous, A and Zhang, Z and Sanguinet, K and Garland-Campbell, KA and Yu, J and Li, X},
  title    = {Streamline unsupervised machine learning to survey and graph indel-based haplotypes from pan-genomes.},
  journal  = {Molecular plant},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1016/j.molp.2023.05.005},
  pmid     = {37202927},
  issn     = {1752-9867},
}
@article{pmid37202771,
  author   = {Ahmed, OY and Rossi, M and Gagie, T and Boucher, C and Langmead, B},
  title    = {{SPUMONI} 2: improved classification using a pangenome index of minimizer digests.},
  journal  = {Genome biology},
  year     = {2023},
  volume   = {24},
  number   = {1},
  pages    = {122},
  pmid     = {37202771},
  issn     = {1474-760X},
  support  = {R01HG011392/HG/NHGRI NIH HHS/United States ; },
  abstract = {Genomics analyses use large reference sequence collections, like pangenomes or taxonomic databases. SPUMONI 2 is an efficient tool for sequence classification of both short and long reads. It performs multi-class classification using a novel sampled document array. By incorporating minimizers, SPUMONI 2's index is 65 times smaller than minimap2's for a mock community pangenome. SPUMONI 2 achieves a speed improvement of 3-fold compared to SPUMONI and 15-fold compared to minimap2. We show SPUMONI 2 achieves an advantageous mix of accuracy and efficiency in practical scenarios such as adaptive sampling, contamination detection and multi-class metagenomics classification.},
}
@article{pmid37202587,
  author   = {Anbazhagan, S and Himani, KM and Karthikeyan, R and Prakasan, L and Dinesh, M and Nair, SS and Lalsiamthara, J and {Abhishek} and Ramachandra, SG and Chaturvedi, VK and Chaudhuri, P and Thomas, P},
  title    = {Comparative genomics of {Brucella} abortus and {Brucella} melitensis unravels the gene sharing, virulence factors and {SNP} diversity among the standard, vaccine and field strains.},
  journal  = {International microbiology : the official journal of the Spanish Society for Microbiology},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  pmid     = {37202587},
  issn     = {1618-1905},
  abstract = {Brucella abortus and Brucella melitensis are the primary etiological agents of brucellosis in large and small ruminants, respectively. There are limited comparative genomic studies involving Brucella strains that explore the relatedness among both species. In this study, we involved strains (n=44) representing standard, vaccine and Indian field origin for pangenome, single nucleotide polymorphism (SNP) and phylogenetic analysis. Both species shared a common gene pool representing 2884 genes out of a total 3244 genes. SNP-based phylogenetic analysis indicated higher SNP diversity among B. melitensis (3824) strains in comparison to B. abortus (540) strains, and a clear demarcation was identified between standard/vaccine and field strains. The analysis for virulence genes revealed that virB3, virB7, ricA, virB5, ipx5, wbkC, wbkB, and acpXL genes were highly conserved in most of the Brucella strains. Interestingly, virB10 gene was found to have high variability among the B. abortus strains. The cgMLST analysis revealed distinct sequence types for the standard/vaccine and field strains. B. abortus strains from north-eastern India fall within similar sequence type differing from other strains. In conclusion, the analysis revealed a highly shared core genome among two Brucella species. SNP analysis revealed B. 
melitensis strains exhibit high diversity as compared to B. abortus strains. Strains with absence or high polymorphism of virulence genes can be exploited for the development of novel vaccine candidates effective against both B. abortus and B. melitensis.},
}
@article{pmid37196842,
  author   = {Tian, R and Xu, S and Li, P and Li, M and Liu, Y and Wang, K and Liu, G and Li, Y and Dai, L and Zhang, W},
  title    = {Characterization of {G-type} {Clostridium} perfringens bacteriophages and their disinfection effect on chicken meat.},
  journal  = {Anaerobe},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {102736},
  doi      = {10.1016/j.anaerobe.2023.102736},
  pmid     = {37196842},
  issn     = {1095-8274},
  abstract = {OBJECTIVE: Clostridium perfringens is one of most important bacterial pathogens in the poultry industry and mainly causes necrotizing enteritis (NE). This pathogen and its toxins can cause foodborne diseases in humans through the food chain. In China, with the rise of antibiotic resistance and the banning of antibiotic growth promoters (AGPs) in poultry farming, food contamination and NE are becoming more prevalent. Bacteriophages are a viable technique to control C. perfringens as an alternative to antibiotics. We isolated Clostridium phage from the environment, providing a new method for the prevention of NE and C. perfringens contamination in meat.
METHODS: In this study, we selected C. perfringens strains from various regions and animal sources in China for phage isolation. The biological characteristics of Clostridium phage were studied in terms of host range, MOI, one-step curve, temperature and pH stability. We sequenced and annotated the genome of the Clostridium phage and performed phylogenetic and pangenomic analyses. Finally, we studied its antibacterial activity against bacterial culture and its disinfection effect against C. perfringens in meat.
RESULTS: A Clostridium phage, named ZWPH-P21 (P21), was isolated from chicken farm sewage in Jiangsu, China. P21 has been shown to specifically lyse C. perfringens type G. Further analysis of basic biological characteristics showed that P21 was stable under the conditions of pH 4-11 and temperature 4-60 °C, and the optimal multiple severity of infection (MOI) was 0.1. In addition, P21 could form a "halo" on agar plates, suggesting that the phage may encode depolymerase. Genome sequence analysis showed that P21 was the most closely related to Clostridium phage CPAS-15 belonging to the Myoviridae family, with a recognition rate of 97.24% and a query coverage rate of 98%. No virulence factors or drug resistance genes were found in P21. P21 showed promising antibacterial activity in vitro and in chicken disinfection experiments. In conclusion, P21 has the potential to be used for preventing and controlling C. perfringens in chicken food production.},
}
@article{pmid37195730,
  author   = {Tanwar, AS and Shruptha, P and Jnana, A and Brand, A and Ballal, M and Satyamoorthy, K and Murali, TS},
  title    = {Emerging Pathogens in Planetary Health and Lessons from Comparative Genome Analyses of Three {Clostridia} Species.},
  journal  = {Omics : a journal of integrative biology},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1089/omi.2023.0034},
  pmid     = {37195730},
  issn     = {1557-8100},
  abstract = {Clostridioides difficile (CD) is a major planetary health burden. A Gram-positive opportunistic pathogen, CD, colonizes the large intestine and is implicated in sepsis, pseudomembranous colitis, and colorectal cancer. C. difficile infection typically following antibiotic exposure results in dysbiosis of the gut microbiome, and is one of the leading causes of diarrhea in the elderly population. While several studies have focused on the toxigenic strains of CD, gut commensals such as Clostridium butyricum (CB) and Clostridium tertium (CT) could harbor toxin/virulence genes, and thus pose a threat to human health. In this study, we sequenced and characterized three isolates, namely, CT (MALS001), CB (MALS002), and CD (MALS003) for their antimicrobial, cytotoxic, antiproliferative, genomic, and proteomic profiles. Although in vitro cytotoxic and antiproliferative potential were observed predominantly in CD MALS003, genome analysis revealed pathogenic potential of CB MALS002 and CT MALS001. Pangenome analysis revealed the presence of several accessory genes typically involved in fitness, virulence, and resistance characteristics in the core genomes of sequenced strains. The presence of an array of virulence and antimicrobial resistance genes in CB MALS002 and CT MALS001 suggests their potential role as emerging pathogens with significant impact on planetary health.},
}
@article{pmid37195188,
  author   = {Murik, O and Zeevi, DA and Mann, T and Kashat, L and Assous, MV and Megged, O and Yagupsky, P},
  title    = {Whole-Genome Sequencing Reveals Differences among {Kingella} kingae Strains from Carriers and Patients with Invasive Infections.},
  journal  = {Microbiology spectrum},
  year     = {2023},
  pages    = {e0389522},
  doi      = {10.1128/spectrum.03895-22},
  pmid     = {37195188},
  issn     = {2165-0497},
  abstract = {As a result of the increasing use of sensitive nucleic acid amplification tests, Kingella kingae is being recognized as a common pathogen of early childhood, causing medical conditions ranging from asymptomatic oropharyngeal colonization to bacteremia, osteoarthritis, and life-threatening endocarditis. However, the genomic determinants associated with the different clinical outcomes are unknown. Employing whole-genome sequencing, we studied 125 international K. kingae isolates derived from 23 healthy carriers and 102 patients with invasive infections, including bacteremia (n = 23), osteoarthritis (n = 61), and endocarditis (n = 18). We compared their genomic structures and contents to identify genomic determinants associated with the different clinical conditions. The mean genome size of the strains was 2,024,228 bp, and the pangenome comprised 4,026 predicted genes, of which 1,460 (36.3%) were core genes shared by >99% of the isolates. No single gene discriminated between carried and invasive strains; however, 43 genes were significantly more frequent in invasive isolates, compared to asymptomatically carried organisms, and a few showed a significant differential distribution among isolates from skeletal system infections, bacteremia, and endocarditis. The gene encoding the iron-regulated protein FrpC was uniformly absent in all 18 endocarditis-associated strains but was present in one-third of other invasive isolates. Similar to other members of the Neisseriaceae family, the K. kingae differences in invasiveness and tropism for specific body tissues appear to depend on combinations of multiple virulence-associated determinants that are widely distributed throughout the genome. The potential role of the absence of the FrpC protein in the pathogenesis of endocardial invasion deserves further investigation. IMPORTANCE The wide range of clinical severities exhibited by invasive Kingella kingae infections strongly suggests that isolates differ in their genomic contents, and strains associated with life-threatening endocarditis may harbor distinct genomic determinants that result in cardiac tropism and severe tissue damage. The results of the present study show that no single gene discriminated between asymptomatically carried isolates and invasive strains. However, 43 putative genes were significantly more frequent among invasive isolates than among pharyngeal colonizers. In addition, several genes displayed a significant differential distribution among isolates from bacteremia, skeletal system infections, and endocarditis, suggesting that the virulence and tissue tropism of K. kingae are multifactorial and polygenic, depending on changes in the allele content and genomic organization. Further analysis of these putative genes may identify genomic determinants of the invasiveness of K. kingae and its affinity for specific body tissues and potential targets for a future protective vaccine.},
}
@article{pmid37193328,
  author   = {Kalaivanan, NS and Ghoshal, T and Lakshmi, MA and Mondal, KK and Kulshreshtha, A and Singh, KBM and Thakur, JK and Supriya, P and Bhatnagar, S and Mani, C},
  title    = {Complete genome resource unravels the close relation of an {Indian} {Xanthomonas} oryzae pv. oryzae strain {IXOBB0003} with {Philippines} strain causing bacterial blight of rice.},
  journal  = {3 Biotech},
  year     = {2023},
  volume   = {13},
  number   = {6},
  pages    = {187},
  doi      = {10.1007/s13205-023-03596-x},
  pmid     = {37193328},
  issn     = {2190-572X},
  abstract = {UNLABELLED: Xanthomonas oryzae pv. oryzae (Xoo) is a pathogen of concern for rice growers as it limits the production potential of rice varieties worldwide. Due to their high genomic plasticity, the pathogen continues to evolve, nullifying the deployed resistance mechanisms. It is pertinent to monitor the evolving Xoo population for the virulent novel stains, and the affordable sequencing technologies made the task feasible with an in-depth understanding of their pathogenesis arsenals. We present the complete genome of a highly virulent Indian Xoo strain IXOBB0003, predominantly found in northwestern parts of India, by employing next-generation sequencing and single-molecule sequencing in real-time technologies. The final genome assembly comprises 4,962,427 bp and has 63.96% GC content. The pan genome analysis reveals that strain IXOBB0003 houses total of 3655 core genes, 1276 accessory genes and 595 unique genes. Comparative analysis of the predicted gene clusters of coding sequences and protein count of strain IXOBB0003 depicts 3687 of almost 90% gene clusters shared by other Asian strains, 17 unique to IXOBB0003 and 139 CDSs of IXOBB0003 are shared with PXO99[A]. AnnoTALE-based studies revealed 16 TALEs conferred from the whole genome sequence. Prominent TALEs of our strain are found orthologous to TALEs of the Philippines strain PXO99[A]. The genomic features of Indian Xoo strain IXOBB0003 and in comparison with other Asian strains would certainly contribute significantly while formulating novel strategies for BB management.
SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s13205-023-03596-x.},
}
@article{pmid37192177,
  author   = {Price, RJ and Davik, J and Fernandéz Fernandéz, F and Bates, HJ and Lynn, S and Nellist, CF and Buti, M and Røen, D and Šurbanovski, N and Alsheikh, M and Harrison, RJ and Sargent, DJ},
  title    = {Chromosome-scale genome sequence assemblies of the `{Autumn Bliss}' and `{Malling Jewel}' cultivars of the highly heterozygous red raspberry ({Rubus} idaeus {L.}) derived from long-read {Oxford} {Nanopore} sequence data.},
  journal  = {PloS one},
  year     = {2023},
  volume   = {18},
  number   = {5},
  pages    = {e0285756},
  doi      = {10.1371/journal.pone.0285756},
  pmid     = {37192177},
  issn     = {1932-6203},
  abstract = {Red raspberry (Rubus idaeus L.) is an economically valuable soft-fruit species with a relatively small (~300 Mb) but highly heterozygous diploid (2n = 2x = 14) genome. Chromosome-scale genome sequences are a vital tool in unravelling the genetic complexity controlling traits of interest in crop plants such as red raspberry, as well as for functional genomics, evolutionary studies, and pan-genomics diversity studies. In this study, we developed genome sequences of a primocane fruiting variety ('Autumn Bliss') and a floricane variety ('Malling Jewel'). The use of long-read Oxford Nanopore Technologies sequencing data yielded long read lengths that permitted well resolved genome sequences for the two cultivars to be assembled. The de novo assemblies of 'Malling Jewel' and 'Autumn Bliss' contained 79 and 136 contigs respectively, and 263.0 Mb of the 'Autumn Bliss' and 265.5 Mb of the 'Malling Jewel' assembly could be anchored unambiguously to a previously published red raspberry genome sequence of the cultivar 'Anitra'. Single copy ortholog analysis (BUSCO) revealed high levels of completeness in both genomes sequenced, with 97.4% of sequences identified in 'Autumn Bliss' and 97.7% in 'Malling Jewel'. The density of repetitive sequence contained in the 'Autumn Bliss' and 'Malling Jewel' assemblies was significantly higher than in the previously published assembly and centromeric and telomeric regions were identified in both assemblies. A total of 42,823 protein coding regions were identified in the 'Autumn Bliss' assembly, whilst 43,027 were identified in the 'Malling Jewel' assembly. These chromosome-scale genome sequences represent an excellent genomics resource for red raspberry, particularly around the highly repetitive centromeric and telomeric regions of the genome that are less complete in the previously published 'Anitra' genome sequence.},
}
@article{pmid37186225,
  author   = {Kuzmanović, N and diCenzo, GC and Bunk, B and Spröer, C and Frühling, A and Neumann-Schaal, M and Overmann, J and Smalla, K},
  title    = {Genomics of the ``tumorigenes'' clade of the family {Rhizobiaceae} and description of {Rhizobium} rhododendri sp. nov.},
  journal  = {MicrobiologyOpen},
  year     = {2023},
  volume   = {12},
  number   = {2},
  pages    = {e1352},
  doi      = {10.1002/mbo3.1352},
  pmid     = {37186225},
  issn     = {2045-8827},
  abstract = {Tumorigenic members of the family Rhizobiaceae, known as agrobacteria, are responsible for crown and cane gall diseases of various crops worldwide. Tumorigenic agrobacteria are commonly found in the genera Agrobacterium, Allorhizobium, and Rhizobium. In this study, we analyzed a distinct "tumorigenes" clade of the genus Rhizobium, which includes the tumorigenic species Rhizobium tumorigenes, as well as strains causing crown gall disease on rhododendron. Here, high-quality, closed genomes of representatives of the "tumorigenes" clade were generated, followed by comparative genomic and phylogenomic analyses. Additionally, the phenotypic characteristics of representatives of the "tumorigenes" clade were analyzed. Our results showed that the tumorigenic strains isolated from rhododendron represent a novel species of the genus Rhizobium for which the name Rhizobium rhododendri sp. nov. is proposed. This species also includes additional strains originating from blueberry and Himalayan blackberry in the United States, whose genome sequences were retrieved from GenBank. Both R. tumorigenes and R. rhododendri contain multipartite genomes, including a chromosome, putative chromids, and megaplasmids. Synteny and phylogenetic analyses indicated that a large putative chromid of R. rhododendri resulted from the cointegration of an ancestral megaplasmid and two putative chromids, following its divergence from R. tumorigenes. Moreover, gene clusters specific for both species of the "tumorigenes" clade were identified, and their biological functions and roles in the ecological diversification of R. rhododendri and R. tumorigenes were predicted and discussed.},
}
@article{pmid37180381,
  author   = {Pham, HH and Kim, DH and Nguyen, TL},
  title    = {Wide-genome selection of lactic acid bacteria harboring genes that promote the elimination of antinutritional factors.},
  journal  = {Frontiers in plant science},
  year     = {2023},
  volume   = {14},
  pages    = {1145041},
  doi      = {10.3389/fpls.2023.1145041},
  pmid     = {37180381},
  issn     = {1664-462X},
  abstract = {Anti-nutritional factors (ANFs) substances in plant products, such as indigestible non-starchy polysaccharides (α-galactooligosaccharides, α-GOS), phytate, tannins, and alkaloids can impede the absorption of many critical nutrients and cause major physiological disorders. To enhance silage quality and its tolerance threshold for humans as well as other animals, ANFs must be reduced. This study aims to identify and compare the bacterial species/strains that are potential use for industrial fermentation and ANFs reduction. A pan-genome study of 351 bacterial genomes was performed, and binary data was processed to quantify the number of genes involved in the removal of ANFs. Among four pan-genomes analysis, all 37 tested Bacillus subtilis genomes had one phytate degradation gene, while 91 out of 150 Enterobacteriacae genomes harbor at least one genes (maximum three). Although, no gene encoding phytase detected in genomes of Lactobacillus and Pediococcus species, they have genes involving indirectly in metabolism of phytate-derivatives to produce Myo-inositol, an important compound in animal cells physiology. In contrast, genes related to production of lectin, tannase and saponin degrading enzyme did not include in genomes of B. subtilis and Pediococcus species. Our findings suggest a combination of bacterial species and/or unique strains in fermentation, for examples, two Lactobacillus strains (DSM 21115 and ATCC 14869) with B. subtilis SRCM103689, would maximize the efficiency in reducing the ANFs concentration. In conclusion, this study provides insights into bacterial genomes analysis for maximizing nutritional value in plant-based food. Further investigations of gene numbers and repertories correlated to metabolism of different ANFs will help clarifying the efficiency of time consuming and food qualities.},
}
@article{pmid37180261,
  author   = {Meng, X and Chen, F and Xiong, M and Hao, H and Wang, KJ},
  title    = {A new pathogenic isolate of {Kocuria} kristinae identified for the first time in the marine fish {Larimichthys} crocea.},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1129568},
  doi      = {10.3389/fmicb.2023.1129568},
  pmid     = {37180261},
  issn     = {1664-302X},
  abstract = {In recent years, new emerging pathogenic microorganisms have frequently appeared in animals, including marine fish, possibly due to climate change, anthropogenic activities, and even cross-species transmission of pathogenic microorganisms among animals or between animals and humans, which poses a serious issue for preventive medicine. In this study, a bacterium was clearly characterized among 64 isolates from the gills of diseased large yellow croaker Larimichthys crocea that were raised in marine aquaculture. This strain was identified as K. kristinae by biochemical tests with a VITEK 2.0 analysis system and 16S rRNA sequencing and named K. kristinae_LC. The potential genes that might encode virulence-factors were widely screened through sequence analysis of the whole genome of K. kristinae_LC. Many genes involved in the two-component system and drug-resistance were also annotated. In addition, 104 unique genes in K. kristinae_LC were identified by pan genome analysis with the genomes of this strain from five different origins (woodpecker, medical resource, environment, and marine sponge reef) and the analysis results demonstrated that their predicted functions might be associated with adaptation to living conditions such as higher salinity, complex marine biomes, and low temperature. A significant difference in genomic organization was found among the K. kristinae strains that might be related to their hosts living in different environments. The animal regression test for this new bacterial isolate was carried out using L. crocea, and the results showed that this bacterium could cause the death of L. crocea and that the fish mortality was dose-dependent within 5 days post infection, indicating the pathogenicity of K. kristinae_LC to marine fish. Since K. kristinae has been reported as a pathogen for humans and bovines, in our study, we revealed a new isolate of K. kristinae_LC from marine fish for the first time, suggesting the potentiality of cross-species transmission among animals or from marine animals to humans, from which we would gain insight to help in future public prevention strategies for new emerging pathogens.},
}
@article{pmid37175750,
  author   = {An, B and Cai, H and Li, B and Zhang, S and He, Y and Wang, R and Jiao, C and Guo, Y and Xu, L and Xu, Y},
  title    = {Molecular Evolution of Histone Methylation Modification Families in the Plant Kingdom and Their Genome-Wide Analysis in Barley.},
  journal  = {International journal of molecular sciences},
  year     = {2023},
  volume   = {24},
  number   = {9},
  pages    = {8043},
  doi      = {10.3390/ijms24098043},
  pmid     = {37175750},
  issn     = {1422-0067},
  abstract = {In this study, based on the OneKP database and through comparative genetic analysis, we found that HMT and HDM may originate from Chromista and are highly conserved in green plants, and that during the evolution from algae to land plants, histone methylation modifications gradually became complex and diverse, which is more conducive to the adaptation of plants to complex and variable environments. We also characterized the number of members, genetic similarity, and phylogeny of HMT and HDM families in barley using the barley pangenome and the Tibetan Lasa Goumang genome. The results showed that HMT and HDM were highly conserved in the domestication of barley, but there were some differences in the Lasa Goumang SDG subfamily. Expression analysis showed that HvHMTs and HvHDMs were highly expressed in specific tissues and had complex expression patterns under multiple stress treatments. In summary, the amplification and variation of HMT and HDM facilitate plant adaptation to complex terrestrial environments, while they are highly conserved in barley and play an important role in barley growth and development with abiotic stresses. In brief, our findings provide a novel perspective on the origin and evolutionary history of plant HvHMTs and HvHDMs, and lay a foundation for further investigation of their functions in barley.},
}
@article{pmid37173388,
  author   = {Abdella, B and Abozahra, NA and Shokrak, NM and Mohamed, RA and El-Helow, ER},
  title    = {Whole spectrum of {Aeromonas} hydrophila virulence determinants and the identification of novel {SNPs} using comparative pathogenomics.},
  journal  = {Scientific reports},
  year     = {2023},
  volume   = {13},
  number   = {1},
  pages    = {7712},
  pmid     = {37173388},
  issn     = {2045-2322},
  abstract = {Aeromonas hydrophila is a ubiquitous fish pathogen and an opportunistic human pathogen. It is mostly found in aquatic habitats, but it has also been isolated from food and bottled mineral waters. It causes hemorrhagic septicemia, ulcerative disease, and motile Aeromonas septicemia (MAS) in fish and other aquatic animals. Moreover, it might cause gastroenteritis, wound infections, and septicemia in humans. Different variables influence A. hydrophila virulence, including the virulence genes expressed, host susceptibility, and environmental stresses. The identification of virulence factors for a bacterial pathogen will help in the development of preventive and control measures. 95 Aeromonas spp. genomes were examined in the current study, and 53 strains were determined to be valid A. hydrophila. These genomes were examined for pan- and core-genomes using a comparative genomics technique. A. hydrophila has an open pan-genome with 18,306 total genes and 1620 genes in its core-genome. In the pan-genome, 312 virulence genes have been detected. The effector delivery system category had the largest number of virulence genes (87), followed by immunological modulation and motility genes (69 and 46, respectively). This provides new insight into the pathogenicity of A. hydrophila. In the pan-genome, a few distinctive single-nucleotide polymorphisms (SNPs) have been identified in four genes, namely: D-glycero-beta-D-manno-heptose-1,7-bisphosphate 7-phosphatase, chemoreceptor glutamine deamidase, Spermidine N (1)-acetyltransferase, and maleylpyruvate isomerase, which are present in all A. hydrophila genomes, which make them molecular marker candidates for precise identification of A. hydrophila. Therefore, for precise diagnostic and discrimination results, we suggest these genes be considered when designing primers and probes for sequencing, multiplex-PCR, or real-time PCR.},
}
@article{pmid37173271,
  author   = {Raza, A and Bohra, A and Varshney, RK},
  title    = {Pan-genome for pearl millet that beats the heat.},
  journal  = {Trends in plant science},
  year     = {2023},
  doi      = {10.1016/j.tplants.2023.04.016},
  pmid     = {37173271},
  issn     = {1878-4372},
  abstract = {A better understanding of crop genomes reveals that structural variations (SVs) are crucial for genetic improvement. A graph-based pan-genome by Yan et al. uncovered 424 085 genomic SVs and provided novel insights into heat tolerance of pearl millet. We discuss how these SVs can fast-track pearl millet breeding under harsh environments.},
}
@article{pmid37171844,
  author   = {Büchler, T and Olbrich, J and Ohlebusch, E},
  title    = {Efficient short read mapping to a pangenome that is represented by a graph of {ED} strings.},
  journal  = {Bioinformatics (Oxford, England)},
  year     = {2023},
  doi      = {10.1093/bioinformatics/btad320},
  pmid     = {37171844},
  issn     = {1367-4811},
  abstract = {MOTIVATION: A pangenome represents many diverse genome sequences of the same species. In order to cope with small variations as well as structural variations, recent research focused on the development of graph based models of pangenomes. Mapping is the process of finding the original location of a DNA read in a reference sequence, typically a genome. Using a pangenome instead of a (linear) reference genome can e.g. reduce mapping bias, the tendency to incorrectly map sequences that differ from the reference genome. Mapping reads to a graph, however, is more complex and needs more resources than mapping to a reference genome. Reducing the complexity of the graph by encoding simple variations like SNPs in a simple way can accelerate read mapping and reduce the memory requirements at the same time.
RESULTS: We introduce graphs based on elastic-degenerate strings (ED strings, EDS) and the linearised form of these EDS graphs as a new representation for pangenomes. In this representation, small variations are encoded directly in the sequence. Structural variations are encoded in a graph structure. This reduces the size of the representation in comparison to sequence graphs. In the linearised form, mapping techniques that are known from ordinary strings can be applied with appropriate adjustments. Since most variations are expressed directly in the sequence, the mapping process rarely has to take edges of the EDS graph into account. We developed a prototypical software tool GED-MAP that uses this representation together with a minimizer index to map short reads to the pangenome. Our experiments show that the new method works on a whole human genome scale, taking structural variants properly into account. The advantage of GED-MAP, compared with other pangenomic short read mappers, is that the new representation allows for a simple indexing method. This makes GED-MAP fast and memory efficient.
AVAILABILITY: Sources are available at: https://github.com/thomas-buechler-ulm/gedmap.
SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
}
@article{pmid37167256,
  author   = {Riborg, A and Gulla, S and Fiskebeck, EZ and Ryder, D and Verner-Jeffreys, DW and Colquhoun, DJ and Welch, TJ},
  title    = {Pan-genome survey of the fish pathogen {Yersinia} ruckeri links accessory- and amplified genes to virulence.},
  journal  = {PloS one},
  year     = {2023},
  volume   = {18},
  number   = {5},
  pages    = {e0285257},
  doi      = {10.1371/journal.pone.0285257},
  pmid     = {37167256},
  issn     = {1932-6203},
  abstract = {While both virulent and putatively avirulent Yersinia ruckeri strains exist in aquaculture environments, the relationship between the distribution of virulence-associated factors and de facto pathogenicity in fish remains poorly understood. Pan-genome analysis of 18 complete genomes, representing established virulent and putatively avirulent lineages of Y. ruckeri, revealed the presence of a number of accessory genetic determinants. Further investigation of 68 draft genome assemblies revealed that the distribution of certain putative virulence factors correlated well with virulence and host-specificity. The inverse-autotransporter invasin locus yrIlm was, however, the only gene present in all virulent strains, while absent in lineages regarded as avirulent. Strains known to be associated with significant mortalities in salmonid aquaculture display a combination of serotype O1-LPS and yrIlm, with the well-documented highly virulent lineages, represented by MLVA clonal complexes 1 and 2, displaying duplication of the yrIlm locus. Duplication of the yrIlm locus was further found to have evolved over time in clonal complex 1, where some modern, highly virulent isolates display up to three copies.},
}
@article{pmid37164484,
  author   = {Porubsky, D and Vollger, MR and Harvey, WT and Rozanski, AN and Ebert, P and Hickey, G and Hasenfeld, P and Sanders, AD and Stober, C and {Human Pangenome Reference Consortium} and Korbel, JO and Paten, B and Marschall, T and Eichler, EE},
  title    = {Gaps and complex structurally variant loci in phased genome assemblies.},
  journal  = {Genome research},
  year     = {2023},
  doi      = {10.1101/gr.277334.122},
  pmid     = {37164484},
  issn     = {1549-5469},
  abstract = {There has been tremendous progress in phased genome assembly production by combining long-read data with parental information or linked-read data. Nevertheless, a typical phased genome assembly generated by trio-hifiasm still generates more than 140 gaps. We perform a detailed analysis of gaps, assembly breaks, and misorientations from 182 haploid assemblies obtained from a diversity panel of 77 unique human samples. Although trio-based approaches using HiFi are the current gold standard, chromosome-wide phasing accuracy is comparable when using Strand-seq instead of parental data. Importantly, the majority of assembly gaps cluster near the largest and most identical repeats (including segmental duplications [35.4%], satellite DNA [22.3%], or regions enriched in GA/AT-rich DNA [27.4%]). Consequently, 1513 protein-coding genes overlap assembly gaps in at least one haplotype, and 231 are recurrently disrupted or missing from five or more haplotypes. Furthermore, we estimate that 6-7 Mbp of DNA are misorientated per haplotype irrespective of whether trio-free or trio-based approaches are used. Of these misorientations, 81% correspond to bona fide large inversion polymorphisms in the human species, most of which are flanked by large segmental duplications. We also identify large-scale alignment discontinuities consistent with 11.9 Mbp of deletions and 161.4 Mbp of insertions per haploid genome. Although 99% of this variation corresponds to satellite DNA, we identify 230 regions of euchromatic DNA with frequent expansions and contractions, nearly half of which overlap with 197 protein-coding genes. Such variable and incompletely assembled regions are important targets for future algorithmic development and pangenome representation.},
}
@article{pmid37165242,
  author   = {Liao, WW and Asri, M and Ebler, J and Doerr, D and Haukness, M and Hickey, G and Lu, S and Lucas, JK and Monlong, J and Abel, HJ and Buonaiuto, S and Chang, XH and Cheng, H and Chu, J and Colonna, V and Eizenga, JM and Feng, X and Fischer, C and Fulton, RS and Garg, S and Groza, C and Guarracino, A and Harvey, WT and Heumos, S and Howe, K and Jain, M and Lu, TY and Markello, C and Martin, FJ and Mitchell, MW and Munson, KM and Mwaniki, MN and Novak, AM and Olsen, HE and Pesout, T and Porubsky, D and Prins, P and Sibbesen, JA and Sirén, J and Tomlinson, C and Villani, F and Vollger, MR and Antonacci-Fulton, LL and Baid, G and Baker, CA and Belyaeva, A and Billis, K and Carroll, A and Chang, PC and Cody, S and Cook, DE and Cook-Deegan, RM and Cornejo, OE and Diekhans, M and Ebert, P and Fairley, S and Fedrigo, O and Felsenfeld, AL and Formenti, G and Frankish, A and Gao, Y and Garrison, NA and Giron, CG and Green, RE and Haggerty, L and Hoekzema, K and Hourlier, T and Ji, HP and Kenny, EE and Koenig, BA and Kolesnikov, A and Korbel, JO and Kordosky, J and Koren, S and Lee, H and Lewis, AP and Magalhães, H and Marco-Sola, S and Marijon, P and McCartney, A and McDaniel, J and Mountcastle, J and Nattestad, M and Nurk, S and Olson, ND and Popejoy, AB and Puiu, D and Rautiainen, M and Regier, AA and Rhie, A and Sacco, S and Sanders, AD and Schneider, VA and Schultz, BI and Shafin, K and Smith, MW and Sofia, HJ and Abou Tayoun, AN and Thibaud-Nissen, F and Tricomi, FF and Wagner, J and Walenz, B and Wood, JMD and Zimin, AV and Bourque, G and Chaisson, MJP and Flicek, P and Phillippy, AM and Zook, JM and Eichler, EE and Haussler, D and Wang, T and Jarvis, ED and Miga, KH and Garrison, E and Marschall, T and Hall, IM and Li, H and Paten, B},
  title    = {A draft human pangenome reference.},
  journal  = {Nature},
  year     = {2023},
  volume   = {617},
  number   = {7960},
  pages    = {312--324},
  pmid     = {37165242},
  issn     = {1476-4687},
  abstract = {Here the Human Pangenome Reference Consortium presents a first draft of the human pangenome reference. The pangenome contains 47 phased, diploid assemblies from a cohort of genetically diverse individuals[1]. These assemblies cover more than 99% of the expected sequence in each genome and are more than 99% accurate at the structural and base pair levels. Based on alignments of the assemblies, we generate a draft pangenome that captures known variants and haplotypes and reveals new alleles at structurally complex loci. We also add 119 million base pairs of euchromatic polymorphic sequences and 1,115 gene duplications relative to the existing reference GRCh38. Roughly 90 million of the additional base pairs are derived from structural variation. Using our draft pangenome to analyse short-read data reduced small variant discovery errors by 34% and increased the number of structural variants detected per haplotype by 104% compared with GRCh38-based workflows, which enabled the typing of the vast majority of structural variant alleles per sample.},
}
@article{pmid37165241,
  author   = {Guarracino, A and Buonaiuto, S and de Lima, LG and Potapova, T and Rhie, A and Koren, S and Rubinstein, B and Fischer, C and {Human Pangenome Reference Consortium} and Gerton, JL and Phillippy, AM and Colonna, V and Garrison, E},
  title    = {Recombination between heterologous human acrocentric chromosomes.},
  journal  = {Nature},
  year     = {2023},
  volume   = {617},
  number   = {7960},
  pages    = {335--343},
  pmid     = {37165241},
  issn     = {1476-4687},
  abstract = {The short arms of the human acrocentric chromosomes 13, 14, 15, 21 and 22 (SAACs) share large homologous regions, including ribosomal DNA repeats and extended segmental duplications[1,2]. Although the resolution of these regions in the first complete assembly of a human genome-the Telomere-to-Telomere Consortium's CHM13 assembly (T2T-CHM13)-provided a model of their homology[3], it remained unclear whether these patterns were ancestral or maintained by ongoing recombination exchange. Here we show that acrocentric chromosomes contain pseudo-homologous regions (PHRs) indicative of recombination between non-homologous sequences. Utilizing an all-to-all comparison of the human pangenome from the Human Pangenome Reference Consortium[4] (HPRC), we find that contigs from all of the SAACs form a community. A variation graph[5] constructed from centromere-spanning acrocentric contigs indicates the presence of regions in which most contigs appear nearly identical between heterologous acrocentric chromosomes in T2T-CHM13. Except on chromosome 15, we observe faster decay of linkage disequilibrium in the pseudo-homologous regions than in the corresponding short and long arms, indicating higher rates of recombination[6,7]. The pseudo-homologous regions include sequences that have previously been shown to lie at the breakpoint of Robertsonian translocations[8], and their arrangement is compatible with crossover in inverted duplications on chromosomes 13, 14 and 21. The ubiquity of signals of recombination between heterologous acrocentric chromosomes seen in the HPRC draft pangenome suggests that these shared sequences form the basis for recurrent Robertsonian translocations, providing sequence and population-based confirmation of hypotheses first developed from cytogenetic studies 50 years ago[9].},
}
% The consortium is a corporate author; its inner braces make BibTeX treat it as one indivisible name.
% Percent signs in the abstract are escaped so the field survives LaTeX typesetting.
@article{pmid37165237,
  author   = {Vollger, M. R. and Dishuck, P. C. and Harvey, W. T. and DeWitt, W. S. and Guitart, X. and Goldberg, M. E. and Rozanski, A. N. and Lucas, J. and Asri, M. and {Human Pangenome Reference Consortium} and Munson, K. M. and Lewis, A. P. and Hoekzema, K. and Logsdon, G. A. and Porubsky, D. and Paten, B. and Harris, K. and Hsieh, P. and Eichler, E. E.},
  title    = {Increased mutation and gene conversion within human segmental duplications},
  journal  = {Nature},
  year     = {2023},
  volume   = {617},
  number   = {7960},
  pages    = {325--334},
  pmid     = {37165237},
  issn     = {1476-4687},
  abstract = {Single-nucleotide variants (SNVs) in segmental duplications (SDs) have not been systematically assessed because of the limitations of mapping short-read sequencing data[1,2]. Here we constructed 1:1 unambiguous alignments spanning high-identity SDs across 102 human haplotypes and compared the pattern of SNVs between unique and duplicated regions[3,4]. We find that human SNVs are elevated 60\% in SDs compared to unique regions and estimate that at least 23\% of this increase is due to interlocus gene conversion (IGC) with up to 4.3 megabase pairs of SD sequence converted on average per human haplotype. We develop a genome-wide map of IGC donors and acceptors, including 498 acceptor and 454 donor hotspots affecting the exons of about 800 protein-coding genes. These include 171 genes that have `relocated' on average 1.61 megabase pairs in a subset of human haplotypes. Using a coalescent framework, we show that SD regions are slightly evolutionarily older when compared to unique sequences, probably owing to IGC. SNVs in SDs, however, show a distinct mutational spectrum: a 27.1\% increase in transversions that convert cytosine to guanine or the reverse across all triplet contexts and a 7.6\% reduction in the frequency of CpG-associated mutations when compared to unique DNA.
We reason that these distinct mutational properties help to maintain an overall higher GC content of SD DNA compared to that of unique DNA, probably driven by GC-biased conversion between paralogous sequences[5,6].},
}
% Record carries no abstract or DOI in the source export.
@article{pmid37165235,
  author  = {Massarat, A. and Gymrek, M. and McStay, B. and Jónsson, H.},
  title   = {Human pangenome supports analysis of complex genomic regions},
  journal = {Nature},
  year    = {2023},
  volume  = {617},
  number  = {7960},
  pages   = {256--258},
  pmid    = {37165235},
  issn    = {1476-4687},
}
% No volume/issue/pages in the source record, so those fields are omitted rather than left empty.
@article{pmid37165229,
  author  = {Liverpool, L.},
  title   = {First human `pangenome' aims to catalogue genetic diversity},
  journal = {Nature},
  year    = {2023},
  pmid    = {37165229},
  issn    = {1476-4687},
}
% No volume/issue/pages in the source record, so those fields are omitted rather than left empty.
% The quoted first word is braced: sentence-casing styles only spare the very first character (the quote mark).
@article{pmid37165225,
  author  = {Petrić Howe, N. and Bundell, S.},
  title   = {{`Pangenome'} aims to capture the breadth of human diversity},
  journal = {Nature},
  year    = {2023},
  doi     = {10.1038/d41586-023-01579-9},
  pmid    = {37165225},
  issn    = {1476-4687},
}
% The consortium is a corporate author; its inner braces make BibTeX treat it as one indivisible name.
% No volume/issue/pages in the source record, so those fields are omitted rather than left empty.
@article{pmid37165083,
  author   = {Hickey, G. and Monlong, J. and Ebler, J. and Novak, A. M. and Eizenga, J. M. and Gao, Y. and {Human Pangenome Reference Consortium} and Marschall, T. and Li, H. and Paten, B.},
  title    = {Pangenome graph construction from genome alignments with {Minigraph-Cactus}},
  journal  = {Nature biotechnology},
  year     = {2023},
  pmid     = {37165083},
  issn     = {1546-1696},
  abstract = {Pangenome references address biases of reference genomes by storing a representative set of diverse haplotypes and their alignment, usually as a graph. Alternate alleles determined by variant callers can be used to construct pangenome graphs, but advances in long-read sequencing are leading to widely available, high-quality phased assemblies. Constructing a pangenome graph directly from assemblies, as opposed to variant calls, leverages the graph's ability to represent variation at different scales. Here we present the Minigraph-Cactus pangenome pipeline, which creates pangenomes directly from whole-genome alignments, and demonstrate its ability to scale to 90 human haplotypes from the Human Pangenome Reference Consortium. The method builds graphs containing all forms of genetic variation while allowing use of current mapping and genotyping tools. We measure the effect of the quality and completeness of reference genomes used for analysis within the pangenomes and show that using the CHM13 reference from the Telomere-to-Telomere Consortium improves the accuracy of our methods. We also demonstrate construction of a Drosophila melanogaster pangenome.},
}
% No volume/issue in the source record; `pages` holds the article identifier.
@article{pmid37154680,
  author   = {Castillo, A. I. and Almeida, R. P. P.},
  title    = {The Multifaceted Role of Homologous Recombination in a Fastidious Bacterial Plant Pathogen},
  journal  = {Applied and environmental microbiology},
  year     = {2023},
  pages    = {e0043923},
  doi      = {10.1128/aem.00439-23},
  pmid     = {37154680},
  issn     = {1098-5336},
  abstract = {Homologous recombination plays a key function in the evolution of bacterial genomes. Within Xylella fastidiosa, an emerging plant pathogen with increasing host and geographic ranges, it has been suggested that homologous recombination facilitates host switching, speciation, and the development of virulence. We used 340 whole-genome sequences to study the relationship between inter- and intrasubspecific homologous recombination, random mutation, and natural selection across individual X. fastidiosa genes. Individual gene orthologs were identified and aligned, and a maximum likelihood (ML) gene tree was generated. Each gene alignment and tree pair were then used to calculate gene-wide and branch-specific r/m values (relative effect of recombination to mutation), gene-wide and branch-site nonsynonymous over synonymous substitution rates (dN/dS values; episodic selection), and branch length (as a proxy for mutation rate). The relationships between these variables were evaluated at the global level (i.e., for all genes among and within a subspecies), among specific functional classes (i.e., COGs), and between pangenome components (i.e., accessory versus core genes). Our analysis showed that r/m varied widely among genes as well as across X. fastidiosa subspecies. While r/m and dN/dS values were positively correlated in some instances (e.g., core genes in X. fastidiosa subsp. fastidiosa and both core and accessory genes in X. fastidiosa subsp. multiplex), low correlation coefficients suggested no clear biological significance.
Overall, our results indicate that, in addition to its adaptive role in certain genes, homologous recombination acts as a homogenizing and a neutral force across phylogenetic clades, gene functional groups, and pangenome components. IMPORTANCE There is ample evidence that homologous recombination occurs frequently in the economically important plant pathogen Xylella fastidiosa. Homologous recombination has been known to occur among sympatric subspecies and is associated with host-switching events and virulence-linked genes. As a consequence, is it generally assumed that recombinant events in X. fastidiosa are adaptive. This mindset influences expectations of how homologous recombination acts as an evolutionary force as well as how management strategies for X. fastidiosa diseases are determined. Yet, homologous recombination plays roles beyond that of a source for diversification and adaptation. Homologous recombination can act as a DNA repair mechanism, as a means to facilitate nucleotide compositional change, as a homogenization mechanism within populations, or even as a neutral force. Here, we provide a first assessment of long-held beliefs regarding the general role of recombination in adaptation for X. fastidiosa. We evaluate gene-specific variations in homologous recombination rate across three X. fastidiosa subspecies and its relationship to other evolutionary forces (e.g., natural selection, mutation, etc.). These data were used to assess the role of homologous recombination in X. fastidiosa evolution.},
}
% No issue number in the source record; `pages` holds the article identifier.
% Percent signs in the abstract are escaped so the field survives LaTeX typesetting.
@article{pmid37153161,
  author   = {Saroha, T. and Patil, P. P. and Rana, R. and Kumar, R. and Kumar, S. and Singhal, L. and Gautam, V. and Patil, P. B.},
  title    = {Genomic features, antimicrobial susceptibility, and epidemiological insights into {Burkholderia} cenocepacia clonal complex 31 isolates from bloodstream infections in {India}},
  journal  = {Frontiers in cellular and infection microbiology},
  year     = {2023},
  volume   = {13},
  pages    = {1151594},
  pmid     = {37153161},
  issn     = {2235-2988},
  abstract = {INTRODUCTION: Burkholderia cepacia complex (Bcc) clonal complex (CC) 31, the predominant lineage causing devastating outbreaks globally, has been a growing concern of infections in non-cystic fibrosis (NCF) patients in India. B. cenocepacia is very challenging to treat owing to its virulence determinants and antibiotic resistance. Improving the management of these infections requires a better knowledge of their resistance patterns and mechanisms.
METHODS: Whole-genome sequences of 35 CC31 isolates obtained from patient samples, were analyzed against available 210 CC31 genomes in the NCBI database to glean details of resistance, virulence, mobile elements, and phylogenetic markers to study genomic diversity and evolution of CC31 lineage in India.
RESULTS: Genomic analysis revealed that 35 isolates belonging to CC31 were categorized into 11 sequence types (ST), of which five STs were reported exclusively from India. Phylogenetic analysis classified 245 CC31 isolates into eight distinct clades (I-VIII) and unveiled that NCF isolates are evolving independently from the global cystic fibrosis (CF) isolates forming a distinct clade. The detection rate of seven classes of antibiotic-related genes in 35 isolates was 35 (100\%) for tetracyclines, aminoglycosides, and fluoroquinolones; 26 (74.2\%) for sulphonamides and phenicols; 7 (20\%) for beta-lactamases; and 1 (2.8\%) for trimethoprim resistance genes. Additionally, 3 (8.5\%) NCF isolates were resistant to disinfecting agents and antiseptics. Antimicrobial susceptibility testing revealed that majority of NCF isolates were resistant to chloramphenicol (77\%) and levofloxacin (34\%). NCF isolates have a comparable number of virulence genes to CF isolates. A well-studied pathogenicity island of B. cenocepacia, GI11 is present in ST628 and ST709 isolates from the Indian Bcc population. In contrast, genomic island GI15 (highly similar to the island found in B. pseudomallei strain EY1) is exclusively reported in ST839 and ST824 isolates from two different locations in India. Horizontal acquisition of lytic phage ST79 of pathogenic B. pseudomallei is demonstrated in ST628 isolates Bcc1463, Bcc29163, and BccR4654 amongst CC31 lineage.
DISCUSSION: The study reveals a high diversity of CC31 lineages among B. cenocepacia isolates from India. The extensive information from this study will facilitate the development of rapid diagnostic and novel therapeutic approaches to manage B. cenocepacia infections.},
}
% No issue number in the source record; `pages` holds the article identifier.
% Percent signs in the abstract are escaped so the field survives LaTeX typesetting.
@article{pmid37152722,
  author   = {Aziz, T. and Naveed, M. and Jabeen, K. and Shabbir, M. A. and Sarwar, A. and Zhennai, Y. and Alharbi, M. and Alshammari, A. and Alasmari, A. F.},
  title    = {Integrated genome based evaluation of safety and probiotic characteristics of {Lactiplantibacillus} plantarum {YW11} isolated from {Tibetan} kefir},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1157615},
  pmid     = {37152722},
  issn     = {1664-302X},
  abstract = {The comparative genomic analysis of Lactiplantibacillus plantarum YW11 (L. plantarum YW11) isolated from Tibetan kefir involves comparison of the complete genome sequences of the isolated strain with other closely related L. plantarum strains. This type of analysis can be used to identify the genetic diversity among strains and to explore the genetic characteristics of the YW11 strain. The genome of L. plantarum YW11 was found to be composed of a circular single chromosome of 4,597,470 bp with a G + C content of 43.2\%. A total of 4,278 open reading frames (ORFs) were identified in the genome and the coding density was found to be 87.8\%. A comparative genomic analysis was conducted using two other L. plantarum strains, L. plantarum C11 and L. plantarum LMG21703. Genomic comparison revealed that L. plantarum YW11 shared 72.7 and 75.2\% of gene content with L. plantarum C11 and L. plantarum LMG21703, respectively. Most of the genes shared between the three L. plantarum strains were involved in carbohydrate metabolism, energy production and conversion, amino acid metabolism, and transcription. In this analysis, 10 previously sequenced entire genomes of the species were compared using an in-silico technique to discover genomic divergence in genes linked with carbohydrate intake and their potential adaptations to distinct human intestinal environments. The subspecies pan-genome was open, which correlated with its extraordinary capacity to colonize several environments.
Phylogenetic analysis revealed that the novel genomes were homogenously grouped among subspecies of l Lactiplantibacillus. L. plantarum was resistant to cefoxitin, erythromycin, and metronidazole, inhibited pathogens including Listeria monocytogenes, Clostridium difficile, Vibrio cholera, and others, and had excellent aerotolerance, which is useful for industrial operations. The comparative genomic analysis of L. plantarum YW11 isolated from Tibetan kefir can provide insights into the genetic characteristics of the strain, which can be used to further understand its role in the production of kefir.},
}
% `support` is a non-standard field carried over from the PubMed export; it is kept verbatim.
@article{pmid37147657,
  author   = {Mun, T. and Vaddadi, N. S. K. and Langmead, B.},
  title    = {Pangenomic genotyping with the marker array},
  journal  = {Algorithms for molecular biology : AMB},
  year     = {2023},
  volume   = {18},
  number   = {1},
  pages    = {2},
  pmid     = {37147657},
  issn     = {1748-7188},
  support  = {R01HG011392/HG/NHGRI NIH HHS/United States ; R35GM139602/GM/NIGMS NIH HHS/United States ; },
  abstract = {We present a new method and software tool called rowbowt that applies a pangenome index to the problem of inferring genotypes from short-read sequencing data. The method uses a novel indexing structure called the marker array. Using the marker array, we can genotype variants with respect from large panels like the 1000 Genomes Project while reducing the reference bias that results when aligning to a single linear reference. rowbowt can infer accurate genotypes in less time and memory compared to existing graph-based methods. The method is implemented in the open source software tool rowbowt available at https://github.com/alshai/rowbowt .},
}
% The ampersand in the journal name is escaped: a bare one is an alignment character in LaTeX and aborts typesetting.
% No volume/issue in the source record, so those fields are omitted rather than left empty.
@article{pmid37144759,
  author   = {Basharat, Z. and Meshal, A.},
  title    = {Pan-genome mediated therapeutic target mining in {Kingella} kingae and inhibition assessment using traditional {Chinese} medicinal compounds: an informatics approach},
  journal  = {Journal of biomolecular structure \& dynamics},
  year     = {2023},
  pages    = {1--14},
  doi      = {10.1080/07391102.2023.2208221},
  pmid     = {37144759},
  issn     = {1538-0254},
  abstract = {Kingella kingae causes bacteremia, endocarditis, osteomyelitis, septic arthritis, meningitis, spondylodiscitis, and lower respiratory tract infections in pediatric patients. Usually it demonstrates disease after inflammation of mouth, lips or infections of the upper respiratory tract. To date, therapeutic targets in this bacterium remain unexplored. We have utilized a battery of bioinformatics tools to mine these targets in this study. Core genes were initially inferred from 55 genomes of K. kingae and 39 therapeutic targets were mined using an in-house pipeline. We selected aroG product (KDPG aldolase) involved in chorismate pathway, for inhibition analysis of this bacterium using lead-like metabolites from traditional Chinese medicinal plants. Pharmacophore generation was done using control ZINC36444158 (1,16-bis[(dihydroxyphosphinyl)oxy]hexadecane), followed by molecular docking of top hits from a library of 36,000 compounds. Top prioritized compounds were ZINC95914016, ZINC33833283 and ZINC95914219. ADME profiling and simulation of compound dosing (100 mg tablet) was done to infer compartmental pharmacokinetics in a population of 300 individuals in fasting state. PkCSM based toxicity analysis revealed the compounds ZINC95914016 and ZINC95914219 as safe and with almost similar bioavailability. However, ZINC95914016 takes less time to reach maximum concentration in the plasma and shows several optimal parameters compared to other leads.
In light of obtained data, we recommend this compound for further testing and induction in experimental drug design pipeline. Communicated by Ramaswamy H. Sarma.},
}
% The title ends with its own question mark; no extra terminal period is stored.
@article{pmid37143156,
  author   = {Gong, Y. and Li, Y. and Liu, X. and Ma, Y. and Jiang, L.},
  title    = {A review of the pangenome: how it affects our understanding of genomic variation, selection and breeding in domestic animals?},
  journal  = {Journal of animal science and biotechnology},
  year     = {2023},
  volume   = {14},
  number   = {1},
  pages    = {73},
  pmid     = {37143156},
  issn     = {1674-9782},
  abstract = {As large-scale genomic studies have progressed, it has been revealed that a single reference genome pattern cannot represent genetic diversity at the species level. While domestic animals tend to have complex routes of origin and migration, suggesting a possible omission of some population-specific sequences in the current reference genome. Conversely, the pangenome is a collection of all DNA sequences of a species that contains sequences shared by all individuals (core genome) and is also able to display sequence information unique to each individual (variable genome). The progress of pangenome research in humans, plants and domestic animals has proved that the missing genetic components and the identification of large structural variants (SVs) can be explored through pangenomic studies. Many individual specific sequences have been shown to be related to biological adaptability, phenotype and important economic traits. The maturity of technologies and methods such as third-generation sequencing, Telomere-to-telomere genomes, graphic genomes, and reference-free assembly will further promote the development of pangenome. In the future, pangenome combined with long-read data and multi-omics will help to resolve large SVs and their relationship with the main economic traits of interest in domesticated animals, providing better insights into animal domestication, evolution and breeding.
In this review, we mainly discuss how pangenome analysis reveals genetic variations in domestic animals (sheep, cattle, pigs, chickens) and their impacts on phenotypes and how this can contribute to the understanding of species diversity. Additionally, we also go through potential issues and the future perspectives of pangenome research in livestock and poultry.},
}
% No issue number in the source record; `pages` holds the article identifier.
@article{pmid37138640,
  author   = {Sorouri, B. and Rodriguez, C. I. and Gaut, B. S. and Allison, S. D.},
  title    = {Variation in {Sphingomonas} traits across habitats and phylogenetic clades},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1146165},
  pmid     = {37138640},
  issn     = {1664-302X},
  abstract = {Whether microbes show habitat preferences is a fundamental question in microbial ecology. If different microbial lineages have distinct traits, those lineages may occur more frequently in habitats where their traits are advantageous. Sphingomonas is an ideal bacterial clade in which to investigate how habitat preference relates to traits because these bacteria inhabit diverse environments and hosts. Here we downloaded 440 publicly available Sphingomonas genomes, assigned them to habitats based on isolation source, and examined their phylogenetic relationships. We sought to address whether: (1) there is a relationship between Sphingomonas habitat and phylogeny, and (2) whether there is a phylogenetic correlation between key, genome-based traits and habitat preference. We hypothesized that Sphingomonas strains from similar habitats would cluster together in phylogenetic clades, and key traits that improve fitness in specific environments should correlate with habitat. Genome-based traits were categorized into the Y-A-S trait-based framework for high growth yield, resource acquisition, and stress tolerance. We selected 252 high quality genomes and constructed a phylogenetic tree with 12 well-defined clades based on an alignment of 404 core genes. Sphingomonas strains from the same habitat clustered together within the same clades, and strains within clades shared similar clusters of accessory genes. Additionally, key genome-based trait frequencies varied across habitats. We conclude that Sphingomonas gene content reflects habitat preference.
This knowledge of how environment and host relate to phylogeny may also help with future functional predictions about Sphingomonas and facilitate applications in bioremediation.},
}
% No issue number in the source record; `pages` holds the article identifier.
@article{pmid37138622,
  author   = {Zhou, Y. and Jiang, D. and Yao, X. and Luo, Y. and Yang, Z. and Ren, M. and Zhang, G. and Yu, Y. and Lu, A. and Wang, Y.},
  title    = {Pan-genome wide association study of {Glaesserella} parasuis highlights genes associated with virulence and biofilm formation},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1160433},
  pmid     = {37138622},
  issn     = {1664-302X},
  abstract = {Glaesserella parasuis is a gram-negative bacterium that causes fibrotic polyserositis and arthritis in pig, significantly affecting the pig industry. The pan-genome of G. parasuis is open. As the number of genes increases, the core and accessory genomes may show more pronounced differences. The genes associated with virulence and biofilm formation are also still unclear due to the diversity of G. parasuis. Therefore, we have applied a pan-genome-wide association study (Pan-GWAS) to 121 strains G. parasuis. Our analysis revealed that the core genome consists of 1,133 genes associated with the cytoskeleton, virulence, and basic biological processes. The accessory genome is highly variable and is a major cause of genetic diversity in G. parasuis. Furthermore, two biologically important traits (virulence, biofilm formation) of G. parasuis were studied via pan-GWAS to search for genes associated with the traits. A total of 142 genes were associated with strong virulence traits. By affecting metabolic pathways and capturing the host nutrients, these genes are involved in signal pathways and virulence factors, which are beneficial for bacterial survival and biofilm formation. This research lays the foundation for further studies on virulence and biofilm formation and provides potential new drug and vaccine targets against G. parasuis.},
}
% No issue number in the source record; `pages` holds the article identifier.
% Percent signs in the abstract are escaped so the field survives LaTeX typesetting.
@article{pmid37138596,
  author   = {Zhao, Y. and Wei, H. M. and Yuan, J. L. and Xu, L. and Sun, J. Q.},
  title    = {A comprehensive genomic analysis provides insights on the high environmental adaptability of {Acinetobacter} strains},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1177951},
  pmid     = {37138596},
  issn     = {1664-302X},
  abstract = {Acinetobacter is ubiquitous, and it has a high species diversity and a complex evolutionary pattern. To elucidate the mechanism of its high ability to adapt to various environment, 312 genomes of Acinetobacter strains were analyzed using the phylogenomic and comparative genomics methods. It was revealed that the Acinetobacter genus has an open pan-genome and strong genome plasticity. The pan-genome consists of 47,500 genes, with 818 shared by all the genomes of Acinetobacter, while 22,291 are unique genes. Although Acinetobacter strains do not have a complete glycolytic pathway to directly utilize glucose as carbon source, most of them harbored the n-alkane-degrading genes alkB/alkM (97.1\% of tested strains) and almA (96.7\% of tested strains), which were responsible for medium-and long-chain n-alkane terminal oxidation reaction, respectively. Most Acinetobacter strains also have catA (93.3\% of tested strains) and benAB (92.0\% of tested strains) genes that can degrade the aromatic compounds catechol and benzoic acid, respectively. These abilities enable the Acinetobacter strains to easily obtain carbon and energy sources from their environment for survival. The Acinetobacter strains can manage osmotic pressure by accumulating potassium and compatible solutes, including betaine, mannitol, trehalose, glutamic acid, and proline. They respond to oxidative stress by synthesizing superoxide dismutase, catalase, disulfide isomerase, and methionine sulfoxide reductase that repair the damage caused by reactive oxygen species.
In addition, most Acinetobacter strains contain many efflux pump genes and resistance genes to manage antibiotic stress and can synthesize a variety of secondary metabolites, including arylpolyene, β-lactone and siderophores among others, to adapt to their environment. These genes enable Acinetobacter strains to survive extreme stresses. The genome of each Acinetobacter strain contained different numbers of prophages (0-12) and genomic islands (GIs) (6-70), and genes related to antibiotic resistance were found in the GIs. The phylogenetic analysis revealed that the alkM and almA genes have a similar evolutionary position with the core genome, indicating that they may have been acquired by vertical gene transfer from their ancestor, while catA, benA, benB and the antibiotic resistance genes could have been acquired by horizontal gene transfer from the other organisms.},
}
% "Moreira de Almeida" is braced as one surname; unbraced, BibTeX would read "Moreira de" as a von-part.
% `support` is a non-standard field carried over from the PubMed export; it is kept verbatim.
% No volume/issue in the source record; `pages` holds the article identifier.
@article{pmid37138544,
  author   = {Oddy, J. and Chhetry, M. and Awal, R. and Addy, J. and Wilkinson, M. and Smith, D. and King, R. and Hall, C. and Testa, R. and Murray, E. and Raffan, S. and Curtis, T. Y. and Wingen, L. and Griffiths, S. and Berry, S. and Elmore, J. S. and Cryer, N. and {Moreira de Almeida}, I. and Halford, N. G.},
  title    = {Genetic control of grain amino acid composition in a {UK} soft wheat mapping population},
  journal  = {The plant genome},
  year     = {2023},
  pages    = {e20335},
  doi      = {10.1002/tpg2.20335},
  pmid     = {37138544},
  issn     = {1940-3372},
  support  = {BB/P016855/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/T017007/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/T50838X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; },
  abstract = {Wheat (Triticum aestivum L.) is a major source of nutrients for populations across the globe, but the amino acid composition of wheat grain does not provide optimal nutrition. The nutritional value of wheat grain is limited by low concentrations of lysine (the most limiting essential amino acid) and high concentrations of free asparagine (precursor to the processing contaminant acrylamide). There are currently few available solutions for asparagine reduction and lysine biofortification through breeding. In this study, we investigated the genetic architecture controlling grain free amino acid composition and its relationship to other traits in a Robigus × Claire doubled haploid population. Multivariate analysis of amino acids and other traits showed that the two groups are largely independent of one another, with the largest effect on amino acids being from the environment. Linkage analysis of the population allowed identification of quantitative trait loci (QTL) controlling free amino acids and other traits, and this was compared against genomic prediction methods.
Following identification of a QTL controlling free lysine content, wheat pangenome resources facilitated analysis of candidate genes in this region of the genome. These findings can be used to select appropriate strategies for lysine biofortification and free asparagine reduction in wheat breeding programs.},
}
% No volume/issue in the source record; `pages` holds the article identifier.
@article{pmid37138543,
  author   = {Derbyshire, M. C. and Marsh, J. and Tirnaz, S. and Nguyen, H. T. and Batley, J. and Bayer, P. E. and Edwards, D.},
  title    = {Diversity of fatty acid biosynthesis genes across the soybean pangenome},
  journal  = {The plant genome},
  year     = {2023},
  pages    = {e20334},
  doi      = {10.1002/tpg2.20334},
  pmid     = {37138543},
  issn     = {1940-3372},
  abstract = {Soybean (Glycine max) is a major crop that contributes more than half of global oilseed production. Much research has been directed towards improvement of the fatty acid profile of soybean seeds through marker assisted breeding. Recently published soybean pangenomes, based on thousands of soybean lines, provide an opportunity to identify new alleles that may be involved in fatty acid biosynthesis. In this study, we identify fatty acid biosynthesis genes in soybean pangenomes based on sequence identity with known genes and examine their sequence diversity across diverse soybean collections. We find three possible instances of a gene missing in wild soybean, including FAD8 and FAD2-2D, which may be involved in oleic and linoleic acid desaturation, respectively, although we recommend follow-up research to verify the absence of these genes. More than half of the 53 fatty acid biosynthesis genes identified contained missense variants, including one linked with a previously identified QTL for oil quality. These variants were present in multiple studies based on either short read mappings or alignment of reference grade genomes. Missense variants were found in previously characterized genes including FAD2-1A and FAD2-1B, both of which are involved in desaturation of oleic acid, as well as uncharacterized candidate fatty acid biosynthesis genes.
We find that the frequency of missense alleles in fatty acid biosynthesis genes has been reduced significantly more than the global average frequency of missense mutations during domestication, and missense variation in some genes is near absent in modern cultivars. This could be due to the selection for fatty acid profiles in seed, though future work should be conducted towards understanding the phenotypic impacts of these variants.},
}
% No volume/issue in the source record; `pages` holds the article identifier.
@article{pmid37129508,
  author   = {Maki, J. J. and Howard, M. and Connelly, S. and Pettengill, M. A. and Hardy, D. J. and Cameron, A.},
  title    = {Species Delineation and Comparative Genomics within the {Campylobacter} ureolyticus Complex},
  journal  = {Journal of clinical microbiology},
  year     = {2023},
  pages    = {e0004623},
  doi      = {10.1128/jcm.00046-23},
  pmid     = {37129508},
  issn     = {1098-660X},
  abstract = {Campylobacter ureolyticus is an emerging pathogen increasingly appreciated as a common cause of gastroenteritis and extra-intestinal infections in humans. Outside the setting of gastroenteritis, little work has been done to describe the genomic content and relatedness of the species, especially regarding clinical isolates. We reviewed the epidemiology of clinical C. ureolyticus cultured by our institution over the past 10 years. Fifty-one unique C. ureolyticus isolates were identified between January 2010 and August 2022, mostly originating from abscesses and blood cultures. To clarify the taxonomic relationships between isolates and to attribute specific genes with different clinical manifestations, we sequenced 19 available isolates from a variety of clinical specimen types and conducted a pangenomic analysis with publicly available C. ureolyticus genomes. Digital DNA:DNA hybridization suggested that these C. ureolyticus comprised a species complex of 10 species clusters (SCs) and several subspecies clusters. Although some orthologous genes or gene functions were enriched in isolates found in different SCs and clinical specimens, no association was significant. Nearly a third of the isolates possessed antimicrobial resistance genes, including the ermA resistance gene, potentially conferring resistance to macrolides, the treatment of choice for severe human campylobacteriosis. This work effectively doubles the number of publicly available C.
ureolyticus genomes, provides further clarification of taxonomic relationships within this bacterial complex, and identifies target SCs for future analysis.},
}
% The sequencing program is a corporate author; its inner braces make BibTeX treat it as one indivisible name.
% NOTE(review): collective-author name filled in where the export had an empty name; confirm against the PubMed record.
% No volume/issue/pages in the source record, so those fields are omitted rather than left empty.
@article{pmid37127330,
  author   = {Weller, C. A. and Andreev, I. and Chambers, M. J. and Park, M. and {NISC Comparative Sequencing Program} and Bloom, J. S. and Sadhu, M. J.},
  title    = {Highly complete long-read genomes reveal pangenomic variation underlying yeast phenotypic diversity},
  journal  = {Genome research},
  year     = {2023},
  doi      = {10.1101/gr.277515.122},
  pmid     = {37127330},
  issn     = {1549-5469},
  abstract = {Understanding the genetic causes of trait variation is a primary goal of genetic research. One way that individuals can vary genetically is through variable pangenomic genes - genes that are only present in some individuals in a population. The presence or absence of entire genes could have large effects on trait variation. However, variable pangenomic genes can be missed in standard genotyping workflows, due to reliance on aligning short-read sequencing to reference genomes. A popular method for studying the genetic basis of trait variation is linkage mapping, which identifies quantitative trait loci (QTLs), regions of the genome that harbor causative genetic variants. Large-scale linkage mapping in the budding yeast Saccharomyces cerevisiae has found thousands of QTLs affecting myriad yeast phenotypes. To enable the resolution of QTLs caused by variable pangenomic genes, we used long-read sequencing to generate highly complete de novo assemblies of 16 diverse yeast isolates. With these assemblies we resolved QTLs for growth on maltose, sucrose, raffinose, and oxidative stress to specific genes that are absent from the reference genome but present in the broader yeast population at appreciable frequency. Copies of genes also duplicate onto chromosomes where they are absent in the reference genome, and we found that these copies generate additional QTLs whose resolution requires pangenome characterization. Our findings demonstrate the need for highly complete genome assemblies to identify the genetic basis of trait variation.},
}
@article{pmid37125195,
  author   = {Saxena, P and Rauniyar, S and Thakur, P and Singh, RN and Bomgni, A and Alaba, MO and Tripathi, AK and Gnimpieba, EZ and Lushbough, C and Sani, RK},
  title    = {Integration of text mining and biological network analysis: Identification of essential genes in sulfate-reducing bacteria.},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1086021},
  pmid     = {37125195},
  issn     = {1664-302X},
  abstract = {The growth and survival of an organism in a particular environment is highly depends on the certain indispensable genes, termed as essential genes. Sulfate-reducing bacteria (SRB) are obligate anaerobes which thrives on sulfate reduction for its energy requirements. The present study used Oleidesulfovibrio alaskensis G20 (OA G20) as a model SRB to categorize the essential genes based on their key metabolic pathways. Herein, we reported a feedback loop framework for gene of interest discovery, from bio-problem to gene set of interest, leveraging expert annotation with computational prediction. Defined bio-problem was applied to retrieve the genes of SRB from literature databases (PubMed, and PubMed Central) and annotated them to the genome of OA G20. Retrieved gene list was further used to enrich protein-protein interaction and was corroborated to the pangenome analysis, to categorize the enriched gene sets and the respective pathways under essential and non-essential. Interestingly, the sat gene (dde_2265) from the sulfur metabolism was the bridging gene between all the enriched pathways. Gene clusters involved in essential pathways were linked with the genes from seleno-compound metabolism, amino acid metabolism, secondary metabolite synthesis, and cofactor biosynthesis. Furthermore, pangenome analysis demonstrated the gene distribution, where 69.83% of the 116 enriched genes were mapped under "persistent," inferring the essentiality of these genes.
Likewise, 21.55% of the enriched genes, which involves specially the formate dehydrogenases and metallic hydrogenases, appeared under "shell." Our methodology suggested that semi-automated text mining and network analysis may play a crucial role in deciphering the previously unexplored genes and key mechanisms which can help to generate a baseline prior to perform any experimental studies.},
}
% NOTE(review): the exported author list had two empty name slots between
% "Hasenfeld, P" and "Paten, B" (presumably group/consortium authors dropped by
% the export). The empty slots are removed so the list parses cleanly; verify
% against the PubMed record and re-add each group as {{Group Name}} if needed.
@article{pmid37122002,
  author   = {Porubsky, D and Harvey, WT and Rozanski, AN and Ebler, J and Höps, W and Ashraf, H and Hasenfeld, P and Paten, B and Sanders, AD and Marschall, T and Korbel, JO and Eichler, EE},
  title    = {Inversion polymorphism in a complete human genome assembly.},
  journal  = {Genome biology},
  year     = {2023},
  volume   = {24},
  number   = {1},
  pages    = {100},
  pmid     = {37122002},
  issn     = {1474-760X},
  abstract = {The telomere-to-telomere (T2T) complete human reference has significantly improved our ability to characterize genome structural variation. To understand its impact on inversion polymorphisms, we remapped data from 41 genomes against the T2T reference genome and compared it to the GRCh38 reference. We find a ~ 21% increase in sensitivity improving mapping of 63 inversions on the T2T reference. We identify 26 misorientations within GRCh38 and show that the T2T reference is three times more likely to represent the correct orientation of the major human allele. Analysis of 10 additional samples reveals novel rare inversions at chromosomes 15q25.2, 16p11.2, 16q22.1-23.1, and 22q11.21.},
}
@article{pmid37115804,
  author   = {Jacob, JJ and Pragasam, AK and Vasudevan, K and Velmurugan, A and Priya Teekaraman, M and Priya Thirumoorthy, T and Ray, P and Gupta, M and Kapil, A and Bai, SP and Nagaraj, S and Saigal, K and Chandola, TR and Thomas, M and Bavdekar, A and Ebenezer, SE and Shastri, J and De, A and Dutta, S and Alexander, AP and Koshy, RM and Jinka, DR and Singh, A and Srivastava, SK and Anandan, S and Dougan, G and John, J and Kang, G and Veeraraghavan, B and Mutreja, A},
  title    = {Genomic analysis unveils genome degradation events and gene flux in the emergence and persistence of {S. Paratyphi A} lineages.},
  journal  = {PLoS pathogens},
  year     = {2023},
  volume   = {19},
  number   = {4},
  pages    = {e1010650},
  doi      = {10.1371/journal.ppat.1010650},
  pmid     = {37115804},
  issn     = {1553-7374},
  abstract = {Paratyphoid fever caused by S. Paratyphi A is endemic in parts of South Asia and Southeast Asia. The proportion of enteric fever cases caused by S. Paratyphi A has substantially increased, yet only limited data is available on the population structure and genetic diversity of this serovar. We examined the phylogenetic distribution and evolutionary trajectory of S. Paratyphi A isolates collected as part of the Indian enteric fever surveillance study "Surveillance of Enteric Fever in India (SEFI)." In the study period (2017-2020), S. Paratyphi A comprised 17.6% (441/2503) of total enteric fever cases in India, with the isolates highly susceptible to all the major antibiotics used for treatment except fluoroquinolones. Phylogenetic analysis clustered the global S. Paratyphi A collection into seven lineages (A-G), and the present study isolates were distributed in lineages A, C and F. Our analysis highlights that the genome degradation events and gene acquisitions or losses are key molecular events in the evolution of new S. Paratyphi A lineages/sub-lineages.
A total of 10 hypothetically disrupted coding sequences (HDCS) or pseudogenes-forming mutations possibly associated with the emergence of lineages were identified. The pan-genome analysis identified the insertion of P2/PSP3 phage and acquisition of IncX1 plasmid during the selection in 2.3.2/2.3.3 and 1.2.2 genotypes, respectively. We have identified six characteristic missense mutations associated with lipopolysaccharide (LPS) biosynthesis genes of S. Paratyphi A, however, these mutations confer only a low structural impact and possibly have minimal impact on vaccine effectiveness. Since S. Paratyphi A is human-restricted, high levels of genetic drift are not expected unless these bacteria transmit to naive hosts. However, public-health investigation and monitoring by means of genomic surveillance would be constantly needed to avoid S. Paratyphi A serovar becoming a public health threat similar to the S. Typhi of today.},
}
@article{pmid37110377,
  author   = {Ariute, JC and Felice, AG and Soares, S and da Gama, MAS and de Souza, EB and Azevedo, V and Brenig, B and Aburjaile, F and Benko-Iseppon, AM},
  title    = {Characterization and Association of {Rips} Repertoire to Host Range of Novel {Ralstonia} solanacearum Strains by In Silico Approaches.},
  journal  = {Microorganisms},
  year     = {2023},
  volume   = {11},
  number   = {4},
  doi      = {10.3390/microorganisms11040954},
  pmid     = {37110377},
  issn     = {2076-2607},
  abstract = {Ralstonia solanacearum species complex (RSSC) cause several phytobacteriosis in many economically important crops around the globe, especially in the tropics. In Brazil, phylotypes I and II cause bacterial wilt (BW) and are indistinguishable by classical microbiological and phytopathological methods, while Moko disease is caused only by phylotype II strains. Type III effectors of RSSC (Rips) are key molecular actors regarding pathogenesis and are associated with specificity to some hosts. In this study, we sequenced and characterized 14 newly RSSC isolates from Brazil's Northern and Northeastern regions, including BW and Moko ecotypes. Virulence and resistance sequences were annotated, and the Rips repertoire was predicted. Confirming previous studies, RSSC pangenome is open as α≅0.77. Genomic information regarding these isolates matches those for R. solanacearum in NCBI. All of them fit in phylotype II with a similarity above 96%, with five isolates in phylotype IIB and nine in phylotype IIA. Almost all R. solanacearum genomes in NCBI are actually from other species in RSSC. Rips repertoire of Moko IIB was more homogeneous, except for isolate B4, which presented ten non-shared Rips. Rips repertoire of phylotype IIA was more diverse in both Moko and BW, with 43 common shared Rips among all 14 isolates. New BW isolates shared more Rips with Moko IIA and Moko IIB than with other public BW genome isolates from Brazil.
Rips not shared with other isolates might contribute to individual virulence, but commonly shared Rips are good avirulence candidates. The high number of Rips shared by new Moko and BW isolates suggests they are actually Moko isolates infecting solanaceous hosts. Finally, infection assays and Rips expression on different hosts are needed to better elucidate the association between Rips repertoire and host specificities.},
}
@article{pmid37105244,
  author   = {Henaut-Jacobs, S and Passarelli-Araujo, H and Venancio, TM},
  title    = {Comparative genomics and phylogenomics of {Campylobacter} unveil potential novel species and provide insights into niche segregation.},
  journal  = {Molecular phylogenetics and evolution},
  year     = {2023},
  pages    = {107786},
  doi      = {10.1016/j.ympev.2023.107786},
  pmid     = {37105244},
  issn     = {1095-9513},
  abstract = {Campylobacter is a bacterial genus associated with community outbreaks and gastrointestinal symptoms. Studies on Campylobacter generally focus on specific pathogenic species such as C. coli and C. jejuni. Currently, there are thousands of publicly available Campylobacter genomes, allowing a more complete assessment of the genus diversity. In this work, we report a network-based analysis of all available Campylobacter genomes to explore the genus structure and diversity, revealing potentially new species and elucidating genus features. We also hypothesize that the previously established Clade III of C. coli is in fact a novel species (referred here as Campylobacter spp12). Finally, we found a negative correlation between pangenome fluidity and saturation coefficient, with potential implications to the lifestyles of distinct Campylobacter species. Since pangenome analysis depends on the number of available genomes, this correlation could help estimate pangenome metrics of Campylobacter species with less sequenced genomes, helping understand their lifestyle and niche adaptation. Together, our results indicate that the Campylobacter genus should be re-evaluated, with particular attention to the interplay between genome structure and niche segregation.},
}
@article{pmid37103716,
  author   = {Matussek, A and Mernelius, S and Chromek, M and Zhang, J and Frykman, A and Hansson, S and Georgieva, V and Xiong, Y and Bai, X},
  title    = {Genome-wide association study of hemolytic uremic syndrome causing {Shiga} toxin-producing {Escherichia} coli from {Sweden}, 1994-2018.},
  journal  = {European journal of clinical microbiology \& infectious diseases : official publication of the European Society of Clinical Microbiology},
  year     = {2023},
  pmid     = {37103716},
  issn     = {1435-4373},
  abstract = {Shiga toxin-producing Escherichia coli (STEC) infection can cause clinical manifestations ranging from diarrhea to potentially fatal hemolytic uremic syndrome (HUS). This study is aimed at identifying STEC genetic factors associated with the development of HUS in Sweden. A total of 238 STEC genomes from STEC-infected patients with and without HUS between 1994 and 2018 in Sweden were included in this study. Serotypes, Shiga toxin gene (stx) subtypes, and virulence genes were characterized in correlation to clinical symptoms (HUS and non-HUS), and pan-genome wide association study was performed. Sixty-five strains belonged to O157:H7, and 173 belonged to non-O157 serotypes. Our study revealed that strains of O157:H7 serotype especially clade 8 were most commonly found in patients with HUS in Sweden. stx2a and stx2a + stx2c subtypes were significantly associated with HUS. Other virulence factors associated with HUS mainly included intimin (eae) and its receptor (tir), adhesion factors, toxins, and secretion system proteins. Pangenome wide-association study identified numbers of accessory genes significantly overrepresented in HUS-STEC strains, including genes encoding outer membrane proteins, transcriptional regulators, phage-related proteins, and numerous genes related to hypothetical proteins.
Whole-genome phylogeny and multiple correspondence analysis of pangenomes could not differentiate HUS-STEC from non-HUS-STEC strains. In O157:H7 cluster, strains from HUS patients clustered closely; however, no significant difference in virulence genes was found in O157 strains from patients with and without HUS. These results suggest that STEC strains from different phylogenetic backgrounds may independently acquire genes determining their pathogenicity and confirm that other non-bacterial factors and/or bacteria-host interaction may affect STEC pathogenesis.},
}
@article{pmid37098951,
  author   = {Rodrigues, C and Lanza, VF and Peixe, L and Coque, TM and Novais, Â},
  title    = {Phylogenomics of Globally Spread Clonal Groups 14 and 15 of {Klebsiella} pneumoniae.},
  journal  = {Microbiology spectrum},
  year     = {2023},
  pages    = {e0339522},
  doi      = {10.1128/spectrum.03395-22},
  pmid     = {37098951},
  issn     = {2165-0497},
  abstract = {Klebsiella pneumoniae sequence type 14 (ST14) and ST15 caused outbreaks of CTX-M-15 and/or carbapenemase producers worldwide, but their phylogeny and global dynamics remain unclear. We clarified the evolution of K. pneumoniae clonal group 14 (CG14) and CG15 by analyzing the capsular locus (KL), resistome, virulome, and plasmidome of public genomes (n = 481) and de novo sequences (n = 9) representing main sublineages circulating in Portugal. CG14 and CG15 evolved independently within 6 main subclades defined according to the KL and the accessory genome. The CG14 (n = 65) clade was structured in two large monophyletic subclades, CG14-I (KL2, 86%) and CG14-II (KL16, 14%), whose emergences were dated to 1932 and 1911, respectively. Genes encoding extended-spectrum β-lactamase (ESBL), AmpC, and/or carbapenemases were mostly observed in CG14-I (71% versus 22%). CG15 clade (n = 170) was segregated into subclades CG15-IA (KL19/KL106, 9%), CG15-IB (variable KL types, 6%), CG15-IIA (KL24, 43%) and CG15-IIB (KL112, 37%). Most CG15 genomes carried specific GyrA and ParC mutations and emerged from a common ancestor in 1989. CTX-M-15 was especially prevalent in CG15 (68% CG15 versus 38% CG14) and in CG15-IIB (92%). Plasmidome analysis revealed 27 predominant plasmid groups (PG), including particularly pervasive and recombinant F-type (n = 10), Col (n = 10), and new plasmid types. While blaCTX-M-15 was acquired multiple times by a high diversity of F-type mosaic plasmids, other antibiotic resistance genes (ARGs) were dispersed by IncL (blaOXA-48) or IncC (blaCMY/TEM-24) plasmids.
We first demonstrate an independent evolutionary trajectory for CG15 and CG14 and how the acquisition of specific KL, quinolone-resistance determining region (QRDR) mutations (CG15), and ARGs in highly recombinant plasmids could have shaped the expansion and diversification of particular subclades (CG14-I and CG15-IIA/IIB). IMPORTANCE Klebsiella pneumoniae represents a major threat in the burden of antibiotic resistance (ABR). Available studies to explain the origin, the diversity, and the evolution of certain ABR K. pneumoniae populations have mainly been focused on a few clonal groups (CGs) using phylogenetic analysis of the core genome, the accessory genome being overlooked. Here, we provide unique insights into the phylogenetic evolution of CG14 and CG15, two poorly characterized CGs which have contributed to the global dissemination of genes responsible for resistance to first-line antibiotics such as β-lactams. Our results point out an independent evolution of these two CGs and highlight the existence of different subclades structured by the capsular type and the accessory genome. Moreover, the contribution of a turbulent flux of plasmids (especially multireplicon F type and Col) and adaptive traits (antibiotic resistance and metal tolerance genes) to the pangenome reflect the exposure and adaptation of K. pneumoniae under different selective pressures.},
}
@article{pmid37098652,
  author  = {Cui, X and Hu, M and Yao, S and Zhang, Y and Tang, M and Liu, L and Cheng, X and Tong, C and Liu, S},
  title   = {{BnaOmics}: a comprehensive platform combining pan-genome and multi-omics data of {Brassica} napus.},
  journal = {Plant communications},
  year    = {2023},
  pages   = {100609},
  doi     = {10.1016/j.xplc.2023.100609},
  pmid    = {37098652},
  issn    = {2590-3462},
}
@article{pmid37098416,
  author   = {Gong, H and Huang, X and Zhu, W and Chen, J and Huang, Y and Zhao, Z and Weng, J and Che, Y and Wang, J and Wang, X},
  title    = {Pan-genome analysis of the {Burkholderia} gladioli PV. Cocovenenans reveal the extent of variation in the toxigenic gene cluster.},
  journal  = {Food microbiology},
  year     = {2023},
  volume   = {113},
  pages    = {104249},
  doi      = {10.1016/j.fm.2023.104249},
  pmid     = {37098416},
  issn     = {1095-9998},
  abstract = {Burkholderia gladioli has been reported as the pathogen responsible for cases of foodborne illness in many countries. The poisonous bongkrekic acid (BA) produced by B. gladioli was linked to a gene cluster absent in non-pathogenic strains. The whole genome sequence of eight bacteria strains, which were screened from the collected 175 raw food and environmental samples, were assembled and analyzed to detect a significant association of 19 protein-coding genes with the pathogenic status. Except for the common BA synthesis-related gene, several other genes, including the toxin-antitoxin genes, were also absent in the non-pathogenic strains. The bacteria strains with the BA gene cluster were found to form a single cluster in the analysis of all B. gladioli genome assemblies for the variants in the gene cluster. Divergence of this cluster was detected in the analysis for both the flanking sequences and those of the whole genome level, which indicates its complex origin. Genome recombination was found to cause a precise sequence deletion in the gene cluster region, which was found to be predominant in the non-pathogenic strains indicating the possible effect of horizontal gene transfer. Our study provided new information and resources for understanding the evolution and divergence of the B. gladioli species.},
}
@article{pmid37093956,
  author   = {Baumdicker, F and Kupczok, A},
  title    = {Tackling the pangenome dilemma requires the concerted analysis of multiple population genetic processes.},
  journal  = {Genome biology and evolution},
  year     = {2023},
  doi      = {10.1093/gbe/evad067},
  pmid     = {37093956},
  issn     = {1759-6653},
  abstract = {The pangenome is the set of all genes present in a prokaryotic population. Most pangenomes contain many accessory genes of low and intermediate frequencies. Different population genetics processes contribute to the shape of these pangenomes, namely selection and fitness-independent-processes such as gene transfer, gene loss, and migration. However, their relative importance is unknown and highly debated. Here we argue that the debate around prokaryotic pangenomes arose due to the imprecise application of population genetics models. Most importantly, two different processes of horizontal gene transfer act on prokaryotic populations, which are frequently confused, despite their fundamentally different behavior. Genes acquired from distantly related organisms (termed here acquiring gene transfer, AGT) is most comparable to mutation in nucleotide sequences. In contrast, gene gain within the population (termed here spreading gene transfer, SGT) has an effect on gene frequencies that is identical to the effect of positive selection on single genes. We thus show that selection and fitness-independent population genetic processes affecting pangenomes are indistinguishable at the level of single gene dynamics. Nevertheless, population genetics processes are fundamentally different when considering the joint distribution of all accessory genes across individuals of a population. We propose that, to understand to which degree the different processes shaped pangenome diversity, the development of comprehensive models and simulation tools is mandatory.
Furthermore, we need to identify summary statistics and measurable features that can distinguish between the processes, where considering the joint distribution of accessory genes across individuals of a population will be particularly relevant.},
}
@article{pmid37089548,
  author   = {Zhong, H and Zheng, N and Wang, J and Zhao, S},
  title    = {Isolation and pan-genome analysis of {Enterobacter} hormaechei {Z129}, a ureolytic bacterium, from the rumen of dairy cow.},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1169973},
  pmid     = {37089548},
  issn     = {1664-302X},
  abstract = {INTRODUCTION: Urea is an important non-protein nitrogen source for ruminants. In the rumen, ureolytic bacteria play critical roles in urea-nitrogen metabolism, however, a few ureolytic strains have been isolated and genomically sequenced. The purpose of this study was to isolate a novel ureolytic bacterial strain from cattle rumen and characterize its genome and function.
METHODS: The ureolytic bacterium was isolated using an anaerobic medium with urea and phenol red as a screening indicator from the rumen fluid of dairy cattle. The genome of isolates was sequenced, assembled, annotated, and comparatively analyzed. The pan-genome analysis was performed using IPGA and the biochemical activity was also analyzed by test kits.
RESULTS: A gram-positive ureolytic strain was isolated. Its genome had a length of 4.52 Mbp and predicted genes of 4223. The 16S rRNA gene and genome GTDB-Tk taxonomic annotation showed that it was a novel strain of Enterobacter hormaechei, and it was named E. hormaechei Z129. The pan-genome analysis showed that Z129 had the highest identity to E. hormaechei ATCC 49162 with a genome average nucleotide identity of 98.69% and possessed 238 unique genes. Strain Z129 was the first E. hormaechei strain isolated from the rumen as we know. The functional annotation of the Z129 genome showed genes related to urea metabolism, including urea transport (urtA-urtE), nickel ion transport (ureJ, tonB, nixA, exbB, exbD, and rcnA), urease activation (ureA-ureG) and ammonia assimilation (gdhA, glnA, glnB, glnE, glnL, glsA, gltB, and gltD) were present. Genes involved in carbohydrate metabolism were also present, including starch hydrolysis (amyE), cellulose hydrolysis (celB and bglX), xylose transport (xylF-xylH) and glycolysis (pgi, pgk, fbaA, eno, pfkA, gap, pyk, gpmL). Biochemical activity analysis showed that Z129 was positive for alkaline phosphatase, leucine arylamidase, acid phosphatase, naphthol-AS-BI-phosphohydrolase, α-glucosidase, β-glucosidase, and pyrrolidone arylaminase, and had the ability to use D-ribose, L-arabinose, and D-lactose. Urea-nitrogen hydrolysis rate of Z129 reached 55.37% at 48 h of incubation.
DISCUSSION: Therefore, the isolated novel ureolytic strain E. hormaechei Z129 had diverse nitrogen and carbon metabolisms, and is a preferred model to study the urea hydrolysis mechanism in the rumen.},
}
@article{pmid37084119,
  author   = {Williams, AN and Croxen, MA and Demczuk, WHB and Martin, I and Tyrrell, GJ},
  title    = {Genomic characterization of emerging invasive {Streptococcus} agalactiae serotype {VIII} in {Alberta}, {Canada}.},
  journal  = {European journal of clinical microbiology \& infectious diseases : official publication of the European Society of Clinical Microbiology},
  year     = {2023},
  pmid     = {37084119},
  issn     = {1435-4373},
  abstract = {Invasive Group B Streptococcus (GBS) can infect pregnant women, neonates, and older adults. Invasive GBS serotype VIII is infrequent in Alberta; however, cases have increased in recent years. Here, genomic analysis was used to characterize fourteen adult invasive serotype VIII isolates from 2009 to 2021. Trends in descriptive clinical data and antimicrobial susceptibility results were evaluated for invasive serotype VIII isolates from Alberta. Isolate genomes were sequenced and subjected to molecular sequence typing, virulence and antimicrobial resistance gene identification, phylogenetic analysis, and pangenome determination. Multilocus sequencing typing identified eight ST42 (Clonal Complex; CC19), four ST1 (CC1), and two ST2 (CC1) profiles. Isolates were susceptible to penicillin, erythromycin, chloramphenicol, and clindamycin, apart from one isolate that displayed erythromycin and inducible clindamycin resistance. All isolates carried genes for peptide antibiotic resistance, three isolates for tetracycline resistance, and one for macrolide, lincosamide, and streptogramin resistance. All genomes carried targets currently being considered for protein-based vaccines (e.g., pili and/or Alpha family proteins). Overall, invasive GBS serotype VIII is emerging in Alberta, primarily due to ST42.
Characterization and continued surveillance of serotype VIII will be important for outbreak prevention, informing vaccine development, and contributing to our understanding of the global epidemiology of this rare serotype.},
}
@article{pmid37082513,
  author   = {Gangurde, SS and Xavier, A and Naik, YD and Jha, UC and Rangari, SK and Kumar, R and Reddy, MSS and Channale, S and Elango, D and Mir, RR and Zwart, R and Laxuman, C and Sudini, HK and Pandey, MK and Punnuri, S and Mendu, V and Reddy, UK and Guo, B and Gangarao, NVPR and Sharma, VK and Wang, X and Zhao, C and Thudi, M},
  title    = {Two decades of association mapping: Insights on disease resistance in major crops.},
  journal  = {Frontiers in plant science},
  year     = {2022},
  volume   = {13},
  pages    = {1064059},
  pmid     = {37082513},
  issn     = {1664-462X},
  abstract = {Climate change across the globe has an impact on the occurrence, prevalence, and severity of plant diseases. About 30% of yield losses in major crops are due to plant diseases; emerging diseases are likely to worsen the sustainable production in the coming years. Plant diseases have led to increased hunger and mass migration of human populations in the past, thus a serious threat to global food security. Equipping the modern varieties/hybrids with enhanced genetic resistance is the most economic, sustainable and environmentally friendly solution. Plant geneticists have done tremendous work in identifying stable resistance in primary genepools and many times other than primary genepools to breed resistant varieties in different major crops. Over the last two decades, the availability of crop and pathogen genomes due to advances in next generation sequencing technologies improved our understanding of trait genetics using different approaches. Genome-wide association studies have been effectively used to identify candidate genes and map loci associated with different diseases in crop plants. In this review, we highlight successful examples for the discovery of resistance genes to many important diseases.
In addition, major developments in association studies, statistical models and bioinformatic tools that improve the power, resolution and the efficiency of identifying marker-trait associations. Overall this review provides comprehensive insights into the two decades of advances in GWAS studies and discusses the challenges and opportunities this research area provides for breeding resistant varieties.},
}
@article{pmid37077982,
  author   = {Pucker, B and Irisarri, I and de Vries, J and Xu, B},
  title    = {Plant genome sequence assembly in the era of long reads: Progress, challenges and future directions.},
  journal  = {Quantitative plant biology},
  year     = {2022},
  volume   = {3},
  pages    = {e5},
  pmid     = {37077982},
  issn     = {2632-8828},
  abstract = {Third-generation long-read sequencing is transforming plant genomics. Oxford Nanopore Technologies and Pacific Biosciences are offering competing long-read sequencing technologies and enable plant scientists to investigate even large and complex plant genomes. Sequencing projects can be conducted by single research groups and sequences of smaller plant genomes can be completed within days. This also resulted in an increased investigation of genomes from multiple species in large scale to address fundamental questions associated with the origin and evolution of land plants. Increased accessibility of sequencing devices and user-friendly software allows more researchers to get involved in genomics. Current challenges are accurately resolving diploid or polyploid genome sequences and better accounting for the intra-specific diversity by switching from the use of single reference genome sequences to a pangenome graph.},
}
@article{pmid37074150,
  author   = {Pugh, HL and Connor, C and Siasat, P and McNally, A and Blair, JMA},
  title    = {E. coli {ST11} ({O157:H7}) does not encode a functional {AcrF} efflux pump.},
  journal  = {Microbiology (Reading, England)},
  year     = {2023},
  volume   = {169},
  number   = {4},
  doi      = {10.1099/mic.0.001324},
  pmid     = {37074150},
  issn     = {1465-2080},
  abstract = {Escherichia coli is a facultative anaerobe found in a wide range of environments. Commonly described as the laboratory workhorse, E. coli is one of the best characterized bacterial species to date, however much of our understanding comes from studies involving the laboratory strain E. coli K-12. Resistance-nodulation-division efflux pumps are found in Gram-negative bacteria and can export a diverse range of substrates, including antibiotics. E. coli K-12 has six RND pumps; AcrB, AcrD, AcrF, CusA, MdtBC and MdtF, and it is frequently reported that all E. coli strains possess these six pumps. However, this is not true of E. coli ST11, a lineage of E. coli, which is primarily composed of the highly virulent important human pathogen, E. coli O157:H7. Here we show that acrF is absent from the pangenome of ST11 and that this lineage of E. coli has a highly conserved insertion within the acrF gene, which when translated encodes 13 amino acids and two stop codons. This insertion was found to be present in 97.59 % of 1787 ST11 genome assemblies. Non-function of AcrF in ST11 was confirmed in the laboratory as complementation with acrF from ST11 was unable to restore AcrF function in E. coli K-12 substr. MG1655 ΔacrB ΔacrF. This shows that the complement of RND efflux pumps present in laboratory bacterial strains may not reflect the situation in virulent strains of bacterial pathogens.},
}
@article{pmid37072518,
  author  = {Eisenstein, M},
  title   = {Every base everywhere all at once: pangenomics comes of age.},
  journal = {Nature},
  year    = {2023},
  volume  = {616},
  number  = {7957},
  pages   = {618--620},
  pmid    = {37072518},
  issn    = {1476-4687},
}
@article{pmid37066137,
  author   = {Garrison, E and Guarracino, A and Heumos, S and Villani, F and Bao, Z and Tattini, L and Hagmann, J and Vorbrugg, S and Marco-Sola, S and Kubica, C and Ashbrook, DG and Thorell, K and Rusholme-Pilcher, RL and Liti, G and Rudbeck, E and Nahnsen, S and Yang, Z and Moses, MN and Nobrega, FL and Wu, Y and Chen, H and de Ligt, J and Sudmant, PH and Soranzo, N and Colonna, V and Williams, RW and Prins, P},
  title    = {Building pangenome graphs.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2023},
  doi      = {10.1101/2023.04.05.535718},
  pmid     = {37066137},
  abstract = {Pangenome graphs can represent all variation between multiple genomes, but existing methods for constructing them are biased due to reference-guided approaches. In response, we have developed PanGenome Graph Builder (PGGB), a reference-free pipeline for constructing unbiased pangenome graphs. PGGB uses all-to-all whole-genome alignments and learned graph embeddings to build and iteratively refine a model in which we can identify variation, measure conservation, detect recombination events, and infer phylogenetic relationships.},
}
@article {pmid37065164, year = {2023}, author = {Wan, X and Takala, TM and Huynh, VA and Ahonen, SL and Paulin, L and Björkroth, J and Sironen, T and Kant, R and Saris, P}, title = {Comparative genomics of 40 {Weissella paramesenteroides} strains.}, journal = {Frontiers in microbiology}, volume = {14}, pages = {1128028}, pmid = {37065164}, issn = {1664-302X}, abstract = {Weissella strains are often detected in spontaneously fermented foods. Because of their abilities to produce lactic acid and functional exopolysaccharides as well as their probiotic traits, Weissella spp. improve not only the sensorial properties but also nutritional values of the fermented food products. However, some Weissella species have been associated with human and animal diseases. In the era of vast genomic sequencing, new genomic/genome data are becoming available to the public on daily pace. Detailed genomic analyses are due to provide a full understanding of individual Weissella species. In this study, the genomes of six Weissella paramesenteroides strains were de novo sequenced. The genomes of 42 W. paramesenteroides strains were compared to discover their metabolic and functional potentials in food fermentation. Comparative genomics and metabolic pathway reconstructions revealed that W. paramesenteroides is a compact group of heterofermentative bacteria with good capacity of producing secondary metabolites and vitamin Bs. Since the strains rarely harbored plasmid DNA, they did not commonly possess the genes associated with bacteriocin production. All 42 strains were shown to bear vanT gene from the glycopeptide resistance gene cluster vanG. Yet none of the strains carried virulence genes.}, }
@article {pmid37059810, year = {2023}, author = {Olson, ND and Wagner, J and Dwarshuis, N and Miga, KH and Sedlazeck, FJ and Salit, M and Zook, JM}, title = {Variant calling and benchmarking in an era of complete human genome sequences.}, journal = {Nature reviews. Genetics}, pmid = {37059810}, issn = {1471-0064}, abstract = {Genetic variant calling from DNA sequencing has enabled understanding of germline variation in hundreds of thousands of humans. Sequencing technologies and variant-calling methods have advanced rapidly, routinely providing reliable variant calls in most of the human genome. We describe how advances in long reads, deep learning, de novo assembly and pangenomes have expanded access to variant calls in increasingly challenging, repetitive genomic regions, including medically relevant regions, and how new benchmark sets and benchmarking methods illuminate their strengths and limitations. Finally, we explore the possible future of more complete characterization of human genome variation in light of the recent completion of a telomere-to-telomere human genome reference assembly and human pangenomes, and we consider the innovations needed to benchmark their newly accessible repetitive regions and complex variants.}, }
@article {pmid37052486, year = {2023}, author = {Miranda, RP and Turrini, PCG and Bonadio, DT and Zerillo, MM and Berselli, AP and Creste, S and Van Sluys, MA}, title = {Genome Organization of Four {Brazilian} {Xanthomonas albilineans} Strains Does Not Correlate with Aggressiveness.}, journal = {Microbiology spectrum}, pages = {e0280222}, doi = {10.1128/spectrum.02802-22}, pmid = {37052486}, issn = {2165-0497}, abstract = {An integrative approach combining genomics, transcriptomics, and cell biology is presented to address leaf scald disease, a major problem for the sugarcane industry. To gain insight into the biology of the causal agent, the complete genome sequences of four Brazilian Xanthomonas albilineans strains with differing virulence capabilities are presented and compared to the GPEPC73 reference strain and FJ1. Based on the aggressiveness index, different strains were compared: Xa04 and Xa11 are highly aggressive, Xa26 is intermediate, and Xa21 is the least, while, based on genome structure, Xa04 shares most of its genomic features with Xa26, and Xa11 share most of its genomic features with Xa21. In addition to presenting more clustered regularly interspaced short palindromic repeats (CRISPR) clusters, four more novel prophage insertions are present than the previously sequenced GPEPC73 and FJ1 strains. Incorporating the aggressiveness index and in vitro cell biology into these genome features indicates that disease establishment is not a result of a single determinant factor, as in most other Xanthomonas species. The Brazilian strains lack the previously described plasmids but present more prophage regions. In pairs, the most virulent and the least virulent share unique prophages. In vitro transcriptomics shed light on the 54 most highly expressed genes among the 4 strains compared to ribosomal proteins (RPs), of these, 3 outer membrane proteins. 
Finally, comparative albicidin inhibition rings and in vitro growth curves of the four strains also do not correlate with pathogenicity. In conclusion, the results disclose that leaf scald disease is not associated with a single shared characteristic between the most or the least pathogenic strains. IMPORTANCE An integrative approach is presented which combines genomics, transcriptomics, and cell biology to address leaf scald disease. The results presented here disclose that the disease is not associated with a single shared characteristic between the most pathogenic strains or a unique genomic pattern. Sequence data from four Brazilian strains are presented that differ in pathogenicity index: Xa04 and Xa11 are highly virulent, Xa26 is intermediate, and Xa21 is the least pathogenic strain, while, based on genome structure, Xa04 shares with Xa26, and Xa11 shares with X21 most of the genome features. Other than presenting more CRISPR clusters and prophages than the previously sequenced strains, the integration of aggressiveness and cell biology points out that disease establishment is not a result of a single determinant factor as in other xanthomonads.}, }
@article {pmid37047101, year = {2023}, author = {Tenea, GN}, title = {Metabiotics Signature through Genome Sequencing and In Vitro Inhibitory Assessment of a Novel {Lactococcus lactis} Strain {UTNCys6-1} Isolated from {Amazonian} Camu-Camu Fruits.}, journal = {International journal of molecular sciences}, volume = {24}, number = {7}, doi = {10.3390/ijms24076127}, pmid = {37047101}, issn = {1422-0067}, abstract = {Metabiotics are the structural components of probiotic bacteria, functional metabolites, and/or signaling molecules with numerous beneficial properties. A novel Lactococcus lactis strain, UTNCys6-1, was isolated from wild Amazonian camu-camu fruits (Myrciaria dubia), and various functional metabolites with antibacterial capacity were found. The genome size is 2,226,248 base pairs, and it contains 2248 genes, 2191 protein-coding genes (CDSs), 50 tRNAs, 6 rRNAs, 1 16S rRNA, 1 23S rRNA, and 1 tmRNA. The average GC content is 34.88\%. In total, 2148 proteins have been mapped to the EggNOG database. The specific annotation consisted of four incomplete prophage regions, one CRISPR-Cas array, six genomic islands (GIs), four insertion sequences (ISs), and four regions of interest (AOI regions) spanning three classes of bacteriocins (enterolysin\_A, nisin\_Z, and sactipeptides). Based on pangenome analysis, there were 6932 gene clusters, of which 751 (core genes) were commonly observed within the 11 lactococcal strains. Among them, 3883 were sample-specific genes (cloud genes) and 2298 were shell genes, indicating high genetic diversity. A sucrose transporter of the SemiSWEET family (PTS system: phosphoenolpyruvate-dependent transport system) was detected in the genome of UTNCys6-1 but not the other 11 lactococcal strains. In addition, the metabolic profile, antimicrobial susceptibility, and inhibitory activity of both protein-peptide extract (PPE) and exopolysaccharides (EPSs) against several foodborne pathogens were assessed in vitro. 
Furthermore, UTNCys6-1 was predicted to be a non-human pathogen that was unable to tolerate all tested antibiotics except gentamicin; metabolized several substrates; and lacks virulence factors (VFs), genes related to the production of biogenic amines, and acquired antibiotic resistance genes (ARGs). Overall, this study highlighted the potential of this strain for producing bioactive metabolites (PPE and EPSs) for agri-food and pharmaceutical industry use.}, }
@article {pmid37042769, year = {2023}, author = {Ma, X and Sun, T and Zhou, J and Zhi, M and Shen, S and Wang, Y and Gu, X and Li, Z and Gao, H and Wang, P and Feng, Q}, title = {Pangenomic Study of {Fusobacterium nucleatum} Reveals the Distribution of Pathogenic Genes and Functional Clusters at the Subspecies and Strain Levels.}, journal = {Microbiology spectrum}, pages = {e0518422}, doi = {10.1128/spectrum.05184-22}, pmid = {37042769}, issn = {2165-0497}, abstract = {Fusobacterium nucleatum is a prevalent periodontal pathogen and is associated with many systemic diseases. Our knowledge of the genomic characteristics and pathogenic effectors of different F. nucleatum strains is limited. In this study, we completed the whole genome assembly of the 4 F. nucleatum strains and carried out a comprehensive pangenomic study of 30 strains with their complete genome sequences. Phylogenetic analysis revealed that the F. nucleatum strains are mainly divided into 4 subspecies, while 1 of the sequenced strains was classified into a new subspecies. Gene composition analysis revealed that a total of 517 "core/soft-core genes" with housekeeping functions widely distributed in almost all the strains. Each subspecies had a unique gene cluster shared by strains within the subspecies. Analysis of the virulence factors revealed that many virulence factors were widely distributed across all the strains, with some present in multiple copies. Some virulence genes showed no consistent occurrence rule at the subspecies level and were specifically distributed in certain strains. The genomic islands mainly revealed strain-specific characteristics instead of subspecies level consistency, while CRISPR types and secondary metabolite biosynthetic gene clusters were identically distributed in F. nucleatum strains from the same subspecies. 
The variation in amino acid sites in the adhesion protein FadA did not affect the monomer and dimer 3D structures, but it may affect the binding surface and the stability of binding to host receptors. This study provides a basis for the pathogenic study of F. nucleatum at the subspecies and strain levels. IMPORTANCE We used F. nucleatum as an example to analyze the genomic characteristics of oral pathogens at the species, subspecies, and strain levels and elucidate the similarities and differences in functional genes and virulence factors among different subspecies/strains of the same oral pathogen. We believe that the unique biological characteristics of each subspecies/strain can be attributed to the differences in functional gene clusters or the presence/absence of certain virulence genes. This study showed that F. nucleatum strains from the same subspecies had similar functional gene compositions, CRISPR types, and secondary metabolite biosynthetic gene clusters, while pathogenic genes, such as virulence genes, antibiotic resistance genes, and GIs, had more strain level specificity. The findings of this study suggest that, for microbial pathogenicity studies, we should carefully consider the subspecies/strains being used, as different strains may vary greatly.}, }
@article {pmid37037626, year = {2023}, author = {Lu, TY and Smaruj, PN and Fudenberg, G and Mancuso, N and Chaisson, MJ}, title = {The motif composition of variable-number tandem repeats impacts gene expression.}, journal = {Genome research}, doi = {10.1101/gr.276768.122}, pmid = {37037626}, issn = {1549-5469}, abstract = {Understanding the impact of DNA variation on human traits is a fundamental question in human genetics. Variable number tandem repeats (VNTRs) make up roughly 3\% of the human genome but are often excluded from association analysis due to poor read mappability or divergent repeat content. While methods exist to estimate VNTR length from short-read data, it is known that VNTRs vary in both length and repeat (motif) composition. Here, we use a repeat-pangenome graph (RPGG) constructed on 35 haplotype-resolved assemblies to detect variation in both VNTR length and repeat composition. We align population scale data from the Genotype-Tissue Expression (GTEx) Consortium to examine how variations in sequence composition may be linked to expression, including cases independent of overall VNTR length. We find that 9,422 out of 39,125 VNTRs are associated with nearby gene expression through motif variations, of which only 23.4\% associations are accessible from length. Fine-mapping identifies 174 genes to be likely driven by variation in certain VNTR motifs and not overall length. We highlight two genes, CACNA1C and RNF213 that have expression associated with motif variation, demonstrating the utility of RPGG analysis as a new approach for trait association in multiallelic and highly variable loci.}, }
@article {pmid37029275, year = {2023}, key = {Tomato super-pangenome}, title = {Tomato super-pangenome highlights the potential use of wild relatives in tomato breeding.}, journal = {Nature genetics}, pmid = {37029275}, issn = {1546-1718}, }
@article {pmid37025802, year = {2023}, author = {De Mesa, CA and Mendoza, RM and Penir, SMU and de la Peña, LD and Amar, EC and Saloma, CP}, title = {Genomic analysis of {Vibrio harveyi} strain {PH1009}, a potential multi-drug resistant pathogen due to acquisition of toxin genes.}, journal = {Heliyon}, volume = {9}, number = {4}, pages = {e14926}, pmid = {37025802}, issn = {2405-8440}, abstract = {In has increasingly been observed that viral and bacterial coinfection frequently occurs among cultured shrimp and this coinfection could exacerbate the disease phenotype. Here, we describe a newly discovered bacterial strain, Vibrio harveyi PH1009 collected from Masbate Island, Philippines that was found to be co-infecting with the White Spot Syndrome virus in a sample of black tiger prawn, Penaeus monodon. The genome of V. harveyi PH1009 was sequenced, assembled, and annotated. Average Nucleotide identity calculation with Vibrio harveyi strains confirmed its taxonomic identity. It is a potential multi-drug and multi-heavy metal resistant strain based on the multiple antibiotic and heavy metal resistance determinants annotated on its genome. Two prophage regions were identified in its genome. One contained genes for Zona occludens toxin (Zot) and Accessory cholera toxin (Ace), essential toxins of toxigenic V. cholerae strains apart from CTX toxins. Pan-genome analysis of V. harveyi strains, including PH1009, revealed an "open" pan-genome for V. harveyi and a core genome mainly composed of genes necessary for growth and metabolism. Phylogenetic tree based on the core genome alignment revealed that PH1009 was closest to strains QT520, CAIM 1754, and 823tez1. Published virulence factors present on the strain QT520 suggest similar pathogenicity with PH1009. However, PH1009 Zot was not found on related strains but was present in strains HENC-01 and CAIM 148. Most unique genes found in the PH1009 strain were identified as hypothetical proteins. 
Further annotation showed that several of these hypothetical proteins were phage transposases, integrases, and transcription regulators, implying the role of bacteriophages in the distinct genomic features of the PH1009 genome. The PH1009 genome will serve as a valuable genomic resource for comparative genomic studies and in understanding the disease mechanism of the Vibrio harveyi species.}, }
@article {pmid37024581, year = {2023}, author = {Li, N and He, Q and Wang, J and Wang, B and Zhao, J and Huang, S and Yang, T and Tang, Y and Yang, S and Aisimutuola, P and Xu, R and Hu, J and Jia, C and Ma, K and Li, Z and Jiang, F and Gao, J and Lan, H and Zhou, Y and Zhang, X and Huang, S and Fei, Z and Wang, H and Li, H and Yu, Q}, title = {Super-pangenome analyses highlight genomic diversity and structural variation across wild and cultivated tomato species.}, journal = {Nature genetics}, pmid = {37024581}, issn = {1546-1718}, abstract = {Effective utilization of wild relatives is key to overcoming challenges in genetic improvement of cultivated tomato, which has a narrow genetic basis; however, current efforts to decipher high-quality genomes for tomato wild species are insufficient. Here, we report chromosome-scale tomato genomes from nine wild species and two cultivated accessions, representative of Solanum section Lycopersicon, the tomato clade. Together with two previously released genomes, we elucidate the phylogeny of Lycopersicon and construct a section-wide gene repertoire. We reveal the landscape of structural variants and provide entry to the genomic diversity among tomato wild relatives, enabling the discovery of a wild tomato gene with the potential to increase yields of modern cultivated tomatoes. Construction of a graph-based genome enables structural-variant-based genome-wide association studies, identifying numerous signals associated with tomato flavor-related traits and fruit metabolites. The tomato super-pangenome resources will expedite biological studies and breeding of this globally important crop.}, }
@article {pmid37023146, year = {2023}, author = {Hochhauser, D and Millman, A and Sorek, R}, title = {The defense island repertoire of the {Escherichia coli} pan-genome.}, journal = {PLoS genetics}, volume = {19}, number = {4}, pages = {e1010694}, doi = {10.1371/journal.pgen.1010694}, pmid = {37023146}, issn = {1553-7404}, abstract = {It has become clear in recent years that anti-phage defense systems cluster non-randomly within bacterial genomes in so-called "defense islands". Despite serving as a valuable tool for the discovery of novel defense systems, the nature and distribution of defense islands themselves remain poorly understood. In this study, we comprehensively mapped the defense system repertoire of >1,300 strains of Escherichia coli, the most widely studied organism for phage-bacteria interactions. We found that defense systems are usually carried on mobile genetic elements including prophages, integrative conjugative elements and transposons, which preferentially integrate at several dozens of dedicated hotspots in the E. coli genome. Each mobile genetic element type has a preferred integration position but can carry a diverse variety of defensive cargo. On average, an E. coli genome has 4.7 hotspots occupied by defense system-containing mobile elements, with some strains possessing up to eight defensively occupied hotspots. Defense systems frequently co-localize with other systems on the same mobile genetic element, in agreement with the observed defense island phenomenon. Our data show that the overwhelming majority of the E. coli pan-immune system is carried on mobile genetic elements, explaining why the immune repertoire varies substantially between different strains of the same species.}, }
@article {pmid37019751, year = {2023}, author = {Dart, E and Ahlgren, NA}, title = {New {tRNA-targeting} transposons that hijack phage and vesicles.}, journal = {Trends in genetics : TIG}, doi = {10.1016/j.tig.2023.03.004}, pmid = {37019751}, issn = {0168-9525}, abstract = {Genomic islands are hotspots for horizontal gene transfer (HGT) in bacteria, but, for Prochlorococcus, an abundant marine cyanobacterium, how these islands form has puzzled scientists. With the discovery of tycheposons, a new family of transposons, Hackl et al. provide evidence for elegant new mechanisms of gene rearrangement and transfer among Prochlorococcus and bacteria more broadly.}, }
@article {pmid37018035, year = {2023}, author = {Muzahid, NH and Hussain, MH and Huët, MAL and Dwiyanto, J and Su, TT and Reidpath, D and Mustapha, F and Ayub, Q and Tan, HS and Rahman, S}, title = {Molecular characterization and comparative genomic analysis of {Acinetobacter baumannii} isolated from the community and the hospital: an epidemiological study in {Segamat}, {Malaysia}.}, journal = {Microbial genomics}, volume = {9}, number = {4}, doi = {10.1099/mgen.0.000977}, pmid = {37018035}, issn = {2057-5858}, abstract = {Acinetobacter baumannii is a common cause of multidrug-resistant (MDR) nosocomial infections around the world. However, little is known about the persistence and dynamics of A. baumannii in a healthy community. This study investigated the role of the community as a prospective reservoir for A. baumannii and explored possible links between hospital and community isolates. A total of 12 independent A. baumannii strains were isolated from human faecal samples from the community in Segamat, Malaysia, in 2018 and 2019. Another 15 were obtained in 2020 from patients at the co-located tertiary public hospital. The antimicrobial resistance profile and biofilm formation ability were analysed, and the relatedness of community and hospital isolates was determined using whole-genome sequencing (WGS). Antibiotic profile analysis revealed that 12 out of 15 hospital isolates were MDR, but none of the community isolates were MDR. However, phylogenetic analysis based on single-nucleotide polymorphisms (SNPs) and a pangenome analysis of core genes showed clustering between four community and two hospital strains. Such clustering of strains from two different settings based on their genomes suggests that these strains could persist in both. WGS revealed 41 potential resistance genes on average in the hospital strains, but fewer (n=32) were detected in the community strains. In contrast, 68 virulence genes were commonly seen in strains from both sources. 
This study highlights the possible transmission threat to public health posed by virulent A. baumannii present in the gut of asymptomatic individuals in the community.}, }
@article {pmid37016310, year = {2023}, author = {Commichaux, S and Rand, H and Javkar, K and Molloy, EK and Pettengill, JB and Pightling, A and Hoffmann, M and Pop, M and Jayeola, V and Foley, S and Luo, Y}, title = {Assessment of plasmids for relating the 2020 {Salmonella enterica} serovar {Newport} onion outbreak to farms implicated by the outbreak investigation.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {165}, pmid = {37016310}, issn = {1471-2164}, abstract = {BACKGROUND: The Salmonella enterica serovar Newport red onion outbreak of 2020 was the largest foodborne outbreak of Salmonella in over a decade. The epidemiological investigation suggested two farms as the likely source of contamination. However, single nucleotide polymorphism (SNP) analysis of the whole genome sequencing data showed that none of the Salmonella isolates collected from the farm regions were linked to the clinical isolates-preventing the use of phylogenetics in source identification. Here, we explored an alternative method for analyzing the whole genome sequencing data driven by the hypothesis that if the outbreak strain had come from the farm regions, then the clinical isolates would disproportionately contain plasmids found in isolates from the farm regions due to horizontal transfer.
RESULTS: SNP analysis confirmed that the clinical isolates formed a single, nearly-clonal clade with evidence for ancestry in California going back a decade. The clinical clade had a large core genome (4,399 genes) and a large and sparsely distributed accessory genome (2,577 genes, at least 64\% on plasmids). At least 20 plasmid types occurred in the clinical clade, more than were found in the literature for Salmonella Newport. A small number of plasmids, 14 from 13 clinical isolates and 17 from 8 farm isolates, were found to be highly similar (> 95\% identical)-indicating they might be related by horizontal transfer. Phylogenetic analysis was unable to determine the geographic origin, isolation source, or time of transfer of the plasmids, likely due to their promiscuous and transient nature. However, our resampling analysis suggested that observing a similar number and combination of highly similar plasmids in random samples of environmental Salmonella enterica within the NCBI Pathogen Detection database was unlikely, supporting a connection between the outbreak strain and the farms implicated by the epidemiological investigation.
CONCLUSION: Horizontally transferred plasmids provided evidence for a connection between clinical isolates and the farms implicated as the source of the outbreak. Our case study suggests that such analyses might add a new dimension to source tracking investigations, but highlights the need for detailed and accurate metadata, more extensive environmental sampling, and a better understanding of plasmid molecular evolution.}, }
@article {pmid37016094, year = {2023}, author = {Li, W and Wang, D and Hong, X and Shi, J and Hong, J and Su, S and Loaiciga, CR and Li, J and Liang, W and Shi, J and Zhang, D}, title = {Identification and validation of new {MADS-box} homologous genes in 3010 rice pan-genome.}, journal = {Plant cell reports}, pmid = {37016094}, issn = {1432-203X}, abstract = {Identification and validation of ten new MADS-box homologous genes in 3010 rice pan-genome for rice breeding. The functional genome is significant for rice breeding. MADS-box genes encode transcription factors that are indispensable for rice growth and development. The reported 15,362 novel genes in the rice pan-genome (RPAN) of Asian cultivated rice accessions provided a useful gene reservoir for the identification of more MADS-box candidates to overcome the limitation for the usage of only 75 MADS-box genes identified in Nipponbare for rice breeding. Here, we report the identification and validation of ten MADS-box homologous genes in RPAN. Origin and identity analysis indicated that they are originated from different wild rice accessions and structure of motif analysis revealed high variations in their amino acid sequences. Phylogenetic results with 277 MADS-box genes in 41 species showed that all these ten MADS-box homologous genes belong to type I (SRF-like, M-type). Gene expression analysis confirmed the existence of these ten MADS-box genes in IRIS\_313-10,394, all of them were expressed in flower tissues, and six of them were highly expressed during seed development. Altogether, we identified and validated experimentally, for the first time, ten novel MADS-box genes in RPAN, which provides new genetic sources for rice improvement.}, }
@article {pmid37012375, year = {2023}, author = {von Meijenfeldt, FAB and Hogeweg, P and Dutilh, BE}, title = {A social niche breadth score reveals niche range strategies of generalists and specialists.}, journal = {Nature ecology \& evolution}, pmid = {37012375}, issn = {2397-334X}, abstract = {Generalists can survive in many environments, whereas specialists are restricted to a single environment. Although a classical concept in ecology, niche breadth has remained challenging to quantify for microorganisms because it depends on an objective definition of the environment. Here, by defining the environment of a microorganism as the community it resides in, we integrated information from over 22,000 environmental sequencing samples to derive a quantitative measure of the niche, which we call social niche breadth. At the level of genera, we explored niche range strategies throughout the prokaryotic tree of life. We found that social generalists include opportunists that stochastically dominate local communities, whereas social specialists are stable but low in abundance. Social generalists have a more diverse and open pan-genome than social specialists, but we found no global correlation between social niche breadth and genome size. Instead, we observed two distinct evolutionary strategies, whereby specialists have relatively small genomes in habitats with low local diversity, but relatively large genomes in habitats with high local diversity. Together, our analysis shines data-driven light on microbial niche range strategies.}, }
@article {pmid37010293, year = {2023}, author = {Maranga, M and Szczerbiak, P and Bezshapkin, V and Gligorijevic, V and Chandler, C and Bonneau, R and Xavier, RJ and Vatanen, T and Kosciolek, T}, title = {Comprehensive Functional Annotation of Metagenomes and Microbial Genomes Using a Deep Learning-Based Method.}, journal = {mSystems}, pages = {e0117822}, doi = {10.1128/msystems.01178-22}, pmid = {37010293}, issn = {2379-5077}, abstract = {Comprehensive protein function annotation is essential for understanding microbiome-related disease mechanisms in the host organisms. However, a large portion of human gut microbial proteins lack functional annotation. Here, we have developed a new metagenome analysis workflow integrating de novo genome reconstruction, taxonomic profiling, and deep learning-based functional annotations from DeepFRI. This is the first approach to apply deep learning-based functional annotations in metagenomics. We validate DeepFRI functional annotations by comparing them to orthology-based annotations from eggNOG on a set of 1,070 infant metagenomes from the DIABIMMUNE cohort. Using this workflow, we generated a sequence catalogue of 1.9 million nonredundant microbial genes. The functional annotations revealed 70\% concordance between Gene Ontology annotations predicted by DeepFRI and eggNOG. DeepFRI improved the annotation coverage, with 99\% of the gene catalogue obtaining Gene Ontology molecular function annotations, although they are less specific than those from eggNOG. Additionally, we constructed pangenomes in a reference-free manner using high-quality metagenome-assembled genomes (MAGs) and analyzed the associated annotations. eggNOG annotated more genes on well-studied organisms, such as Escherichia coli, while DeepFRI was less sensitive to taxa. Further, we show that DeepFRI provides additional annotations in comparison to the previous DIABIMMUNE studies. 
This workflow will contribute to novel understanding of the functional signature of the human gut microbiome in health and disease as well as guiding future metagenomics studies. IMPORTANCE The past decade has seen advancement in high-throughput sequencing technologies resulting in rapid accumulation of genomic data from microbial communities. While this growth in sequence data and gene discovery is impressive, the majority of microbial gene functions remain uncharacterized. The coverage of functional information coming from either experimental sources or inferences is low. To solve these challenges, we have developed a new workflow to computationally assemble microbial genomes and annotate the genes using a deep learning-based model DeepFRI. This improved microbial gene annotation coverage to 1.9 million metagenome-assembled genes, representing 99\% of the assembled genes, which is a significant improvement compared to 12\% Gene Ontology term annotation coverage by commonly used orthology-based approaches. Importantly, the workflow supports pangenome reconstruction in a reference-free manner, allowing us to analyze the functional potential of individual bacterial species. We therefore propose this alternative approach combining deep-learning functional predictions with the commonly used orthology-based annotations as one that could help us uncover novel functions observed in metagenomic microbiome studies.}, }
@article {pmid37007277, year = {2023}, author = {Heng, E and Tan, LL and Tay, DWP and Lim, YH and Yang, LK and Seow, DCS and Leong, CY and Ng, V and Ng, SB and Kanagasundaram, Y and Wong, FT and Koduru, L}, title = {Cost-effective hybrid long-short read assembly delineates alternative {GC-rich} {Streptomyces} hosts for natural product discovery.}, journal = {Synthetic and systems biotechnology}, volume = {8}, number = {2}, pages = {253--261}, pmid = {37007277}, issn = {2405-805X}, abstract = {With the advent of rapid automated in silico identification of biosynthetic gene clusters (BGCs), genomics presents vast opportunities to accelerate natural product (NP) discovery. However, prolific NP producers, Streptomyces, are exceptionally GC-rich (>80\%) and highly repetitive within BGCs. These pose challenges in sequencing and high-quality genome assembly which are currently circumvented via intensive sequencing. Here, we outline a more cost-effective workflow using multiplex Illumina and Oxford Nanopore sequencing with hybrid long-short read assembly algorithms to generate high quality genomes. Our protocol involves subjecting long read-derived assemblies to up to 4 rounds of polishing with short reads to yield accurate BGC predictions. We successfully sequenced and assembled 8 GC-rich Streptomyces genomes whose lengths range from 7.1 to 12.1 Mb with a median N50 of 8.2 Mb. Taxonomic analysis revealed previous misrepresentation among these strains and allowed us to propose a potentially new species, Streptomyces sydneybrenneri. Further comprehensive characterization of their biosynthetic, pan-genomic and antibiotic resistance features especially for molecules derived from type I polyketide synthase (PKS) BGCs reflected their potential as alternative NP hosts. Thus, the genome assemblies and insights presented here are envisioned to serve as gateway for the scientific community to expand their avenues in NP discovery.}, }
@article {pmid37003962, year = {2023}, author = {Raza, Q and Rashid, MAR and Waqas, M and Ali, Z and Rana, IA and Khan, SH and Khan, IA and Atif, RM}, title = {Genomic diversity of aquaporins across genus {Oryza} provides a rich genetic resource for development of climate resilient rice cultivars.}, journal = {BMC plant biology}, volume = {23}, number = {1}, pages = {172}, pmid = {37003962}, issn = {1471-2229}, abstract = {BACKGROUND: Plant aquaporins are critical genetic players performing multiple biological functions, especially climate resilience and water-use efficiency. Their genomic diversity across genus Oryza is yet to be explored.
RESULTS: This study identified 369 aquaporin-encoding genes from 11 cultivated and wild rice species and further categorized these into four major subfamilies, among which small basic intrinsic proteins are speculated to be ancestral to all land plant aquaporins. Evolutionarily conserved motifs in peptides of aquaporins participate in transmembrane transport of materials and their relatively complex gene structures provide an evolutionary playground for regulation of genome structure and transcription. Duplication and evolution analyses revealed higher genetic conservation among Oryza aquaporins and strong purifying selections are assisting in conserving the climate resilience associated functions. Promoter analysis highlighted enrichment of gene upstream regions with cis-acting regulatory elements involved in diverse biological processes, whereas miRNA target site prediction analysis unveiled substantial involvement of osa-miR2102-3p, osa-miR2927 and osa-miR5075 in post-transcriptional regulation of gene expression patterns. Moreover, expression patterns of japonica aquaporins were significantly perturbed in response to different treatment levels of six phytohormones and four abiotic stresses, suggesting their multifarious roles in plants survival under stressed environments. Furthermore, superior haplotypes of seven conserved orthologous aquaporins for higher thousand-grain weight are reported from a gold mine of 3,010 sequenced rice pangenomes.
CONCLUSIONS: This study unveils the complete genomic atlas of aquaporins across genus Oryza and provides a comprehensive genetic resource for genomics-assisted development of climate-resilient rice cultivars.}, }
@article {pmid37000493, year = {2023}, author = {Pagnossin, D and Weir, W and Smith, A and Fuentes, M and Coelho, J and Oravcova, K}, title = {Streptococcus canis genomic epidemiology reveals the potential for zoonotic transfer.}, journal = {Microbial genomics}, volume = {9}, number = {3}, doi = {10.1099/mgen.0.000974}, pmid = {37000493}, issn = {2057-5858}, abstract = {Streptococcus canis, a multi-host pathogen commonly isolated from dogs and cats, has been occasionally reported in severe cases of human infection. To address the gap in knowledge on its virulence and host tropism, we investigated S. canis genomic epidemiology and report the results of this analysis for the first time. We analysed 59 S. canis whole genome sequences originating from a variety of host species, comprising 39 newly sequenced isolates from UK sources, along with all (n=20) publicly available genomes. Antimicrobial resistance (AMR) phenotype was determined for all 39 available isolates. Genomes were screened for determinants of resistance and virulence. We created a core SNP phylogeny and compared strain clustering to multi-locus sequence typing (MLST) and S. canis M-like protein (SCM) typing. We investigated the dataset for signals of host adaptation using phylogenetic analysis, accessory genome clustering and pan-genome-wide association study analysis. A total of 23 % (9/39) of isolates exhibited phenotypic resistance to lincosamides, macrolides and/or tetracyclines. This was complemented by the identification of AMR-encoding genes in all genomes: tetracycline (tetO 14 %, 8/59; and tetM 7 %, 4/59) and lincosamide/macrolide (ermB, 7 %, 4/59). AMR was more common in human (36 %, 4/11) compared to companion animal (18 %, 5/28) isolates. We identified 19 virulence gene homologues, 14 of which were present in all strains analysed. In an S. canis strain isolated from a dog with otitis externa we identified a homologue of S. pyogenes superantigen SMEZ. 
The MLST and SCM typing schemes were found to be incapable of accurately representing core SNP-based genomic diversity of the S. canis population. No evidence of host adaptation was detected, suggesting the potential for inter-species transmission, including zoonotic transfer.}, }
@article {pmid36993855, year = {2023}, author = {Akparov, Z and Hajiyeva, S and Abbasov, M and Kaur, S and Hamwieh, A and Alsamman, AM and Hajiyev, E and Babayeva, S and Izzatullayeva, V and Mustafayeva, Z and Mehdiyeva, S and Mustafayev, O and Shahmuradov, I and Kosarev, P and Solovyev, V and Salamov, A and Jighly, A}, title = {Two major chromosome evolution events with unrivaled conserved gene content in pomegranate.}, journal = {Frontiers in plant science}, volume = {14}, pages = {1039211}, pmid = {36993855}, issn = {1664-462X}, abstract = {Pomegranate has a unique evolutionary history given that different cultivars have eight or nine bivalent chromosomes with possible crossability between the two classes. Therefore, it is important to study chromosome evolution in pomegranate to understand the dynamics of its population. Here, we de novo assembled the Azerbaijani cultivar "Azerbaijan guloyshasi" (AG2017; 2n = 16) and re-sequenced six cultivars to track the evolution of pomegranate and to compare it with previously published de novo assembled and re-sequenced cultivars. High synteny was observed between AG2017, Bhagawa (2n = 16), Tunisia (2n = 16), and Dabenzi (2n = 18), but these four cultivars diverged from the cultivar Taishanhong (2n = 18) with several rearrangements indicating the presence of two major chromosome evolution events. Major presence/absence variations were not observed as >99% of the five genomes aligned across the cultivars, while >99% of the pan-genic content was represented by Tunisia and Taishanhong only. We also revisited the divergence between soft- and hard-seeded cultivars with less structured population genomic data, compared to previous studies, to refine the selected genomic regions and detect global migration routes for pomegranate. 
We reported a unique admixture between soft- and hard-seeded cultivars that can be exploited to improve the diversity, quality, and adaptability of local pomegranate varieties around the world. Our study adds body knowledge to understanding the evolution of the pomegranate genome and its implications for the population structure of global pomegranate diversity, as well as planning breeding programs aiming to develop improved cultivars.}, }
@article {pmid36993842, year = {2023}, author = {Carballo, J and Bellido, AM and Selva, JP and Zappacosta, D and Gallo, CA and Albertini, E and Caccamo, M and Echenique, V}, title = {From tetraploid to diploid, a pangenomic approach to identify genes lost during synthetic diploidization of {Eragrostis} curvula.}, journal = {Frontiers in plant science}, volume = {14}, pages = {1133986}, pmid = {36993842}, issn = {1664-462X}, abstract = {INTRODUCTION: In Eragrostis curvula, commonly known as weeping lovegrass, a synthetic diploidization event of the facultative apomictic tetraploid Tanganyika INTA cv. originated from the sexual diploid Victoria cv. Apomixis is an asexual reproduction by seeds in which the progeny is genetically identical to the maternal plant.
METHODS: To assess the genomic changes related to ploidy and to the reproductive mode occurring during diploidization, a mapping approach was followed to obtain the first E. curvula pangenome assembly. In this way, gDNA of Tanganyika INTA was extracted and sequenced in 2x250 Illumina pair-end reads and mapped against the Victoria genome assembly. The unmapped reads were used for variant calling, while the mapped reads were assembled using Masurca software.
RESULTS: The length of the assembly was 28,982,419 bp distributed in 18,032 contigs, and the variable genes annotated in these contigs rendered 3,952 gene models. Functional annotation of the genes showed that the reproductive pathway was differentially enriched. PCR amplification in gDNA and cDNA of Tanganyika INTA and Victoria was conducted to validate the presence/absence variation in five genes related to reproduction and ploidy. The polyploid nature of the Tanganyika INTA genome was also evaluated through the variant calling analysis showing the single nucleotide polymorphism (SNP) coverage and allele frequency distribution with a segmental allotetraploid pairing behavior.
DISCUSSION: The results presented here suggest that the genes were lost in Tanganyika INTA during the diploidization process that was conducted to suppress the apomictic pathway, affecting severely the fertility of Victoria cv.}, }
@article {pmid36991151, year = {2023}, author = {Zhen, C and Chen, XK and Ge, XF and Liu, WZ}, title = {Streptomonospora mangrovi sp. nov., isolated from mangrove soil showing similar metabolic capabilities, but distinct secondary metabolites profiles.}, journal = {Archives of microbiology}, volume = {205}, number = {4}, pages = {148}, pmid = {36991151}, issn = {1432-072X}, abstract = {A novel actinomycete, designated strain S1-112[T], was isolated from a mangrove soil sample from Hainan, China, and characterized using a polyphasic approach. Strain S1-112[T] showed the highest similarity of the 16S rRNA gene to Streptomonospora nanhaiensis 12A09[T] (99.24%). Their close relationship was further supported by phylogenetic analyses, which placed these two strains within a stable clade. The highest values of digital DNA-DNA hybridization (dDDH, 41.4%) and average nucleotide identity (ANI, 90.55%) were detected between strain S1-112[T] and Streptomonospora halotolerans NEAU-Jh2-17[T]. Genotypic and phenotypic characteristics demonstrated that strain S1-112[T] could be distinguished from its closely related relatives. We also profiled the pan-genome and metabolic features of genomic assemblies of strains belonging to the genus Streptomonospora, indicating similar functional capacities and metabolic activities. However, all of these strains showed promising potential for producing diverse types of secondary metabolites. In conclusion, strain S1-112[T] represents a novel species of the genus Streptomonospora, for which the name Streptomonospora mangrovi sp. nov. was proposed. The type strain is S1-112[T] (= JCM 34292[T]).}, }
@article {pmid36982787, year = {2023}, author = {Karetnikov, DI and Vasiliev, GV and Toshchakov, SV and Shmakov, NA and Genaev, MA and Nesterov, MA and Ibragimova, SM and Rybakov, DA and Gavrilenko, TA and Salina, EA and Patrushev, MV and Kochetov, AV and Afonnikov, DA}, title = {Analysis of Genome Structure and Its Variations in Potato Cultivars Grown in {Russia}.}, journal = {International journal of molecular sciences}, volume = {24}, number = {6}, doi = {10.3390/ijms24065713}, pmid = {36982787}, issn = {1422-0067}, abstract = {Solanum tuberosum L. (common potato) is one of the most important crops produced almost all over the world. Genomic sequences of potato opens the way for studying the molecular variations related to diversification. We performed a reconstruction of genomic sequences for 15 tetraploid potato cultivars grown in Russia using short reads. Protein-coding genes were identified; conserved and variable parts of pan-genome and the repertoire of the NBS-LRR genes were characterized. For comparison, we used additional genomic sequences for twelve South American potato accessions, performed analysis of genetic diversity, and identified the copy number variations (CNVs) in two these groups of potato. Genomes of Russian potato cultivars were more homogeneous by CNV characteristics and have smaller maximum deletion size in comparison with South American ones. Genes with different CNV occurrences in two these groups of potato accessions were identified. We revealed genes of immune/abiotic stress response, transport and five genes related to tuberization and photoperiod control among them. Four genes related to tuberization and photoperiod were investigated in potatoes previously (phytochrome A among them). A novel gene, homologous to the poly(ADP-ribose) glycohydrolase (PARG) of Arabidopsis, was identified that may be involved in circadian rhythm control and contribute to the acclimatization processes of Russian potato cultivars.}, }
@article {pmid36981047, year = {2023}, author = {Wartha, S and Bretschneider, N and Dangel, A and Hobmaier, B and Hörmansdorfer, S and Huber, I and Murr, L and Pavlovic, M and Sprenger, A and Wenning, M and Alter, T and Messelhäußer, U}, title = {Genetic Characterization of {Listeria} from Food of Non-Animal Origin Products and from Producing and Processing Companies in {Bavaria}, {Germany}.}, journal = {Foods (Basel, Switzerland)}, volume = {12}, number = {6}, doi = {10.3390/foods12061120}, pmid = {36981047}, issn = {2304-8158}, abstract = {Reported cases of listeriosis from food of non-animal origin (FNAO) are increasing. In order to assess the risk of exposure to Listeria monocytogenes from FNAO, the genetic characterization of the pathogen in FNAO products and in primary production and processing plants needs to be investigated. For this, 123 samples of fresh and frozen soft fruit and 407 samples of 39 plants in Bavaria, Germany that produce and process FNAO were investigated for Listeria contamination. As a result, 64 Listeria spp. isolates were detected using ISO 11290-1:2017. Environmental swabs and water and food samples were investigated. L. seeligeri (36/64, 56.25%) was the most frequently identified species, followed by L. monocytogenes (8/64, 12.50%), L. innocua (8/64, 12.50%), L. ivanovii (6/64, 9.38%), L. newyorkensis (5/64, 7.81%), and L. grayi (1/64, 1.56%). Those isolates were subsequently sequenced by whole-genome sequencing and subjected to pangenome analysis to retrieve data on the genotype, serotype, antimicrobial resistance (AMR), and virulence markers. Eight out of sixty-four Listeria spp. isolates were identified as L. monocytogenes. The serogroup analysis detected that 62.5% of the L. monocytogenes isolates belonged to serogroup IIa (1/2a and 3a) and 37.5% to serogroup IVb (4b, 4d, and 4e). Furthermore, the MLST (multilocus sequence typing) analysis of the eight detected L. 
monocytogenes isolates identified seven different sequence types (STs) and clonal complexes (CCs), i.e., ST1/CC1, ST2/CC2, ST6/CC6, ST7/CC7, ST21/CC21, ST504/CC475, and ST1413/CC739. The core genome MLST analysis also showed high allelic differences and suggests plant-specific isolates. Regarding the AMR, we detected phenotypic resistance against benzylpenicillin, fosfomycin, and moxifloxacin in all eight L. monocytogenes isolates. Moreover, virulence factors, such as prfA, hly, plcA, plcB, hpt, actA, inlA, inlB, and mpl, were identified in pathogenic and nonpathogenic Listeria species. The significance of L. monocytogenes in FNAO is growing and should receive increasing levels of attention.}, }
@article {pmid36980919, year = {2023}, author = {Weltzer, ML and Wall, D}, title = {Social Diversification Driven by Mobile Genetic Elements.}, journal = {Genes}, volume = {14}, number = {3}, doi = {10.3390/genes14030648}, pmid = {36980919}, issn = {2073-4425}, abstract = {Social diversification in microbes is an evolutionary process where lineages bifurcate into distinct populations that cooperate with themselves but not with other groups. In bacteria, this is frequently driven by horizontal transfer of mobile genetic elements (MGEs). Here, the resulting acquisition of new genes changes the recipient's social traits and consequently how they interact with kin. These changes include discriminating behaviors mediated by newly acquired effectors. Since the producing cell is protected by cognate immunity factors, these selfish elements benefit from selective discrimination against recent ancestors, thus facilitating their proliferation and benefiting the host. Whether social diversification benefits the population at large is less obvious. The widespread use of next-generation sequencing has recently provided new insights into population dynamics in natural habitats and the roles MGEs play. MGEs belong to accessory genomes, which often constitute the majority of the pangenome of a taxon, and contain most of the kin-discriminating loci that fuel rapid social diversification. We further discuss mechanisms of diversification and its consequences to populations and conclude with a case study involving myxobacteria.}, }
@article {pmid36979037, year = {2023}, author = {Sedeek, AM and Salah, I and Kamel, HL and Soltan, MA and Nour, E and Alshammari, A and Riaz Rajoka, MS and Elsayed, TR}, title = {Genome-Based Analysis of the Potential Bioactivity of the Terrestrial {Streptomyces} vinaceusdrappus Strain {AC-40}.}, journal = {Biology}, volume = {12}, number = {3}, doi = {10.3390/biology12030345}, pmid = {36979037}, issn = {2079-7737}, abstract = {Streptomyces are factories of antimicrobial secondary metabolites. We isolated a Streptomyces species associated with the Pelargonium graveolens rhizosphere. Its total metabolic extract exhibited potent antibacterial and antifungal properties against all the tested pathogenic microbes. Whole genome sequencing and genome analyses were performed to take a look at its main characteristics and to reconstruct the metabolic pathways that can be associated with biotechnologically useful traits. AntiSMASH was used to identify the secondary metabolite gene clusters. In addition, we searched for known genes associated with plant growth-promoting characteristics. Finally, a comparative and pan-genome analysis with three closely related genomes was conducted. It was identified as Streptomyces vinaceusdrappus strain AC-40. Genome mining indicated the presence of several secondary metabolite gene clusters. Some of them are identical or homologs to gene clusters of known metabolites with antimicrobial, antioxidant, and other bioactivities. It also showed the presence of several genes related to plant growth promotion traits. The comparative genome analysis indicated that at least five of these gene clusters are highly conserved through rochei group genomes. The genotypic and phenotypic characteristics of S. vinaceusdrappus strain AC-40 indicate that it is a promising source of beneficial secondary metabolites with pharmaceutical and biotechnological applications.}, }
@article {pmid36975929, year = {2023}, author = {Lu, W and Zhang, T and Zhang, Q and Zhang, N and Jia, L and Ma, S and Xia, Q}, title = {{FibH} Gene Complete Sequences ({FibHome}) Revealed Silkworm Pedigree.}, journal = {Insects}, volume = {14}, number = {3}, doi = {10.3390/insects14030244}, pmid = {36975929}, issn = {2075-4450}, abstract = {The highly repetitive and variable fibroin heavy chain (FibH) gene can be used as a silkworm identification; however, only a few complete FibH sequences are known. In this study, we extracted and examined 264 FibH gene complete sequences (FibHome) from a high-resolution silkworm pan-genome. The average FibH lengths of the wild silkworm, local, and improved strains were 19,698 bp, 16,427 bp, and 15,795 bp, respectively. All FibH sequences had a conserved 5' and 3' terminal non-repetitive (5' and 3' TNR, 99.74% and 99.99% identity, respectively) sequence and a variable repetitive core (RC). The RCs differed greatly, but they all shared the same motif. During domestication or breeding, the FibH gene mutated with hexanucleotide (GGTGCT) as the core unit. Numerous variations existed that were not unique to wild and domesticated silkworms. However, the transcriptional factor binding sites, such as fibroin modulator-binding protein, were highly conserved and had 100% identity in the FibH gene's intron and upstream sequences. The local and improved strains with the same FibH gene were divided into four families using this gene as a marker. Family I contained a maximum of 62 strains with the optional FibH (Opti-FibH, 15,960 bp) gene. This study provides new insights into FibH variations and silkworm breeding.}, }
@article {pmid36969737, year = {2022}, author = {Baaijens, JA and Bonizzoni, P and Boucher, C and Della Vedova, G and Pirola, Y and Rizzi, R and Sirén, J}, title = {Computational graph pangenomics: a tutorial on data structures and their applications.}, journal = {Natural computing}, volume = {21}, number = {1}, pages = {81--108}, pmid = {36969737}, issn = {1567-7818}, abstract = {Computational pangenomics is an emerging research field that is changing the way computer scientists are facing challenges in biological sequence analysis. In past decades, contributions from combinatorics, stringology, graph theory and data structures were essential in the development of a plethora of software tools for the analysis of the human genome. These tools allowed computational biologists to approach ambitious projects at population scale, such as the 1000 Genomes Project. A major contribution of the 1000 Genomes Project is the characterization of a broad spectrum of genetic variations in the human genome, including the discovery of novel variations in the South Asian, African and European populations-thus enhancing the catalogue of variability within the reference genome. Currently, the need to take into account the high variability in population genomes as well as the specificity of an individual genome in a personalized approach to medicine is rapidly pushing the abandonment of the traditional paradigm of using a single reference genome. A graph-based representation of multiple genomes, or a graph pangenome, is replacing the linear reference genome. This means completely rethinking well-established procedures to analyze, store, and access information from genome representations. Properly addressing these challenges is crucial to face the computational tasks of ambitious healthcare projects aiming to characterize human diversity by sequencing 1M individuals (Stark et al. 2019). 
This tutorial aims to introduce readers to the most recent advances in the theory of data structures for the representation of graph pangenomes. We discuss efficient representations of haplotypes and the variability of genotypes in graph pangenomes, and highlight applications in solving computational problems in human and microbial (viral) pangenomes.}, }
@article {pmid36968469, year = {2023}, author = {Rehman, MNU and Dawar, FU and Zeng, J and Fan, L and Feng, W and Wang, M and Yang, N and Guo, G and Zheng, J}, title = {Complete genome sequence analysis of {Edwardsiella} tarda {SC002} from hatchlings of {Siamese} crocodile.}, journal = {Frontiers in veterinary science}, volume = {10}, pages = {1140655}, pmid = {36968469}, issn = {2297-1769}, abstract = {Edwardsiella tarda is a Gram-negative, facultative anaerobic rod-shaped bacterium and the causative agent of the systemic disease "Edwardsiellosis". It is commonly prevalent in aquatic organisms with subsequent economic loss and hence has attracted increasing attention from researchers. In this study, we investigated the complete genome sequence of a highly virulent isolate Edwardsiella tarda SC002 isolated from hatchlings of the Siamese crocodile. The genome of SC002 consisted of one circular chromosome of length 3,662,469 bp with a 57.29% G+C content and four novel plasmids. A total of 3,734 protein-coding genes, 12 genomic islands (GIs), 7 prophages, 48 interspersed repeat sequences, 248 tandem repeat sequences, a CRISPR component with a total length of 175 bp, and 171 ncRNAs (tRNA = 106, sRNA = 37, and rRNA = 28) were predicted. In addition, the coding genes of assembled genome were successfully annotated against eight general databases (NR = 3,618/3,734, COG = 2,947/3,734, KEGG = 3,485/3,734, SWISS-PROT = 2,787/3,734, GO = 2,648/3,734, Pfam = 2,648/3,734, CAZy = 130/3,734, and TCDB = 637/3,734) and four pathogenicity-related databases (ARDB = 11/3,734, CARD = 142/3,734, PHI = 538/3,734, and VFDB = 315/3,734). Pan-genome and comparative genome analyses of the complete sequenced genomes confirmed their evolutionary relationships. The present study confirmed that E. tarda SC002 is a potential pathogen bearing a bulk amount of antibiotic resistance, virulence, and pathogenic genes and its open pan-genome may enhance its host range in the future.}, }
@article {pmid36968185, year = {2023}, author = {Zhou, H and Yan, F and Hao, F and Ye, H and Yue, M and Woeste, K and Zhao, P and Zhang, S}, title = {Pan-genome and transcriptome analyses provide insights into genomic variation and differential gene expression profiles related to disease resistance and fatty acid biosynthesis in eastern black walnut ({Juglans} nigra).}, journal = {Horticulture research}, volume = {10}, number = {3}, pages = {uhad015}, pmid = {36968185}, issn = {2662-6810}, abstract = {Walnut (Juglans) species are used as nut crops worldwide. Eastern black walnut (EBW, Juglans nigra), a diploid, horticultural important woody species is native to much of eastern North America. Although it is highly valued for its wood and nut, there are few resources for understanding EBW genetics. Here, we present a high-quality genome assembly of J. nigra based on Illumina, Pacbio, and Hi-C technologies. The genome size was 540.8 Mb, with a scaffold N50 size of 35.1 Mb, and 99.0% of the assembly was anchored to 16 chromosomes. Using this genome as a reference, the resequencing of 74 accessions revealed the effective population size of J. nigra declined during the glacial maximum. A single whole-genome duplication event was identified in the J. nigra genome. Large syntenic blocks among J. nigra, Juglans regia, and Juglans microcarpa predominated, but inversions of more than 600 kb were identified. By comparing the EBW genome with those of J. regia and J. microcarpa, we detected InDel sizes of 34.9 Mb in J. regia and 18.3 Mb in J. microcarpa, respectively. Transcriptomic analysis of differentially expressed genes identified five presumed NBS-LRR (NUCLEOTIDE BINDING SITE-LEUCINE-RICH REPEAT) genes were upregulated during the development of walnut husks and shells compared to developing embryos. We also identified candidate genes with essential roles in seed oil synthesis, including FAD (FATTY ACID DESATURASE) and OLE (OLEOSIN). 
Our work advances the understanding of fatty acid bioaccumulation and disease resistance in nut crops, and also provides an essential resource for conducting genomics-enabled breeding in walnut.}, }
@article {pmid36966465, year = {2023}, author = {Velt, A and Frommer, B and Blanc, S and Holtgräwe, D and Duchêne, É and Dumas, V and Grimplet, J and Hugueney, P and Kim, C and Lahaye, M and Matus, JT and Navarro-Payá, D and Orduña, L and Tello-Ruiz, MK and Vitulo, N and Ware, D and Rustenholz, C}, title = {An improved reference of the grapevine genome reasserts the origin of the {PN40024} highly-homozygous genotype.}, journal = {G3 (Bethesda, Md.)}, doi = {10.1093/g3journal/jkad067}, pmid = {36966465}, issn = {2160-1836}, abstract = {The genome sequence of the diploid and highly homozygous V. vinifera genotype PN40024 serves as the reference for many grapevine studies. Despite several improvements to the PN40024 genome assembly, its current version PN12X.v2 is quite fragmented and only represents the haploid state of the genome with mixed haplotypes. In fact, being nearly homozygous, this genome contains several heterozygous regions that are yet to be resolved. Taking the opportunity of improvements that long-read sequencing technologies offer to fully discriminate haplotype sequences, an improved version of the reference, called PN40024.v4, was generated. Through incorporating long genomic sequencing reads to the assembly, the continuity of the 12X.v2 scaffolds was highly increased with a total number decreasing from 2,059 to 640 and a reduction in N bases of 88%. Additionally, the full alternative haplotype sequence was built for the first time, the chromosome anchoring was improved and the number of unplaced scaffolds was reduced by half. To obtain a high-quality gene annotation that outperforms previous versions, a liftover approach was complemented with an optimized annotation workflow for Vitis. Integration of the gene reference catalogue and its manual curation have also assisted in improving the annotation, while defining the most reliable estimation of 35,230 genes to date. 
Finally, we demonstrated that PN40024 resulted from nine selfings of cv. 'Helfensteiner' (cross of cv. 'Pinot noir' and 'Schiava grossa') instead of a single 'Pinot noir'. These advances will help maintain the PN40024 genome as a gold-standard reference, also contributing towards the eventual elaboration of the grapevine pangenome.}, }
@article {pmid36966359, year = {2023}, author = {Yu, Z and Chen, Y and Zhou, Y and Zhang, Y and Li, M and Ouyang, Y and Chebotarov, D and Mauleon, R and Zhao, H and Xie, W and McNally, KL and Wing, RA and Guo, W and Zhang, J}, title = {{Rice Gene Index} ({RGI}): a comprehensive pan-genome database for comparative and functional genomics of {Asian} rice.}, journal = {Molecular plant}, doi = {10.1016/j.molp.2023.03.012}, pmid = {36966359}, issn = {1752-9867}, }
@article {pmid36961900, year = {2023}, author = {Rubio, A and Sprang, M and Garzón, A and Moreno-Rodriguez, A and Pachón-Ibáñez, ME and Pachón, J and Andrade-Navarro, MA and Pérez-Pulido, AJ}, title = {Analysis of bacterial pangenomes reduces {CRISPR} dark matter and reveals strong association between membranome and {CRISPR-Cas} systems.}, journal = {Science advances}, volume = {9}, number = {12}, pages = {eadd8911}, doi = {10.1126/sciadv.add8911}, pmid = {36961900}, issn = {2375-2548}, abstract = {CRISPR-Cas systems are prokaryotic acquired immunity mechanisms, which are found in 40% of bacterial genomes. They prevent viral infections through small DNA fragments called spacers. However, the vast majority of these spacers have not yet been associated with the virus they recognize, and it has been named CRISPR dark matter. By analyzing the spacers of tens of thousands of genomes from six bacterial species, we have been able to reduce the CRISPR dark matter from 80% to as low as 15% in some of the species. In addition, we have observed that, when a genome presents CRISPR-Cas systems, this is accompanied by particular sets of membrane proteins. Our results suggest that when bacteria present membrane proteins that make it compete better in its environment and these proteins are, in turn, receptors for specific phages, they would be forced to acquire CRISPR-Cas.}, }
% NOTE(review): the exported author list ended in an empty name ("and , "), presumably a collective/group author dropped by the exporter; "and others" stands in for it -- confirm against the PubMed record and replace with the double-braced group name if one exists.
@article {pmid36961866, year = {2023}, author = {Matlock, W and Lipworth, S and Chau, KK and AbuOun, M and Barker, L and Kavanagh, J and Andersson, M and Oakley, S and Morgan, M and Crook, DW and Read, DS and Anjum, M and Shaw, LP and Stoesser, N and others}, title = {Enterobacterales plasmid sharing amongst human bloodstream infections, livestock, wastewater, and waterway niches in {Oxfordshire}, {UK}.}, journal = {eLife}, volume = {12}, doi = {10.7554/eLife.85302}, pmid = {36961866}, issn = {2050-084X}, support = {MRF-145-0004-TPG-AVISO/MRF/MRF/United Kingdom ; }, abstract = {Plasmids enable the dissemination of antimicrobial resistance (AMR) in common Enterobacterales pathogens, representing a major public health challenge. However, the extent of plasmid sharing and evolution between Enterobacterales causing human infections and other niches remains unclear, including the emergence of resistance plasmids. Dense, unselected sampling is highly relevant to developing our understanding of plasmid epidemiology and designing appropriate interventions to limit the emergence and dissemination of plasmid-associated AMR. We established a geographically and temporally restricted collection of human bloodstream infection (BSI)-associated, livestock-associated (cattle, pig, poultry, and sheep faeces, farm soils) and wastewater treatment work (WwTW)-associated (influent, effluent, waterways upstream/downstream of effluent outlets) Enterobacterales. Isolates were collected between 2008-2020 from sites <60km apart in Oxfordshire, UK. Pangenome analysis of plasmid clusters revealed shared 'backbones', with phylogenies suggesting an intertwined ecology where well-conserved plasmid backbones carry diverse accessory functions, including AMR genes. Many plasmid 'backbones' were seen across species and niches, raising the possibility that plasmid movement between these followed by rapid accessory gene change could be relatively common. 
Overall, the signature of identical plasmid sharing is likely to be a highly transient one, implying that plasmid movement might be occurring at greater rates than previously estimated, raising a challenge for future genomic One Health studies.}, }
@article {pmid36961505, year = {2023}, author = {Delgado-Blas, JF and Ovejero, CM and David, S and Serna, C and Pulido-Vadillo, M and Montero, N and Aanensen, DM and Abadia-Patiño, L and Gonzalez-Zorn, B}, title = {Global scenario of the {RmtE} pan-aminoglycoside-resistance mechanism: emergence of the {rmtE4} gene in {South America} associated with a hospital-related {IncL} plasmid.}, journal = {Microbial genomics}, volume = {9}, number = {3}, doi = {10.1099/mgen.0.000946}, pmid = {36961505}, issn = {2057-5858}, abstract = {Antimicrobial resistance (AMR) mechanisms, especially those conferring resistance to critically important antibiotics, are a great concern for public health. 16S rRNA methyltransferases (16S-RMTases) abolish the effectiveness of most clinically used aminoglycosides, but some of them are considered sporadic, such as RmtE. The main goals of this work were the genomic analysis of bacteria producing 16S-RMTases from a 'One Health' perspective in Venezuela, and the study of the epidemiological and evolutionary scenario of RmtE variants and their related mobile genetic elements (MGEs) worldwide. A total of 21 samples were collected in 2014 from different animal and environmental sources in the Cumaná region (Venezuela). Highly aminoglycoside-resistant Enterobacteriaceae isolates were selected, identified and screened for 16S-RMTase genes. Illumina and Nanopore whole-genome sequencing data were combined to obtain hybrid assemblies and analyse their sequence type, resistome, plasmidome and pan-genome. Genomic collections of rmtE variants and their associated MGEs were generated to perform epidemiological and phylogenetic analyses. A single 16S-RMTase, the novel RmtE4, was identified in five Klebsiella isolates from wastewater samples of Cumaná. 
This variant possessed three amino acid modifications with respect to RmtE1-3 (Asn152Asp, Val216Ile and Lys267Ile), representing the most genetic distant among all known and novel variants described in this work, and the second most prevalent. rmtE variants were globally spread, and their geographical distribution was determined by the associated MGEs and the carrying bacterial species. Thus, rmtE4 was found to be confined to Klebsiella isolates from South America, where it was closely related to ISVsa3 and an uncommon IncL plasmid related with hospital environments. This work uncovered the global scenario of RmtE and the existence of RmtE4, which could potentially emerge from South America. Surveillance and control measures should be developed based on these findings in order to prevent the dissemination of this AMR mechanism and preserve public health worldwide.}, }
@article {pmid36958270, year = {2023}, author = {Botelho, J and Tüffers, L and Fuss, J and Buchholz, F and Utpatel, C and Klockgether, J and Niemann, S and Tümmler, B and Schulenburg, H}, title = {Phylogroup-specific variation shapes the clustering of antimicrobial resistance genes and defence systems across regions of genome plasticity in {Pseudomonas} aeruginosa.}, journal = {EBioMedicine}, volume = {90}, pages = {104532}, doi = {10.1016/j.ebiom.2023.104532}, pmid = {36958270}, issn = {2352-3964}, abstract = {BACKGROUND: Pseudomonas aeruginosa is an opportunistic pathogen consisting of three phylogroups (hereafter named A, B, and C). Here, we assessed phylogroup-specific evolutionary dynamics across available and also new P. aeruginosa genomes.
METHODS: In this genomic analysis, we first generated new genome assemblies for 18 strains of the major P. aeruginosa clone type (mPact) panel, comprising a phylogenetically diverse collection of clinical and environmental isolates for this species. Thereafter, we combined these new genomes with 1991 publicly available P. aeruginosa genomes for a phylogenomic and comparative analysis. We specifically explored to what extent antimicrobial resistance (AMR) genes, defence systems, and virulence genes vary in their distribution across regions of genome plasticity (RGPs) and "masked" (RGP-free) genomes, and to what extent this variation differs among the phylogroups.
FINDINGS: We found that members of phylogroup B possess larger genomes, contribute a comparatively larger number of pangenome families, and show lower abundance of CRISPR-Cas systems. Furthermore, AMR and defence systems are pervasive in RGPs and integrative and conjugative/mobilizable elements (ICEs/IMEs) from phylogroups A and B, and the abundance of these cargo genes is often significantly correlated. Moreover, inter- and intra-phylogroup interactions occur at the accessory genome level, suggesting frequent recombination events. Finally, we provide here the mPact panel of diverse P. aeruginosa strains that may serve as a valuable reference for functional analyses.
INTERPRETATION: Altogether, our results highlight distinct pangenome characteristics of the P. aeruginosa phylogroups, which are possibly influenced by variation in the abundance of CRISPR-Cas systems and are shaped by the differential distribution of other defence systems and AMR genes.
FUNDING: German Science Foundation, Max-Planck Society, Leibniz ScienceCampus Evolutionary Medicine of the Lung, BMBF program Medical Infection Genomics, Kiel Life Science Postdoc Award.}, }
% NOTE(review): the exported author list ended in an empty name ("and , "), presumably a collective/group author dropped by the exporter; "and others" stands in for it -- confirm against the PubMed record and replace with the double-braced group name if one exists.
@article {pmid36952017, year = {2023}, author = {Boden, SA and McIntosh, RA and Uauy, C and Krattinger, SG and Dubcovsky, J and Rogers, WJ and Xia, XC and Badaeva, ED and Bentley, AR and Brown-Guedira, G and Caccamo, M and Cattivelli, L and Chhuneja, P and Cockram, J and Contreras-Moreira, B and Dreisigacker, S and Edwards, D and González, FG and Guzmán, C and Ikeda, TM and Karsai, I and Nasuda, S and Pozniak, C and Prins, R and Sen, TZ and Silva, P and Simkova, H and Zhang, Y and others}, title = {Updated guidelines for gene nomenclature in wheat.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {136}, number = {4}, pages = {72}, pmid = {36952017}, issn = {1432-2242}, support = {BB/P016855/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/OS/NW/000016/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/P010741/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {Here, we provide an updated set of guidelines for naming genes in wheat that has been endorsed by the wheat research community. The last decade has seen a proliferation in genomic resources for wheat, including reference- and pan-genome assemblies with gene annotations, which provide new opportunities to detect, characterise, and describe genes that influence traits of interest. The expansion of genetic information has supported growth of the wheat research community and catalysed strong interest in the genes that control agronomically important traits, such as yield, pathogen resistance, grain quality, and abiotic stress tolerance. To accommodate these developments, we present an updated set of guidelines for gene nomenclature in wheat. 
These guidelines can be used to describe loci identified based on morphological or phenotypic features or to name genes based on sequence information, such as similarity to genes characterised in other species or the biochemical properties of the encoded protein. The updated guidelines provide a flexible system that is not overly prescriptive but provides structure and a common framework for naming genes in wheat, which may be extended to related cereal species. We propose these guidelines be used henceforth by the wheat research community to facilitate integration of data from independent studies and allow broader and more efficient use of text and data mining approaches, which will ultimately help further accelerate wheat research and breeding.}, }
@article {pmid36946261, year = {2023}, author = {Liang, Q and Muñoz-Amatriaín, M and Shu, S and Lo, S and Wu, X and Carlson, JW and Davidson, P and Goodstein, DM and Phillips, J and Janis, NM and Lee, EJ and Liang, C and Morrell, PL and Farmer, AD and Xu, P and Close, TJ and Lonardi, S}, title = {A view of the pan-genome of domesticated Cowpea ({Vigna} unguiculata [{L.}] {Walp.}).}, journal = {The plant genome}, pages = {e20319}, doi = {10.1002/tpg2.20319}, pmid = {36946261}, issn = {1940-3372}, abstract = {Cowpea, Vigna unguiculata L. Walp., is a diploid warm-season legume of critical importance as both food and fodder in sub-Saharan Africa. This species is also grown in Northern Africa, Europe, Latin America, North America, and East to Southeast Asia. To capture the genomic diversity of domesticates of this important legume, de novo genome assemblies were produced for representatives of six subpopulations of cultivated cowpea identified previously from genotyping of several hundred diverse accessions. In the most complete assembly (IT97K-499-35), 26,026 core and 4963 noncore genes were identified, with 35,436 pan genes when considering all seven accessions. GO terms associated with response to stress and defense response were highly enriched among the noncore genes, while core genes were enriched in terms related to transcription factor activity, and transport and metabolic processes. Over 5 million single nucleotide polymorphisms (SNPs) relative to each assembly and over 40 structural variants >1 Mb in size were identified by comparing genomes. Vu10 was the chromosome with the highest frequency of SNPs, and Vu04 had the most structural variants. Noncore genes harbor a larger proportion of potentially disruptive variants than core genes, including missense, stop gain, and frameshift mutations; this suggests that noncore genes substantially contribute to diversity within domesticated cowpea.}, }
@article {pmid36944612, year = {2023}, author = {Zhou, Y and Yu, Z and Chebotarov, D and Chougule, K and Lu, Z and Rivera, LF and Kathiresan, N and Al-Bader, N and Mohammed, N and Alsantely, A and Mussurova, S and Santos, J and Thimma, M and Troukhan, M and Fornasiero, A and Green, CD and Copetti, D and Kudrna, D and Llaca, V and Lorieux, M and Zuccolo, A and Ware, D and McNally, K and Zhang, J and Wing, RA}, title = {Pan-genome inversion index reveals evolutionary insights into the subpopulation structure of {Asian} rice.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {1567}, pmid = {36944612}, issn = {2041-1723}, abstract = {Understanding and exploiting genetic diversity is a key factor for the productive and stable production of rice. Here, we utilize 73 high-quality genomes that encompass the subpopulation structure of Asian rice (Oryza sativa), plus the genomes of two wild relatives (O. rufipogon and O. punctata), to build a pan-genome inversion index of 1769 non-redundant inversions that span an average of ~29% of the O. sativa cv. Nipponbare reference genome sequence. Using this index, we estimate an inversion rate of ~700 inversions per million years in Asian rice, which is 16 to 50 times higher than previously estimated for plants. Detailed analyses of these inversions show evidence of their effects on gene expression, recombination rate, and linkage disequilibrium. Our study uncovers the prevalence and scale of large inversions (≥100 bp) across the pan-genome of Asian rice and hints at their largely unexplored role in functional biology and crop performance.}, }
@article {pmid36944262, year = {2023}, author = {Milner, DS and Galindo, LJ and Irwin, NAT and Richards, TA}, title = {Transporter Proteins as Ecological Assets and Features of Microbial Eukaryotic Pangenomes.}, journal = {Annual review of microbiology}, doi = {10.1146/annurev-micro-032421-115538}, pmid = {36944262}, issn = {1545-3251}, abstract = {Here we review two connected themes in evolutionary microbiology: (a) the nature of gene repertoire variation within species groups (pangenomes) and (b) the concept of metabolite transporters as accessory proteins capable of providing niche-defining "bolt-on" phenotypes. We discuss the need for improved sampling and understanding of pangenome variation in eukaryotic microbes. We then review the functional factors that shape the repertoire of accessory genes within pangenomes. As part of this discussion, we outline how gene duplication is a key factor in both eukaryotic pangenome variation and transporter gene family evolution. We go on to outline how, through functional characterization of transporter-encoding genes, in combination with analyses of how transporter genes are gained and lost from accessory genomes, we can reveal much about the niche range, ecology, and the evolution of virulence of microbes. We advocate for the coordinated systematic study of eukaryotic pangenomes through genome sequencing and the functional analysis of genes found within the accessory gene repertoire. Expected final online publication date for the Annual Review of Microbiology, Volume 77 is September 2023. Please see http://www.annualreviews.org/page/journal/pubdates for revised estimates.}, }
@article {pmid36943133, year = {2023}, author = {Kronen, M and Vázquez-Campos, X and Wilkins, MR and Lee, M and Manefield, MJ}, title = {Evidence for a Putative Isoprene Reductase in {Acetobacterium} wieringae.}, journal = {mSystems}, pages = {e0011923}, doi = {10.1128/msystems.00119-23}, pmid = {36943133}, issn = {2379-5077}, abstract = {Recent discoveries of isoprene-metabolizing microorganisms suggest they might play an important role in the global isoprene budget. Under anoxic conditions, isoprene can be used as an electron acceptor and is reduced to methylbutene. This study describes the proteogenomic profiling of an isoprene-reducing bacterial culture to identify organisms and genes responsible for the isoprene hydrogenation reaction. A metagenome-assembled genome (MAG) of the most abundant (89% relative abundance) lineage in the enrichment, Acetobacterium wieringae, was obtained. Comparative proteogenomics and reverse transcription-PCR (RT-PCR) identified a putative five-gene operon from the A. wieringae MAG upregulated during isoprene reduction. The operon encodes a putative oxidoreductase, three pleiotropic nickel chaperones (2 × HypA, HypB), and one 4Fe-4S ferredoxin. The oxidoreductase is proposed as the putative isoprene reductase with a binding site for NADH, flavin adenine dinucleotide (FAD), two pairs of canonical [4Fe-4S] clusters, and a putative iron-sulfur cluster site in a Cys6-bonding environment. Well-studied Acetobacterium strains, such as A. woodii DSM 1030, A. wieringae DSM 1911, or A. malicum DSM 4132, do not encode the isoprene-regulated operon but encode, like many other bacteria, a homolog of the putative isoprene reductase (~47 to 49% amino acid sequence identity). 
Uncharacterized homologs of the putative isoprene reductase are observed across the Firmicutes, Spirochaetes, Tenericutes, Actinobacteria, Chloroflexi, Bacteroidetes, and Proteobacteria, suggesting the ability of biohydrogenation of unfunctionalized conjugated doubled bonds in other unsaturated hydrocarbons. IMPORTANCE Isoprene was recently shown to act as an electron acceptor for a homoacetogenic bacterium. The focus of this study is the molecular basis for isoprene reduction. By comparing a genome from our isoprene-reducing enrichment culture, dominated by Acetobacterium wieringae, with genomes of other Acetobacterium lineages that do not reduce isoprene, we shortlisted candidate genes for isoprene reduction. Using comparative proteogenomics and reverse transcription-PCR we have identified a putative five-gene operon encoding an oxidoreductase referred to as putative isoprene reductase.}, }
@article {pmid36941438, year = {2023}, author = {Zheng, X and Xu, S and Wang, Z and Tao, X and Liu, Y and Dai, L and Li, Y and Zhang, W}, title = {Sifting through the core-genome to identify putative cross-protective antigens against {Riemerella} anatipestifer.}, journal = {Applied microbiology and biotechnology}, pmid = {36941438}, issn = {1432-0614}, abstract = {Infectious serositis of ducks, caused by Riemerella anatipestifer, is one of the main infectious diseases that harm commercial ducks. Whole-strain-based vaccines with no or few cross-protection were observed between different serotypes of R. anatipestifer, and so far, control of infection is hampered by a lack of effective vaccines, especially subunit vaccines with cross-protection. Since the concept of reverse vaccinology was introduced, it has been widely used to screen for protective antigens in important pathogens. In this study, pan-genome binding reverse vaccinology, an emerging approach to vaccine candidate screening, was used to screen for cross-protective antigens against R. anatipestifer. Thirty proteins were identified from the core-genome as potential cross-protective antigens. Three of these proteins were recombinantly expressed, and their immunoreactivity with five antisera (anti-serotypes 1, 2, 6, 10, and 11) was demonstrated by Western blotting. Our study established a method for high-throughput screening of cross-protective antigens against R. anatipestifer in silico, which will lay the foundation for the development of a cross-protective subunit vaccine controlling R. anatipestifer infection. KEY POINTS: • Pan-genome binding reverse vaccine approach was first established in R. anatipestifer to screen for subunit vaccine candidates. • Thirty potential cross-protective antigens against R. anatipestifer were identified by this method. 
• The reliability of the method was verified preliminarily by the results of Western blotting of three of these potential antigens.}, }
@article {pmid36938359, year = {2023}, author = {Nguyen, HN and Sharp, GM and Stahl-Rommel, S and Velez Justiniano, YA and Castro, CL and Nelman-Gonzalez, M and O'Rourke, A and Lee, MD and Williamson, J and McCool, C and Crucian, B and Clark, KW and Jain, M and Castro-Wallace, SL}, title = {Microbial isolation and characterization from two flex lines from the urine processor assembly onboard the international space station.}, journal = {Biofilm}, volume = {5}, pages = {100108}, pmid = {36938359}, issn = {2590-2075}, abstract = {Urine, humidity condensate, and other sources of non-potable water are processed onboard the International Space Station (ISS) by the Water Recovery System (WRS) yielding potable water. While some means of microbial control are in place, including a phosphoric acid/hexavalent chromium urine pretreatment solution, many areas within the WRS are not available for routine microbial monitoring. Due to refurbishment needs, two flex lines from the Urine Processor Assembly (UPA) within the WRS were removed and returned to Earth. The water from within these lines, as well as flush water, was microbially evaluated. Culture and culture-independent analysis revealed the presence of Burkholderia, Paraburkholderia, and Leifsonia. Fungal culture also identified Fusarium and Lecythophora. Hybrid de novo genome analysis of the five distinct Burkholderia isolates identified them as B. contaminans, while the two Paraburkholderia isolates were identified as P. fungorum. Chromate-resistance gene clusters were identified through pangenomic analysis that differentiated these genomes from previously studied isolates recovered from the point-of-use potable water dispenser and/or current NCBI references, indicating that unique populations exist within distinct niches in the WRS. Beyond genomic analysis, fixed samples directly from the lines were imaged by environmental scanning electron microscopy, which detailed networks of fungal-bacterial biofilms. 
This is the first evidence of biofilm formation within flex lines from the UPA onboard the ISS. For all bacteria isolated, biofilm potential was further characterized, with the B. contaminans isolates demonstrating the most considerable biofilm formation. Moreover, the genomes of the B. contaminans revealed secondary metabolite gene clusters associated with quorum sensing, biofilm formation, antifungal compounds, and hemolysins. The potential production of these gene cluster metabolites was phenotypically evaluated through biofilm, bacterial-fungal interaction, and hemolytic assays. Collectively, these data identify the UPA flex lines as a unique ecological niche and novel area of biofilm growth within the WRS. Further investigation of these organisms and their resistance profiles will enable engineering controls directed toward biofilm prevention in future space station water systems.}, }
@article {pmid36936699, year = {2023}, author = {Ali Alghamdi, B and Al-Johani, I and Al-Shamrani, JM and Musamed Alshamrani, H and Al-Otaibi, BG and Almazmomi, K and Yusnoraini Yusof, N}, title = {Antimicrobial resistance in methicillin-resistant {Staphylococcus} aureus.}, journal = {Saudi journal of biological sciences}, volume = {30}, number = {4}, pages = {103604}, pmid = {36936699}, issn = {1319-562X}, abstract = {In the medical community, antibiotics are revered as a miracle because they stop diseases brought on by pathogenic bacteria. Antibiotics have become the cornerstone of contemporary medical advancements ever since penicillin was discovered. Antibiotic resistance developed among germs quickly, placing a strain in the medical field. Methicillin-resistant Staphylococcus aureus (MRSA), Since 1961, has emerged as the major general antimicrobial resistant bacteria (AMR) worldwide. MRSA can easily transmit across the hospital system and has mostly gained resistance to medications called beta-lactamases. This enzyme destroys the cell wall of beta-lactam antibiotics resulting in resistance against that respective antibiotic. Daptomycin, linezolid and vancomycin were previously used to treat MRSA infections. However, due to mutations and Single nucleotide polymorphisms (SNPs) in Open reading frames (ORFs) and SCCmec machinery of respective antibody, MRSA developed resistance against those antibiotics. The MRSA strains (USA300, CC398, CC130 etc.), when their pan-genomes were analyzed were found the genes involved in invoking resistance against the antibiotics as well as the epidemiology of that respective strain. PENC (penicillin plus potassium clavulanate) is the new antibiotic showing potential in treatment of MRSA though it is itself resistant against penicillin alone. 
In this review, our main focus is on mechanism of development of AMR in MRSA, how different ORFs are involved in evoking resistance in MRSA and what is the core-genome of different antimicrobial resistant MRSA.}, }
@article {pmid36935100, year = {2023}, author = {Khan, K and Jalal, K and Uddin, R}, title = {Pangenome profiling of novel drug target against vancomycin-resistant {Enterococcus} faecium.}, journal = {Journal of biomolecular structure \& dynamics}, pages = {1--14}, doi = {10.1080/07391102.2023.2191134}, pmid = {36935100}, issn = {1538-0254}, abstract = {Enterococcus faecium is a frequent causative agent of nosocomial infection mainly acquired from outgoing hospital patients (Hospital Acquired Infection-HAIs). They are largely involved in the outbreaks of bacteremia, UTI, and endocarditis with a high transmissibility rate. The recent emergence of VRE strain (i.e. vancomycin resistant enterococcus) turned it into high priority pathogen for which new drug research is of dire need. Therefore, in current study, pangenome and resistome analyses were performed for available antibiotic-resistant genomes (n = 216) of E. faecium. It resulted in the prediction of around 5,059 genes as an accessory gene, 1,076 genes as core and 1,558 genes made up a unique genome fraction. Core genes common to all strains were further used for the identification of potent drug targets by applying subtractive genomics approach. Moreover, the COG functional analysis showed that these genomes are highly enriched in metabolic pathways such as in translational, ribosomal, proteins, carbohydrates and nucleotide transport metabolism. Through subtractive genomics it was observed that 431 proteins were non-homologous to the human proteome, 166 identified as essential for pathogen survival while 26 as potential and unique therapeutic targets. Finally, 3-dehydroquinate dehydrogenase was proposed as a potent drug target for further therapeutic candidate identification. 
Moreover, the molecular docking and dynamic simulation technique were applied to performed a virtual screening of natural product libraries (i.e., TCM and Ayurvedic compounds) along with 3-amino-4,5-dihydroxy-cyclohex-1-enecarboxylate (DHS) as a standard compound to validate the study. Consequently, Argeloside I, Apigenin-7-O-gentiobioside (from Ayurvedic library), ZINC85571062, and ZINC85570908 (TCM library) compounds were identified as potential inhibitors of 3-dehydroquinate dehydrogenase. The study proposed new compounds as novel therapeutics, however, further experimental validation is needed as a follow-up. Communicated by Ramaswamy H. Sarma.}, }
@article {pmid36928221, year = {2023}, author = {Juscamayta-López, E and Valdivia, F and Soto, MP and Nureña, B and Horna, H}, title = {A pangenome approach-based loop-mediated isothermal amplification assay for the specific and early detection of {Bordetella} pertussis.}, journal = {Scientific reports}, volume = {13}, number = {1}, pages = {4356}, pmid = {36928221}, issn = {2045-2322}, abstract = {Despite widespread vaccination, Bordetella pertussis continues to cause pertussis infections worldwide, leaving infants at the highest risk of severe illness and death, while people around them are likely the main sources of infection and rapidly spread the disease. Rapid and less complex molecular testing for the specific and timely diagnosis of pertussis remains a challenge that could help to prevent the disease from worsening and prevent its transmission. We aimed to develop and validate a colorimetric loop-mediated isothermal amplification (LAMP) assay using a new target uvrD_2 informed by the pangenome for the specific and early detection of B. pertussis. Compared to that of multitarget quantitative polymerase chain reaction (multitarget qPCR) using a large clinical DNA specimen (n = 600), the diagnostic sensitivity and specificity of the uvrD_2 LAMP assay were 100.0% and 98.6%, respectively, with a 99.7% degree of agreement between the two assays. The novel colorimetric uvrD_2 LAMP assay is highly sensitive and specific for detecting B. pertussis DNA in nasopharyngeal swabs and showed similar diagnostic accuracy to complex and high-cost multitarget qPCR, but it is faster, simpler, and inexpensive, which makes it very helpful for the reliable and timely diagnosis of pertussis in primary health care and resource-limited settings.}, }
@article {pmid36925467, year = {2023}, author = {Deng, Y and Jiang, ZM and Han, XF and Su, J and Yu, LY and Liu, WH and Zhang, YQ}, title = {Pangenome analysis of the genus {Herbiconiux} and proposal of four new species associated with {Chinese} medicinal plants.}, journal = {Frontiers in microbiology}, volume = {14}, pages = {1119226}, pmid = {36925467}, issn = {1664-302X}, abstract = {Five Gram-stain-positive, aerobic, non-motile actinobacterial strains designated as CPCC 205763[T], CPCC 203386[T], CPCC 205716[T], CPCC 203406[T], and CPCC 203407 were obtained from different ecosystems associated with four kinds of Chinese traditional medicinal plants. The 16S rRNA gene sequences of these five strains showed closely related to members of the genus Herbiconiux of the family Microbacteriaceae, with the highest similarities of 97.4-99.7% to the four validly named species of Herbiconiux. In the phylogenetic trees based on 16S rRNA gene sequences and the core genome, these isolates clustered into the clade of the genus Herbiconiux within the lineage of the family Microbacteriaceae. The overall genome relatedness indexes (values of ANI and dDDH) and the phenotypic properties (morphological, physiological and chemotaxonomic characteristics) of these isolates, readily supported to affiliate them to the genus Herbiconiux, representing four novel species, with the isolates CPCC 203406[T] and CPCC 203407 being classified in the same species. For which the names Herbiconiux aconitum sp. nov. (type strain CPCC 205763[T] = I19A-01430[T] = CGMCC 1.60067[T]), Herbiconiux daphne sp. nov. (type strain CPCC 203386[T] = I10A-01569[T] = DSM 24546[T] = KCTC 19839[T]), Herbiconiux gentiana sp. nov. (type strain CPCC 205716[T] = I21A-01427[T] = CGMCC 1.60064[T]), and Herbiconiux oxytropis sp. nov. (type strain CPCC 203406[T] = I10A-02268[T] = DSM 24549[T] = KCTC 19840[T]) were proposed, respectively. 
In the genomes of these five strains, the putative encoding genes for amidase, endoglucanase, phosphatase, and superoxidative dismutase were retrieved, which were classified as biosynthetic genes/gene-clusters regarding plant growth-promotion (PGP) functions. The positive results from IAA-producing, cellulose-degrading and anti-oxidation experiments further approved their potential PGP bio-functions. Pangenome analysis of the genus Herbiconiux supported the polyphasic taxonomy results and confirmed their bio-function potential.}, }
@article {pmid36919598, year = {2023}, author = {Lee, RR and Cher, WY and Wang, J and Chen, Y and Chae, E}, title = {Generating minimum set of {gRNA} to cover multiple targets in multiple genomes with {MINORg}.}, journal = {Nucleic acids research}, doi = {10.1093/nar/gkad142}, pmid = {36919598}, issn = {1362-4962}, abstract = {MINORg is an offline gRNA design tool that generates the smallest possible combination of gRNA capable of covering all desired targets in multiple non-reference genomes. As interest in pangenomic research grows, so does the workload required for large screens in multiple individuals. MINORg aims to lessen this workload by capitalising on sequence homology to favour multi-target gRNA while simultaneously screening multiple genetic backgrounds in order to generate reusable gRNA panels. We demonstrated the practical application of MINORg by knocking out 11 homologous genes tandemly arrayed in a multi-gene cluster in two Arabidopsis thaliana lineages using three gRNA output by MINORg. We also described a new PCR-free modular cloning system for multiplexing gRNA, and used it to knockout three tandemly arrayed genes in another multi-gene cluster with gRNA designed by MINORg. Source code is freely available at https://github.com/rlrq/MINORg.}, }
@article{pmid36919166,
  author   = {Viana, M. V. C. and Galdino, J. H. and Profeta, R. and Oliveira, M. and Tavares, L. and de Castro Soares, S. and Carneiro, P. and Wattam, A. R. and Azevedo, V.},
  title    = {Analysis of {Corynebacterium} silvaticum genomes from {Portugal} reveals a single cluster and a clade suggested to produce diphtheria toxin.},
  journal  = {PeerJ},
  year     = {2023},
  volume   = {11},
  pages    = {e14895},
  doi      = {10.7717/peerj.14895},
  pmid     = {36919166},
  issn     = {2167-8359},
  abstract = {BACKGROUND: Corynebacterium silvaticum is a pathogenic, gram-positive bacterial species that causes caseous lymphadenitis in wild boars, domestic pigs and roe deer in Western Europe. It can affect animal production and cause zoonosis. Genome analysis has suggested that one strain from Portugal and one from Austria could probably produce the diphtheria toxin (DT), which inhibits protein synthesis and can cause death.
METHODS: To further investigate the species genetic diversity and probable production of DT by Portuguese strains, eight isolates from this country were sequenced and compared to 38 public ones.
RESULTS: Strains from Portugal are monophyletic, nearly identical, form a unique cluster and have 27 out of 36 known Corynebacterium virulence or niche factors. All of them lack a frameshift in the tox gene and were suggested to produce DT. A phylogenetic analysis shows that the species has diverged into two clades. Clade 1 is composed of strains that were suggested to have the ability to produce DT, represented by the monophyletic strains from Portugal and strain 05-13 from Austria. Clade 2 is composed of strains unable to produce DT due to a frameshifted tox gene. The second clade is represented by strains from Austria, Germany and Switzerland. Ten genome clusters were detected, in which strains from Germany are the most diverse. Strains from Portugal belong to an exclusive cluster. The pangenome has 2,961 proteins and is nearly closed (α = 0.968). Exclusive genes shared by clusters 1 and 2, and Portuguese strains are probably not related to disease manifestation as they share the same host but could play a role in their extra-host environmental adaptation. These results show the potential of the species to cause zoonosis, possibly diphtheria. The identified clusters, exclusively shared genes, and exclusive STs identified in Portugal could be applied in the identification and epidemiology of the species.},
}
@article{pmid36916949,
  author   = {Svahn, A. J. and Suster, C. J. E. and Chang, S. L. and Rockett, R. J. and Sim, E. M. and Cliff, O. M. and Wang, Q. and Arnott, A. and Ramsperger, M. and Sorrell, T. C. and Sintchenko, V. and Prokopenko, M.},
  title    = {Pangenome Analysis of a {Salmonella} {Enteritidis} Population Links a Major Outbreak to a {Gifsy-1}-Like Prophage Containing Anti-Inflammatory Gene {gogB}.},
  journal  = {Microbiology spectrum},
  year     = {2023},
  pages    = {e0279122},
  doi      = {10.1128/spectrum.02791-22},
  pmid     = {36916949},
  issn     = {2165-0497},
  abstract = {A major outbreak of the globally significant Salmonella Enteritidis foodborne pathogen was identified within a large clinical data set by a program of routine WGS of clinical presentations of salmonellosis in New South Wales, Australia. Pangenome analysis helped to quantify and isolate prophage content within the accessory partition of the pangenome. A prophage similar to Gifsy-1 (henceforth GF-1L) was found to occur in all isolates of the outbreak core SNP cluster, and in three other isolates. Further analysis revealed that the GF-1L prophage carried the gogB virulence factor. These observations suggest that GF-1L may be an important marker of virulence for S. Enteritidis population screening and, that anti-inflammatory, gogB-mediated virulence currently associated with Salmonella Typhimurium may also be displayed by S. Enteritidis. IMPORTANCE We examined 5 years of genomic and epidemiological data for the significant global foodborne pathogen, Salmonella enterica. Although Salmonella enterica subspecies enterica serovar Enteritidis (S. Enteritidis) is the leading cause of salmonellosis in the USA and Europe, prior to 2018 it was not endemic in the southern states of Australia. However, in 2018 a large outbreak led to the endemicity of S. Enteritidis in New South Wales, Australia, and a unique opportunity to study this phenomenon. Using pangenome analysis we uncovered that this clone contained a Gifsy-1-like prophage harboring the known virulence factor gogB. The prophage reported has not previously been described in S. Enteritidis isolates.},
}
@article{pmid36914349,
  author   = {Wang, D. and Fletcher, G. C. and Gagic, D. and On, S. L. W. and Palmer, J. S. and Flint, S. H.},
  title    = {Comparative genome identification of accessory genes associated with strong biofilm formation in {Vibrio} parahaemolyticus.},
  journal  = {Food research international (Ottawa, Ont.)},
  year     = {2023},
  volume   = {166},
  pages    = {112605},
  doi      = {10.1016/j.foodres.2023.112605},
  pmid     = {36914349},
  issn     = {1873-7145},
  abstract = {Vibrio parahaemolyticus biofilms on the seafood processing plant surfaces are a potential source of seafood contamination and subsequent food poisoning. Strains differ in their ability to form biofilm, but little is known about the genetic characteristics responsible for biofilm development. In this study, pangenome and comparative genome analysis of V. parahaemolyticus strains reveals genetic attributes and gene repertoire that contribute to robust biofilm formation. The study identified 136 accessory genes that were exclusively present in strong biofilm forming strains and these were functionally assigned to the Gene Ontology (GO) pathways of cellulose biosynthesis, rhamnose metabolic and catabolic processes, UDP-glucose processes and O antigen biosynthesis (p < 0.05). Strategies of CRISPR-Cas defence and MSHA pilus-led attachment were implicated via Kyoto Encyclopedia of Genes and Genomes (KEGG) annotation. Higher levels of horizontal gene transfer (HGT) were inferred to confer more putatively novel properties on biofilm-forming V. parahaemolyticus. Furthermore, cellulose biosynthesis, a neglected potential virulence factor, was identified as being acquired from within the order Vibrionales. The cellulose synthase operons in V. parahaemolyticus were examined for their prevalence (22/138, 15.94 %) and were found to consist of the genes bcsG, bcsE, bcsQ, bcsA, bcsB, bcsZ, bcsC. This study provides insights into robust biofilm formation of V. parahaemolyticus at the genomic level and facilitates: identification of key attributes for robust biofilm formation, elucidation of biofilm formation mechanisms and development of potential targets for novel control strategies of persistent V. parahaemolyticus.},
}
@article{pmid36912660,
  author   = {Ranković, T. and Nikolić, I. and Berić, T. and Popović, T. and Lozo, J. and Medić, O. and Stanković, S.},
  title    = {Genome Analysis of Two {Pseudomonas} syringae pv. aptata Strains with Different Virulence Capacity Isolated from Sugar Beet: Features of Successful Pathogenicity in the Phyllosphere Microbiome.},
  journal  = {Microbiology spectrum},
  year     = {2023},
  pages    = {e0359822},
  doi      = {10.1128/spectrum.03598-22},
  pmid     = {36912660},
  issn     = {2165-0497},
  abstract = {Members of the Pseudomonas syringae species complex are heterogeneous bacteria that are the most abundant bacterial plant pathogens in the plant phyllosphere, with strong abilities to exist on and infect different plant hosts and survive in/outside agroecosystems. In this study, the draft genome sequences of two pathogenic P. syringae pv. aptata strains with different in planta virulence capacities isolated from the phyllosphere of infected sugar beet were analyzed to evaluate putative features of survival strategies and to determine the pathogenic potential of the strains. The draft genomes of P. syringae pv. aptata strains P16 and P21 are 5,974,057 bp and 6,353,752 bp in size, have GC contents of 59.03% and 58.77%, respectively, and contain 3,439 and 3,536 protein-coding sequences, respectively. For both average nucleotide identity and pangenome analysis, P16 and P21 largely clustered with other pv. aptata strains from the same isolation source. We found differences in the repertoire of effectors of the type III secretion system among all 102 selected strains, suggesting that the type III secretion system is a critical factor in the different virulent phenotypes of P. syringae pv. aptata. During genome analysis of the highly virulent strain P21, we discovered genes for T3SS effectors (AvrRpm1, HopAW1, and HopAU1) that were not previously found in genomes of P. syringae pv. aptata. We also identified coding sequences for pantothenate kinase, VapC endonuclease, phospholipase, and pectate lyase in both genomes, which may represent novel effectors of the type III secretion system. IMPORTANCE Genome analysis has an enormous effect on understanding the life strategies of plant pathogens. Comparing similarities with pathogens involved in other epidemics could elucidate the pathogen life cycle when a new outbreak happens. This study represents the first in-depth genome analysis of Pseudomonas syringae pv. aptata, the causative agent of leaf spot disease of sugar beet. Despite the increasing number of disease reports in recent years worldwide, there is still a lack of information about the genomic features, epidemiology, and pathogenic life strategies of this particular pathogen. Our findings provide advances in disease etiology (especially T3SS effector repertoire) and elucidate the role of environmental adaptations required for prevalence in the pathobiome of the sugar beet. From the perspective of the very heterogeneous P. syringae species complex, this type of analysis has specific importance in reporting the characteristics of individual strains.},
}
@article{pmid36910224,
  author   = {Coskun, Ö. K. and Gomez-Saez, G. V. and Beren, M. and Ozcan, D. and Hosgormez, H. and Einsiedl, F. and Orsi, W. D.},
  title    = {Carbon metabolism and biogeography of candidate phylum "{Candidatus} {Bipolaricaulota}" in geothermal environments of {Biga} {Peninsula}, {Turkey}.},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1063139},
  pmid     = {36910224},
  issn     = {1664-302X},
  abstract = {Terrestrial hydrothermal springs and aquifers are excellent sites to study microbial biogeography because of their high physicochemical heterogeneity across relatively limited geographic regions. In this study, we performed 16S rRNA gene sequencing and metagenomic analyses of the microbial diversity of 11 different geothermal aquifers and springs across the tectonically active Biga Peninsula (Turkey). Across geothermal settings ranging in temperature from 43 to 79°C, one of the most highly represented groups in both 16S rRNA gene and metagenomic datasets was affiliated with the uncultivated phylum "Candidatus Bipolaricaulota" (former "Ca. Acetothermia" and OP1 division). The highest relative abundance of "Ca. Bipolaricaulota" was observed in a 68°C geothermal brine sediment, where it dominated the microbial community, representing 91% of all detectable 16S rRNA genes. Correlation analysis of "Ca. Bipolaricaulota" operational taxonomic units (OTUs) with physicochemical parameters indicated that salinity was the strongest environmental factor measured associated with the distribution of this novel group in geothermal fluids. Correspondingly, analysis of 23 metagenome-assembled genomes (MAGs) revealed two distinct groups of "Ca. Bipolaricaulota" MAGs based on the differences in carbon metabolism: one group encoding the bacterial Wood-Ljungdahl pathway (WLP) for H2 dependent CO2 fixation is selected for at lower salinities, and a second heterotrophic clade that lacks the WLP that was selected for under hypersaline conditions in the geothermal brine sediment. In conclusion, our results highlight that the biogeography of "Ca. Bipolaricaulota" taxa is strongly correlated with salinity in hydrothermal ecosystems, which coincides with key differences in carbon acquisition strategies. The exceptionally high relative abundance of apparently heterotrophic representatives of this novel candidate Phylum in geothermal brine sediment observed here may help to guide future enrichment experiments to obtain representatives in pure culture.},
}
@article{pmid36909378,
  author  = {Gupta, P. and Li, S.},
  title   = {Editorial: Methods in genome, pan-genome, pan-transcriptome, and gene regulatory network ({GRN}) construction and analysis.},
  journal = {Frontiers in plant science},
  year    = {2023},
  volume  = {14},
  pages   = {1152708},
  pmid    = {36909378},
  issn    = {1664-462X},
}
@article{pmid36906708,
  author   = {Tanabe, Y. and Yamaguchi, H. and Yoshida, M. and Kai, A. and Okazaki, Y.},
  title    = {Characterization of a bloom-associated alphaproteobacterial lineage, '{Candidatus} {Phycosocius}': insights into freshwater algal-bacterial interactions.},
  journal  = {ISME communications},
  year     = {2023},
  volume   = {3},
  number   = {1},
  pages    = {20},
  pmid     = {36906708},
  issn     = {2730-6151},
  abstract = {Marine bacterial lineages associated with algal blooms, such as the Roseobacter clade, have been well characterized in ecological and genomic contexts, yet such lineages have rarely been explored in freshwater blooms. This study performed phenotypic and genomic analyses of an alphaproteobacterial lineage 'Candidatus Phycosocius' (denoted the CaP clade), one of the few lineages ubiquitously associated with freshwater algal blooms, and described a novel species: 'Ca. Phycosocius spiralis.' Phylogenomic analyses indicated that the CaP clade is a deeply branching lineage in the Caulobacterales. Pangenome analyses revealed characteristic features of the CaP clade: aerobic anoxygenic photosynthesis and essential vitamin B auxotrophy. Genome size varies widely among members of the CaP clade (2.5-3.7 Mb), likely a result of independent genome reductions at each lineage. This includes a loss of tight adherence pilus genes (tad) in 'Ca. P. spiralis' that may reflect its adoption of a unique spiral cell shape and corkscrew-like burrowing activity at the algal surface. Notably, quorum sensing (QS) proteins showed incongruent phylogenies, suggesting that horizontal transfers of QS genes and QS-involved interactions with specific algal partners might drive CaP clade diversification. This study elucidates the ecophysiology and evolution of proteobacteria associated with freshwater algal blooms.},
}
@article{pmid36901726,
  author   = {Sonnenberg, C. B. and Haugen, P.},
  title    = {Bipartite Genomes in {Enterobacterales}: Independent Origins of Chromids, Elevated Openness and Donors of Horizontally Transferred Genes.},
  journal  = {International journal of molecular sciences},
  year     = {2023},
  volume   = {24},
  number   = {5},
  doi      = {10.3390/ijms24054292},
  pmid     = {36901726},
  issn     = {1422-0067},
  abstract = {Multipartite bacteria have one chromosome and one or more chromid. Chromids are believed to have properties that enhance genomic flexibility, making them a favored integration site for new genes. However, the mechanism by which chromosomes and chromids jointly contribute to this flexibility is not clear. To shed light on this, we analyzed the openness of chromosomes and chromids of the two bacteria, Vibrio and Pseudoalteromonas, both which belong to the Enterobacterales order of Gammaproteobacteria, and compared the genomic openness with that of monopartite genomes in the same order. We applied pangenome analysis, codon usage analysis and the HGTector software to detect horizontally transferred genes. Our findings suggest that the chromids of Vibrio and Pseudoalteromonas originated from two separate plasmid acquisition events. Bipartite genomes were found to be more open compared to monopartite. We found that the shell and cloud pangene categories drive the openness of bipartite genomes in Vibrio and Pseudoalteromonas. Based on this and our two recent studies, we propose a hypothesis that explains how chromids and the chromosome terminus region contribute to the genomic plasticity of bipartite genomes.},
}
@article{pmid36900455,
  author   = {López-García, E. and Benítez-Cabello, A. and Ramiro-García, J. and Ladero, V. and Arroyo-López, F. N.},
  title    = {In Silico Evidence of the Multifunctional Features of {Lactiplantibacillus} pentosus {LPG1}, a Natural Fermenting Agent Isolated from Table Olive Biofilms.},
  journal  = {Foods (Basel, Switzerland)},
  year     = {2023},
  volume   = {12},
  number   = {5},
  doi      = {10.3390/foods12050938},
  pmid     = {36900455},
  issn     = {2304-8158},
  abstract = {In recent years, there has been a growing interest in obtaining probiotic bacteria from plant origins. This is the case of Lactiplantibacillus pentosus LPG1, a lactic acid bacterial strain isolated from table olive biofilms with proven multifunctional features. In this work, we have sequenced and closed the complete genome of L. pentosus LPG1 using both Illumina and PacBio technologies. Our intention is to carry out a comprehensive bioinformatics analysis and whole-genome annotation for a further complete evaluation of the safety and functionality of this microorganism. The chromosomic genome had a size of 3,619,252 bp, with a GC (Guanine-Cytosine) content of 46.34%. L. pentosus LPG1 also had two plasmids, designated as pl1LPG1 and pl2LPG1, with lengths of 72,578 and 8713 bp (base pair), respectively. Genome annotation revealed that the sequenced genome consisted of 3345 coding genes and 89 non-coding sequences (73 tRNA and 16 rRNA genes). Taxonomy was confirmed by Average Nucleotide Identity analysis, which grouped L. pentosus LPG1 with other sequenced L. pentosus genomes. Moreover, the pan-genome analysis showed that L. pentosus LPG1 was closely related to the L. pentosus strains IG8, IG9, IG11, and IG12, all of which were isolated from table olive biofilms. Resistome analysis reported the absence of antibiotic resistance genes, whilst PathogenFinder tool classified the strain as a non-human pathogen. Finally, in silico analysis of L. pentosus LPG1 showed that many of its previously reported technological and probiotic phenotypes corresponded with the presence of functional genes. In light of these results, we can conclude that L. pentosus LPG1 is a safe microorganism and a potential human probiotic with a plant origin and application as a starter culture for vegetable fermentations.},
}
@article{pmid36899131,
  author   = {Kim, E. and Jung, H. I. and Park, S. H. and Kim, H. Y. and Kim, S. K.},
  title    = {Comprehensive genome analysis of {Burkholderia} contaminans {SK875}, a quorum-sensing strain isolated from the swine.},
  journal  = {AMB Express},
  year     = {2023},
  volume   = {13},
  number   = {1},
  pages    = {30},
  pmid     = {36899131},
  issn     = {2191-0855},
  abstract = {The Burkholderia cepacia complex (BCC) is a Gram-negative bacterial, including Burkholderia contaminans species. Although the plain Burkholderia is pervasive from taxonomic and genetic perspectives, a common characteristic is that they may use the quorum-sensing (QS) system. In our previous study, we generated the complete genome sequence of Burkholderia contaminans SK875 isolated from the respiratory tract. To our knowledge, this is the first study to report functional genomic features of B. contaminans SK875 for understanding the pathogenic characteristics. In addition, comparative genomic analysis for five B. contaminans genomes was performed to provide comprehensive information on the disease potential of B. contaminans species. Analysis of average nucleotide identity (ANI) showed that the genome has high similarity (> 96%) with other B. contaminans strains. Five B. contaminans genomes yielded a pangenome of 8832 coding genes, a core genome of 5452 genes, the accessory genome of 2128 genes, and a unique genome of 1252 genes. The 186 genes were specific to B. contaminans SK875, including toxin higB-2, oxygen-dependent choline dehydrogenase, and hypothetical proteins. Genotypic analysis of the antimicrobial resistance of B. contaminans SK875 verified resistance to tetracycline, fluoroquinolone, and aminoglycoside. Compared with the virulence factor database, we identified 79 promising virulence genes such as adhesion system, invasions, antiphagocytic, and secretion systems. Moreover, 45 genes of 57 QS-related genes that were identified in B. contaminans SK875 indicated high sequence homology with other B. contaminans strains. Our results will help to gain insight into virulence, antibiotic resistance, and quorum sensing for B. contaminans species.},
}
@article{pmid36898633,
  author   = {Salaheen, S. and Kim, S. W. and Springer, H. R. and Hovingh, E. P. and Van Kessel, J. A. S. and Haley, B. J.},
  title    = {Genomic diversity of antimicrobial-resistant and {Shiga} toxin gene-harboring non-{O157} {Escherichia} coli from dairy calves.},
  journal  = {Journal of global antimicrobial resistance},
  year     = {2023},
  doi      = {10.1016/j.jgar.2023.02.022},
  pmid     = {36898633},
  issn     = {2213-7173},
  abstract = {OBJECTIVES: Shiga toxin-producing Escherichia coli (STEC) are globally significant foodborne pathogens and dairy calves are a known reservoir of both O157 and non-O157 STEC. The objective of this study was to comprehensively evaluate the genomic attributes, diversity, virulence factors (VFs), and antimicrobial resistance gene (ARG) profiles of the STEC from pre- and postweaned dairy calves in commercial dairy herds.
METHODS: In total, 31 non-O157 STEC were identified as part of a larger study focused on the pangenome of >1000 E. coli isolates from the feces of pre- and postweaned dairy calves on commercial dairy farms. These 31 genomes were sequenced on an Illumina NextSeq500 platform.
RESULTS: Based on the phylogenetic analyses, the STEC isolates were determined to be polyphyletic with at-least three phylogroups, i.e., A (32%), B1 (58%), and G (3%), representing at-least 16 sequence types and 11 serogroups including two of the "big six" serogroups, O103 and O111. Several Shiga toxin gene subtypes were identified in the genomes, including stx1a, stx2a, stx2c, stx2d, and stx2g. Based on analysis with the ResFinder database, the majority of the isolates (>50%) were multidrug-resistant (MDR) because they harbored genes conferring resistance to three or more classes of antimicrobials, including some of human health significance (e.g., β-lactams, macrolides, and fosfomycin). Additionally, non-O157 STEC strain persistence and transmission within a farm was observed.
CONCLUSION: Dairy calves are a reservoir of phylogenomically diverse MDR non-O157 STEC. Information from this study will be helpful in assessing public health risk and may help guide preharvest prevention strategies focusing on reservoirs of STEC.},
}
@article{pmid36897406,
  author   = {Xu, Y. and Kong, X. and Guo, Y. and Wang, R. and Yao, X. and Chen, X. and Yan, T. and Wu, D. and Lu, Y. and Dong, J. and Zhu, Y. and Chen, M. and Cen, H. and Jiang, L.},
  title    = {Structural variations and environmental specificities of flowering time-related genes in {Brassica} napus.},
  journal  = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik},
  year     = {2023},
  volume   = {136},
  number   = {3},
  pages    = {42},
  pmid     = {36897406},
  issn     = {1432-2242},
  abstract = {We found that the flowering time order of accessions in a genetic population considerably varied across environments, and homolog copies of essential flowering time genes played different roles in different locations. Flowering time plays a critical role in determining the life cycle length, yield, and quality of a crop. However, the allelic polymorphism of flowering time-related genes (FTRGs) in Brassica napus, an important oil crop, remains unclear. Here, we provide high-resolution graphics of FTRGs in B. napus on a pangenome-wide scale based on single nucleotide polymorphism (SNP) and structural variation (SV) analyses. A total of 1337 FTRGs in B. napus were identified by aligning their coding sequences with Arabidopsis orthologs. Overall, 46.07% of FTRGs were core genes and 53.93% were variable genes. Moreover, 1.94%, 0.74%, and 4.49% FTRGs had significant presence-frequency differences (PFDs) between the spring and semi-winter, spring and winter, and winter and semi-winter ecotypes, respectively. SNPs and SVs across 1626 accessions of 39 FTRGs underlying numerous published qualitative trait loci were analyzed. Additionally, to identify FTRGs specific to an eco-condition, genome-wide association studies (GWASs) based on SNP, presence/absence variation (PAV), and SV were performed after growing and observing the flowering time order (FTO) of plants in a collection of 292 accessions at three locations in two successive years. It was discovered that the FTO of plants in a genetic population changed a lot across various environments, and homolog copies of some key FTRGs played different roles in different locations. This study revealed the molecular basis of the genotype-by-environment (G × E) effect on flowering and recommended a pool of candidate genes specific to locations for breeding selection.},
}
@article{pmid36892794,
  author   = {Xu, Y. and Cheng, T. and Rao, Q. and Zhang, S. and Ma, Y. L.},
  title    = {Comparative genomic analysis of {Stenotrophomonas} maltophilia unravels their genetic variations and versatility trait.},
  journal  = {Journal of applied genetics},
  year     = {2023},
  pmid     = {36892794},
  issn     = {2190-3883},
  abstract = {Stenotrophomonas maltophilia is a species with immensely broad phenotypic and genotypic diversity that could widely distribute in natural and clinical environments. However, little attention has been paid to reveal their genome plasticity to diverse environments. In the present study, a comparative genomic analysis of S. maltophilia isolated from clinical and natural sources was systematically explored its genetic diversity of 42 sequenced genomes. The results showed that S. maltophilia owned an open pan-genome and had strong adaptability to different environments. A total of 1612 core genes were existed with an average of 39.43% of each genome, and the shared core genes might be necessary to maintain the basic characteristics of those S. maltophilia strains. Based on the results of the phylogenetic tree, the ANI value, and the distribution of accessory genes, genes associated with the fundamental process of those strains from the same habitat were found to be mostly conserved in evolution. Isolates from the same habitat had a high degree of similarity in COG category, and the most significant KEGG pathways were mainly involved in carbohydrate and amino acid metabolism, indicating that genes related to essential processes were mostly conserved in evolution for the clinical and environmental settings. Meanwhile, the number of resistance and efflux pump gene was significantly higher in the clinical setting than that of in the environmental setting. Collectively, this study highlights the evolutionary relationships of S. maltophilia isolated from clinical and environmental sources, shedding new light on its genomic diversity.},
}
@article{pmid36884376,
  author   = {Zhang, D. D. and Zhang, X. J. and Wu, D. and Li, B. B. and Liu, H. C. and Zhou, Y. G. and Fang, B. Z. and Li, W. J. and Cai, M.},
  title    = {{Aquiflexum} gelatinilyticum sp. nov., isolated from river water.},
  journal  = {International journal of systematic and evolutionary microbiology},
  year     = {2023},
  volume   = {73},
  number   = {3},
  doi      = {10.1099/ijsem.0.005741},
  pmid     = {36884376},
  issn     = {1466-5034},
  abstract = {Two Gram-stain-negative, strictly aerobic, rod-shaped, non-motile and non-gliding bacteria, designated as XJ19-10[T] and XJ19-11, were isolated from river water in Xinjiang Uygur Autonomous Region, PR China. Cells of these strains were catalase-, oxidase- and gelatinase-positive and contained carotenoids but no flexirubins. Growth occurred at 10-30 °C, pH 7.0-9.0 and with 0-2.5% (w/v) NaCl. On the basis of the results of 16S rRNA gene sequence and genome analyses, the two isolates represented members of the genus Aquiflexum, and the closest relative was Aquiflexum aquatile Z0201[T] with 16S rRNA gene sequence pairwise similarities of 97.9-98.1%. Furthermore, the average nucleotide identities and digital DNA-DNA hybridization identities between the two isolates and other relatives were all less than 82.9 and 28.2 %, respectively, all below the species delineation thresholds. The results of pan-genomic analysis indicated that the type strain XJ19-10[T] shared 2813 core gene clusters with other three type strains of members of the genus Aquiflexum, as well as having 623 strain-specific clusters. The major polar lipids were phosphatidylethanolamine, phosphatidylcholine, an unidentified aminolipid and unidentified lipids. The predominant fatty acids (>10% of the total contents) were iso-C15 : 0, iso-C15 : 1G, iso-C17 : 0 3-OH and summed feature 9, and MK-7 was the respiratory quinone. On the basis of the results of phenotypic, physiological, chemotaxonomic and genotypic characterization, strains XJ19-10[T] and XJ19-11 are considered to represent a novel species, for which the name Aquiflexum gelatinilyticum sp. nov. is proposed. The type strain is XJ19-10[T] (=CGMCC 1.19385[T] =KCTC 92266[T]).},
}
@article{pmid36882215,
  author   = {Lee, Y. and Kim, J. H. and Yoon, J. H. and Lee, J. S. and Sukhoom, A. and Kim, W.},
  title    = {Description of {Defluviimonas} salinarum sp. nov. with the potential of benzene-degradation isolated from saltern in the {Yellow} {Seacoast}.},
  journal  = {FEMS microbiology letters},
  year     = {2023},
  doi      = {10.1093/femsle/fnad018},
  pmid     = {36882215},
  issn     = {1574-6968},
  abstract = {Strain CAU 1641T was isolated from saltern collected in Ganghwa Island, Republic of Korea. The bacterium was an aerobic, Gram-negative, catalase-positive, oxidase-positive, motile, and rod-shaped bacterium. Cell of strain CAU 1641T could grow at 20-40°C and pH 6.0-9.0 with 1.0-3.0% (w/v) NaCl. Strain CAU 1641T shared high 16S rRNA gene sequence similarities with Defluviimonas aquaemixtae KCTC 42108T (98.0%), Defluviimonas denitrificans DSM 18921T (97.6%), and Defluviimonas aestuarii KACC 16442T (97.5%). Phylogenetic trees based on the 16S rRNA gene and the core-genome sequences indicated that strain CAU 1641T belonged to genus Defluviimonas. Strain CAU 1641T contained ubiquinone-10 (Q-10) as the sole respiratory quinone and summed feature 8 (C18:1ω6c and/or C18:1ω7c) as the predominant fatty acid (86.1%). The pan-genome analysis indicated that the genomes of the strain CAU 1641T and 15 reference strains contain a small core genome. The Average Nucleotide Identity and digital DNA-DNA hybridization values among strain CAU 1641T and reference strains of the genus Defluviimonas were in the range of 77.6-78.8% and 21.1-22.1%, respectively. The genome of strain CAU 1641T has several genes of benzene degradation. The genomic G + C content was 66.6%. Based on polyphasic and genomic analyses, strain CAU 1641T represents a novel species of the genus Defluviimonas, for which the name Defluviimonas salinarum sp. nov., is proposed. The type strain is CAU 1641T (= KCTC 92081T = MCCC 1K07180T).},
}
@article{pmid36876113,
  author   = {Anderson, B. D. and Bisanz, J. E.},
  title    = {Challenges and opportunities of strain diversity in gut microbiome research.},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1117122},
  pmid     = {36876113},
  issn     = {1664-302X},
  abstract = {Just because two things are related does not mean they are the same. In analyzing microbiome data, we are often limited to species-level analyses, and even with the ability to resolve strains, we lack comprehensive databases and understanding of the importance of strain-level variation outside of a limited number of model organisms. The bacterial genome is highly plastic with gene gain and loss occurring at rates comparable or higher than de novo mutations. As such, the conserved portion of the genome is often a fraction of the pangenome which gives rise to significant phenotypic variation, particularly in traits which are important in host microbe interactions. In this review, we discuss the mechanisms that give rise to strain variation and methods that can be used to study it. We identify that while strain diversity can act as a major barrier in interpreting and generalizing microbiome data, it can also be a powerful tool for mechanistic research. We then highlight recent examples demonstrating the importance of strain variation in colonization, virulence, and xenobiotic metabolism. Moving past taxonomy and the species concept will be crucial for future mechanistic research to understand microbiome structure and function.},
}
@article {pmid36875624, year = {2023}, author = {Nawae, W and Naktang, C and Charoensri, S and U-Thoomporn, S and Narong, N and Chusri, O and Tangphatsornruang, S and Pootakham, W}, title = {Resequencing of durian genomes reveals large genetic variations among different cultivars.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1137077}, pmid = {36875624}, issn = {1664-462X}, abstract = {Durian (Durio zibethinus), which yields the fruit known as the "King of Fruits," is an important economic crop in Southeast Asia. Several durian cultivars have been developed in this region. In this study, we resequenced the genomes of three popular durian cultivars in Thailand, including Kradumthong (KD), Monthong (MT), and Puangmanee (PM) to investigate genetic diversities of cultivated durians. KD, MT, and PM genome assemblies were 832.7, 762.6, and 821.6 Mb, and their annotations covered 95.7, 92.4, and 92.7% of the embryophyta core proteins, respectively. We constructed the draft durian pangenome and analyzed comparative genomes with related species in Malvales. Long terminal repeat (LTR) sequences and protein families in durian genomes had slower evolution rates than that in cotton genomes. However, protein families with transcriptional regulation function and protein phosphorylation function involved in abiotic and biotic stress responses appeared to evolve faster in durians. The analyses of phylogenetic relationships, copy number variations (CNVs), and presence/absence variations (PAVs) suggested that the genome evolution of Thai durians was different from that of the Malaysian durian, Musang King (MK). Among the three newly sequenced genomes, the PAV and CNV profiles of disease resistance genes and the expressions of methylesterase inhibitor domain containing genes involved in flowering and fruit maturation in MT were different from those in KD and PM. 
These genome assemblies and their analyses provide valuable resources to gain a better understanding of the genetic diversity of cultivated durians, which may be useful for the future development of new durian cultivars.}, }
@article {pmid36875612, year = {2023}, author = {Shirasawa, K and Moraga, R and Ghelfi, A and Hirakawa, H and Nagasaki, H and Ghamkhar, K and Barrett, BA and Griffiths, AG and Isobe, SN}, title = {An improved reference genome for Trifolium subterraneum L. provides insight into molecular diversity and intra-specific phylogeny.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1103857}, pmid = {36875612}, issn = {1664-462X}, abstract = {Subterranean clover (Trifolium subterraneum L., Ts) is a geocarpic, self-fertile annual forage legume with a compact diploid genome (n = x = 8, 544 Mb/1C). Its resilience and climate adaptivity have made it an economically important species in Mediterranean and temperate zones. Using the cultivar Daliak, we generated higher resolution sequence data, created a new genome assembly TSUd_3.0, and conducted molecular diversity analysis for copy number variant (CNV) and single-nucleotide polymorphism (SNP) among 36 cultivars. TSUd_3.0 substantively improves prior genome assemblies with new Hi-C and long-read sequence data, covering 531 Mb, containing 41,979 annotated genes and generating a 94.4% BUSCO score. Comparative genomic analysis among select members of the tribe Trifolieae indicated TSUd_3.0 corrects six assembly-error inversion/duplications and confirmed phylogenetic relationships. Its synteny with T. pratense, T. repens, Medicago truncatula and Lotus japonicus genomes were assessed, with the more distantly related T. repens and M. truncatula showing higher levels of co-linearity with Ts than between Ts and its close relative T. pratense. Resequencing of 36 cultivars discovered 7,789,537 SNPs subsequently used for genomic diversity assessment and sequence-based clustering. Heterozygosity estimates ranged from 1% to 21% within the 36 cultivars and may be influenced by admixture. 
Phylogenetic analysis supported subspecific genetic structure, although it indicates four or five groups, rather than the three recognized subspecies. Furthermore, there were incidences where cultivars characterized as belonging to a particular subspecies clustered with another subspecies when using genomic data. These outcomes suggest that further investigation of Ts sub-specific classification using molecular and morpho-physiological data is needed to clarify these relationships. This upgraded reference genome, complemented with comprehensive sequence diversity analysis of 36 cultivars, provides a platform for future gene functional analysis of key traits, and genome-based breeding strategies for climate adaptation and agronomic performance. Pangenome analysis, more in-depth intra-specific phylogenomic analysis using the Ts core collection, and functional genetic and genomic studies are needed to further augment knowledge of Trifolium genomes.}, }
@article {pmid36871069, year = {2023}, author = {Nowinski, B and Feng, X and Preston, CM and Birch, JM and Luo, H and Whitman, WB and Moran, MA}, title = {Ecological divergence of syntopic marine bacterial species is shaped by gene content and expression.}, journal = {The ISME journal}, volume = {}, number = {}, pages = {}, pmid = {36871069}, issn = {1751-7370}, abstract = {Identifying mechanisms by which bacterial species evolve and maintain genomic diversity is particularly challenging for the uncultured lineages that dominate the surface ocean. A longitudinal analysis of bacterial genes, genomes, and transcripts during a coastal phytoplankton bloom revealed two co-occurring, highly related Rhodobacteraceae species from the deeply branching and uncultured NAC11-7 lineage. These have identical 16S rRNA gene amplicon sequences, yet their genome contents assembled from metagenomes and single cells indicate species-level divergence. Moreover, shifts in relative dominance of the species during dynamic bloom conditions over 7 weeks confirmed the syntopic species' divergent responses to the same microenvironment at the same time. Genes unique to each species and genes shared but divergent in per-cell inventories of mRNAs accounted for 5% of the species' pangenome content. These analyses uncover physiological and ecological features that differentiate the species, including capacities for organic carbon utilization, attributes of the cell surface, metal requirements, and vitamin biosynthesis. Such insights into the coexistence of highly related and ecologically similar bacterial species in their shared natural habitat are rare.}, }
@article {pmid36864624, year = {2023}, author = {Deorowicz, S and Danek, A and Li, H}, title = {AGC: Compact representation of assembled genomes with fast queries and updates.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btad097}, pmid = {36864624}, issn = {1367-4811}, abstract = {MOTIVATION: High-quality sequence assembly is the ultimate representation of complete genetic information of an individual. Several ongoing pangenome projects are producing collections of high-quality assemblies of various species. Each project has already generated assemblies of hundreds of gigabytes on disk, greatly impeding the distribution of and access to such rich datasets.
RESULTS: Here we show how to reduce the size of the sequenced genomes by 2 to 3 orders of magnitude. Our tool compresses the genomes significantly better than the existing programs and is much faster. Moreover, its unique feature is the ability to access any contig (or its part) in a fraction of a second and easily append new samples to the compressed collections. Thanks to this, AGC could be useful not only for backup or transfer purposes, but also for routine analysis of pangenome sequences in common pipelines. With the rapidly reduced cost and improved accuracy of sequencing technologies, we anticipate more comprehensive pangenome projects with much larger sample sizes. AGC is likely to become a foundation tool to store, distribute and access pangenome data.
AVAILABILITY: The source code of AGC is available at https://github.com/refresh-bio/agc. The package can be installed via Bioconda at https://anaconda.org/bioconda/agc.
SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, }
@article {pmid36864101, year = {2023}, author = {Yan, H and Sun, M and Zhang, Z and Jin, Y and Zhang, A and Lin, C and Wu, B and He, M and Xu, B and Wang, J and Qin, P and Mendieta, JP and Nie, G and Wang, J and Jones, CS and Feng, G and Srivastava, RK and Zhang, X and Bombarely, A and Luo, D and Jin, L and Peng, Y and Wang, X and Ji, Y and Tian, S and Huang, L}, title = {Pangenomic analysis identifies structural variation associated with heat tolerance in pearl millet.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {36864101}, issn = {1546-1718}, abstract = {Pearl millet is an important cereal crop worldwide and shows superior heat tolerance. Here, we developed a graph-based pan-genome by assembling ten chromosomal genomes with one existing assembly adapted to different climates worldwide and captured 424,085 genomic structural variations (SVs). Comparative genomics and transcriptomics analyses revealed the expansion of the RWP-RK transcription factor family and the involvement of endoplasmic reticulum (ER)-related genes in heat tolerance. The overexpression of one RWP-RK gene led to enhanced plant heat tolerance and transactivated ER-related genes quickly, supporting the important roles of RWP-RK transcription factors and ER system in heat tolerance. Furthermore, we found that some SVs affected the gene expression associated with heat tolerance and SVs surrounding ER-related genes shaped adaptation to heat tolerance during domestication in the population. Our study provides a comprehensive genomic resource revealing insights into heat tolerance and laying a foundation for generating more robust crops under the changing climate.}, }
@article {pmid36854668, year = {2023}, author = {Liu, J and Dawe, RK}, title = {Large haplotypes highlight a complex age structure within the maize pan-genome.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.276705.122}, pmid = {36854668}, issn = {1549-5469}, abstract = {The genomes of maize and other eukaryotes contain stable haplotypes in regions of low recombination. These regions, including centromeres, long heterochromatic blocks and rDNA arrays have been difficult to analyze with respect to their diversity and origin. Greatly improved genome assemblies are now available that enable comparative genomics over these and other non-genic spaces. Using 26 complete maize genomes, we developed methods to align intergenic sequences while excluding genes and regulatory regions. The centromere haplotypes (cenhaps) extend for megabases on either side of the functional centromere regions and appear as evolutionary strata, with haplotype divergence/coalescence times dating as far back as 450 thousand years ago (kya). Application of the same methods to other low recombination regions (heterochromatic knobs and rDNA) and all intergenic spaces revealed that deep coalescence times are ubiquitous across the maize pan-genome. Divergence estimates vary over a broad time scale with peaks at ~300 kya and 16 kya, reflecting a complex history of gene flow among diverging populations and changes in population size associated with domestication. Cenhaps and other long haplotypes provide vivid displays of this ancient diversity.}, }
@article {pmid36853054, year = {2023}, author = {Du, Y and Zou, J and Yin, Z and Chen, T}, title = {Pan-Chromosome and Comparative Analysis of Agrobacterium fabrum Reveal Important Traits Concerning the Genetic Diversity, Evolutionary Dynamics, and Niche Adaptation of the Species.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0292422}, doi = {10.1128/spectrum.02924-22}, pmid = {36853054}, issn = {2165-0497}, abstract = {Agrobacterium fabrum has been critical for the development of plant genetic engineering and agricultural biotechnology due to its ability to transform eukaryotic cells. However, the gene composition, evolutionary dynamics, and niche adaptation of this species is still unknown. Therefore, we established a comparative genomic analysis based on a pan-chromosome data set to evaluate the genetic diversity of A. fabrum. Here, 25 A. fabrum genomes were selected for analysis by core genome phylogeny combined with the average nucleotide identity (ANI), amino acid identity (AAI), and in silico DNA-DNA hybridization (DDH) values. An open pan-genome of A. fabrum exhibits genetic diversity with variable accessorial genes as evidenced by a consensus pan-genome of 12 representative genomes. The genomic plasticity of A. fabrum is apparent in its putative sequences for mobile genetic elements (MGEs), limited horizontal gene transfer barriers, and potentially horizontally transferred genes. The evolutionary constraints and functional enrichment in the pan-chromosome were measured by the Clusters of Orthologous Groups (COG) categories using eggNOG-mapper software, and the nonsynonymous/synonymous rate ratio (dN/dS) was determined using HYPHY software. Comparative analysis revealed significant differences in the functional enrichment and the degree of purifying selection between the core genome and non-core genome. 
We demonstrate that the core gene families undergo stronger purifying selection but have a significant bias to contain one or more positively selected sites. Furthermore, although they shared similar genetic diversity, we observed significant differences between chromosome 1 (Chr I) and the chromid in their functional features and evolutionary constraints. We demonstrate that putative genetic elements responsible for plant infection, ecological adaptation, and speciation represent the core genome, highlighting their importance in the adaptation of A. fabrum to plant-related niches. Our pan-chromosome analysis of A. fabrum provides comprehensive insights into the genetic properties, evolutionary patterns, and niche adaptation of the species. IMPORTANCE Agrobacterium spp. live in diverse plant-associated niches such as soil, the rhizosphere, and vegetation, which are challenged by multiple stressors such as diverse energy sources, plant defenses, and microbial competition. They have evolved the ability to utilize diverse resources, escape plant defenses, and defeat competitors. However, the underlying genetic diversity and evolutionary dynamics of Agrobacterium spp. remain unexplored. We examined the phylogeny and pan-genome of A. fabrum to define intraspecies evolutionary relationships. Our results indicate an open pan-genome and numerous MGEs and horizontally transferred genes among A. fabrum genomes, reflecting the flexibility of the chromosomes and the potential for genetic exchange. Furthermore, we observed significant differences in the functional features and evolutionary constraints between the core and accessory genomes and between Chr I and the chromid, respectively.}, }
@article {pmid36852268, year = {2023}, author = {Jiang, YF and Wang, S and Wang, CL and Xu, RH and Wang, WW and Jiang, Y and Wang, MS and Jiang, L and Dai, LH and Wang, JR and Chu, XH and Zeng, YQ and Fang, LZ and Wu, DD and Zhang, Q and Ding, XD}, title = {Pangenome obtained by long-read sequencing of 11 genomes reveal hidden functional structural variants in pigs.}, journal = {iScience}, volume = {26}, number = {3}, pages = {106119}, pmid = {36852268}, issn = {2589-0042}, abstract = {Long-read sequencing (LRS) facilitates both the genome assembly and the discovery of structural variants (SVs). Here, we built a graph-based pig pangenome by incorporating 11 LRS genomes with an average of 94.01% BUSCO completeness score, revealing 206-Mb novel sequences. We discovered 183,352 nonredundant SVs (63% novel), representing 12.12% of the reference genome. By genotyping SVs in an additional 196 short-read sequencing samples, we identified thousands of population stratified SVs. Particularly, we detected 7,568 Tibetan specific SVs, some of which demonstrate significant population differentiation between Tibetan and low-altitude pigs, which might be associated with the high-altitude hypoxia adaptation in Tibetan pigs. Further integrating functional genomic data, the most promising candidate genes within the SVs that might contribute to the high-altitude hypoxia adaptation were discovered. Overall, our study generates a benchmark pangenome resource for illustrating the important roles of SVs in adaptive evolution, domestication, and genetic improvement of agronomic traits in pigs.}, }
@article {pmid36851839, year = {2023}, author = {Dallinger, HG and Löschenberger, F and Azrak, N and Ametz, C and Michel, S and Bürstmayr, H}, title = {Genome-wide association mapping for pre-harvest sprouting in European winter wheat detects novel resistance QTL, pleiotropic effects, and structural variation in multiple genomes.}, journal = {The plant genome}, volume = {}, number = {}, pages = {e20301}, doi = {10.1002/tpg2.20301}, pmid = {36851839}, issn = {1940-3372}, abstract = {Pre-harvest sprouting (PHS), germination of seeds before harvest, is a major problem in global wheat (Triticum aestivum L.) production, and leads to reduced bread-making quality in affected grain. Breeding for PHS resistance can prevent losses under adverse conditions. Selecting resistant lines in years lacking pre-harvest rain, requires challenging of plants in the field or in the laboratory or using genetic markers. Despite the availability of a wheat reference and pan-genome, linking markers, genes, allelic, and structural variation, a complete understanding of the mechanisms underlying various sources of PHS resistance is still lacking. Therefore, we challenged a population of European wheat varieties and breeding lines with PHS conditions and phenotyped them for PHS traits, grain quality, phenological and agronomic traits to conduct genome-wide association mapping. Furthermore, we compared these marker-trait associations to previously reported PHS loci and evaluated their usefulness for breeding. We found markers associated with PHS on all chromosomes, with strong evidence for novel quantitative trait locus/loci (QTL) on chromosome 1A and 5B. The QTL on chromosome 1A lacks pleiotropic effect, for the QTL on 5B we detected pleiotropic effects on phenology and grain quality. Multiple peaks on chromosome 4A co-located with the major resistance locus Phs-A1, for which two causal genes, TaPM19 and TaMKK3, have been proposed. 
Mapping markers and genes to the pan-genome and chromosomal alignments provide evidence for structural variation around this major PHS-resistance locus. Although PHS is controlled by many loci distributed across the wheat genome, Phs-A1 on chromosome 4A seems to be the most effective and widely deployed source of resistance, in European wheat varieties.}, }
@article {pmid36851180, year = {2023}, author = {Chandrasekar, SS and Kingstad-Bakke, BA and Wu, CW and Phanse, Y and Osorio, JE and Talaat, AM}, title = {A DNA Prime and MVA Boost Strategy Provides a Robust Immunity against Infectious Bronchitis Virus in Chickens.}, journal = {Vaccines}, volume = {11}, number = {2}, pages = {}, pmid = {36851180}, issn = {2076-393X}, abstract = {Infectious bronchitis (IB) is an acute respiratory disease of chickens caused by the avian coronavirus Infectious Bronchitis Virus (IBV). Modified Live Virus (MLV) vaccines used commercially can revert to virulence in the field, recombine with circulating serotypes, and cause tissue damage in vaccinated birds. Previously, we showed that a mucosal adjuvant system, QuilA-loaded Chitosan (QAC) nanoparticles encapsulating plasmid vaccine encoding for IBV nucleocapsid (N), is protective against IBV. Herein, we report a heterologous vaccination strategy against IBV, where QAC-encapsulated plasmid immunization is followed by Modified Vaccinia Ankara (MVA) immunization, both expressing the same IBV-N antigen. This strategy led to the initiation of robust T-cell responses. Birds immunized with the heterologous vaccine strategy had reduced clinical severity and >two-fold reduction in viral burden in lachrymal fluid and tracheal swabs post-challenge compared to priming and boosting with the MVA-vectored vaccine alone. The outcomes of this study indicate that the heterologous vaccine platform is more immunogenic and protective than a homologous MVA prime/boost vaccination strategy.}, }
@article {pmid36848567, year = {2023}, author = {Sierra-Patev, S and Min, B and Naranjo-Ortiz, M and Looney, B and Konkel, Z and Slot, JC and Sakamoto, Y and Steenwyk, JL and Rokas, A and Carro, J and Camarero, S and Ferreira, P and Molpeceres, G and Ruiz-Dueñas, FJ and Serrano, A and Henrissat, B and Drula, E and Hughes, KW and Mata, JL and Ishikawa, NK and Vargas-Isla, R and Ushijima, S and Smith, CA and Donoghue, J and Ahrendt, S and Andreopoulos, W and He, G and LaButti, K and Lipzen, A and Ng, V and Riley, R and Sandor, L and Barry, K and Martínez, AT and Xiao, Y and Gibbons, JG and Terashima, K and Grigoriev, IV and Hibbett, D}, title = {A global phylogenomic analysis of the shiitake genus Lentinula.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {120}, number = {10}, pages = {e2214076120}, doi = {10.1073/pnas.2214076120}, pmid = {36848567}, issn = {1091-6490}, abstract = {Lentinula is a broadly distributed group of fungi that contains the cultivated shiitake mushroom, L. edodes. We sequenced 24 genomes representing eight described species and several unnamed lineages of Lentinula from 15 countries on four continents. Lentinula comprises four major clades that arose in the Oligocene, three in the Americas and one in Asia-Australasia. To expand sampling of shiitake mushrooms, we assembled 60 genomes of L. edodes from China that were previously published as raw Illumina reads and added them to our dataset. Lentinula edodes sensu lato (s. lat.) contains three lineages that may warrant recognition as species, one including a single isolate from Nepal that is the sister group to the rest of L. edodes s. lat., a second with 20 cultivars and 12 wild isolates from China, Japan, Korea, and the Russian Far East, and a third with 28 wild isolates from China, Thailand, and Vietnam. Two additional lineages in China have arisen by hybridization among the second and third groups. 
Genes encoding cysteine sulfoxide lyase (lecsl) and γ-glutamyl transpeptidase (leggt), which are implicated in biosynthesis of the organosulfur flavor compound lenthionine, have diversified in Lentinula. Paralogs of both genes that are unique to Lentinula (lecsl 3 and leggt 5b) are coordinately up-regulated in fruiting bodies of L. edodes. The pangenome of L. edodes s. lat. contains 20,308 groups of orthologous genes, but only 6,438 orthogroups (32%) are shared among all strains, whereas 3,444 orthogroups (17%) are found only in wild populations, which should be targeted for conservation.}, }
@article {pmid36844929, year = {2023}, author = {Nielsen, FD and Møller-Jensen, J and Jørgensen, MG}, title = {Adding context to the pneumococcal core genes using bioinformatic analysis of the intergenic pangenome of Streptococcus pneumoniae.}, journal = {Frontiers in bioinformatics}, volume = {3}, number = {}, pages = {1074212}, pmid = {36844929}, issn = {2673-7647}, abstract = {Introduction: Whole genome sequencing offers great opportunities for linking genotypes to phenotypes aiding in our understanding of human disease and bacterial pathogenicity. However, these analyses often overlook non-coding intergenic regions (IGRs). By disregarding the IGRs, crucial information is lost, as genes have little biological function without expression. Methods/Results: In this study, we present the first complete pangenome of the important human pathogen Streptococcus pneumoniae (pneumococcus), spanning both the genes and IGRs. We show that the pneumococcus species retains a small core genome of IGRs that are present across all isolates. Gene expression is highly dependent on these core IGRs, and often several copies of these core IGRs are found across each genome. Core genes and core IGRs show a clear linkage as 81% of core genes are associated with core IGRs. Additionally, we identify a single IGR within the core genome that is always occupied by one of two highly distinct sequences, scattered across the phylogenetic tree. Discussion: Their distribution indicates that this IGR is transferred between isolates through horizontal regulatory transfer independent of the flanking genes and that each type likely serves different regulatory roles depending on their genetic context.}, }
@article {pmid36838392, year = {2023}, author = {Sugrue, I and Hill, D and O'Connor, PM and Day, L and Stanton, C and Hill, C and Ross, RP}, title = {Nisin E Is a Novel Nisin Variant Produced by Multiple Streptococcus equinus Strains.}, journal = {Microorganisms}, volume = {11}, number = {2}, pages = {}, doi = {10.3390/microorganisms11020427}, pmid = {36838392}, issn = {2076-2607}, support = {SFI/12/RC/2273//Science Foundation Ireland/Ireland ; }, abstract = {Nisin A, the prototypical lantibiotic, is an antimicrobial peptide currently utilised as a food preservative, with potential for therapeutic applications. Here, we describe nisin E, a novel nisin variant produced by two Streptococcus equinus strains, APC4007 and APC4008, isolated from sheep milk. Shotgun whole genome sequencing and analysis revealed biosynthetic gene clusters similar to nisin U, with a unique rearrangement of the core peptide encoding gene within the cluster. The 3100.8 Da peptide by MALDI-TOF mass spectrometry, is 75% identical to nisin A, with 10 differences, including 2 deletions: Ser29 and Ile30, and 8 substitutions: Ile4Lys, Gly18Thr, Asn20Pro, Met21Ile, His27Gly, Val32Phe, Ser33Gly, and Lys34Asn. Nisin E producing strains inhibited species of Lactobacillus, Bacillus, and Clostridioides and were immune to nisin U. Sequence alignment identified putative promoter sequences across the nisin producer genera, allowing for the prediction of genes in Streptococcus to be potentially regulated by nisin. S. equinus pangenome BLAST analyses detected 6 nisin E operons across 44 publicly available genomes. An additional 20 genomes contained a subset of nisin E transport/immunity and regulatory genes (nseFEGRK), without adjacent peptide production genes. These genes suggest that nisin E response mechanisms, distinct from the canonical nisin immunity and resistance operons, are widespread across the S. equinus species. The discovery of this new nisin variant and its immunity determinants in S. 
equinus suggests a central role for nisin in the competitive nature of the species.}, }
@article {pmid36838372, year = {2023}, author = {Jiang, S and Fan, Q and Zhang, Z and Deng, Y and Wang, L and Dai, Q and Wang, J and Lin, M and Zhou, J and Long, Z and He, G and Zhou, Z}, title = {Biodegradation of Oil by a Newly Isolated Strain Acinetobacter junii WCO-9 and Its Comparative Pan-Genome Analysis.}, journal = {Microorganisms}, volume = {11}, number = {2}, pages = {}, doi = {10.3390/microorganisms11020407}, pmid = {36838372}, issn = {2076-2607}, abstract = {Waste oil pollution and the treatment of oily waste present a challenge, and the exploitation of microbial resources is a safe and efficient method to resolve these problems. Lipase-producing microorganisms can directly degrade waste oil and promote the degradation of oily waste and, therefore, have very significant research and application value. The isolation of efficient oil-degrading strains is of great practical significance in research into microbial remediation in oil-contaminated environments and for the enrichment of the microbial lipase resource library. In this study, Acinetobacter junii WCO-9, an efficient oil-degrading bacterium, was isolated from an oil-contaminated soil using olive oil as the sole carbon source, and its enzyme activity of ρ-nitrophenyl decanoate (ρ-NPD) decomposition was 3000 U/L. The WCO-9 strain could degrade a variety of edible oils, and its degradation capability was significantly better than that of the control strain, A. junii ATCC 17908. Comparative pan-genome and lipid degradation pathway analyses indicated that A. junii isolated from the same environment shared a similar set of core genes and that the species accumulated more specific genes that facilitated resistance to environmental stresses under different environmental conditions. 
WCO-9 has accumulated a complete set of oil metabolism genes under a long-term oil-contamination environment, and the compact arrangement of abundant lipase and lipase chaperones has further strengthened the ability of the strain to survive in such environments. This is the main reason why WCO-9 is able to degrade oil significantly more effectively than ATCC 17908. In addition, WCO-9 possesses a specific lipase that is not found in homologous strains. In summary, A. junii WCO-9, with a complete triglyceride degradation pathway and the specific lipase gene, has great potential in environmental remediation and lipase for industry.}, }
@article {pmid36838305, year = {2023}, author = {Gonçalves-Oliveira, J and Gutierrez, R and Schlesener, CL and Jaffe, DA and Aguilar-Setién, A and Boulouis, HJ and Nachum-Biala, Y and Huang, BC and Weimer, BC and Chomel, BB and Harrus, S}, title = {Genomic Characterization of Three Novel Bartonella Strains in a Rodent and Two Bat Species from Mexico.}, journal = {Microorganisms}, volume = {11}, number = {2}, pages = {}, doi = {10.3390/microorganisms11020340}, pmid = {36838305}, issn = {2076-2607}, abstract = {Rodents and bats are the most diverse mammal group that host Bartonella species. In the Americas, they were described as harboring Bartonella species; however, they were mostly characterized to the genotypic level. We describe here Bartonella isolates obtained from blood samples of one rodent (Peromyscus yucatanicus from San José Pibtuch, Yucatan) and two bat species (Desmodus rotundus from Progreso, and Pteronotus parnellii from Chamela-Cuitzmala) from Mexico. We sequenced and described the genomic features of three Bartonella strains and performed phylogenomic and pangenome analyses to decipher their phylogenetic relationships. The mouse-associated genome was closely related to Bartonella vinsonii. The two bat-associated genomes clustered into a single distinct clade in between lineages 3 and 4, suggesting to be an ancestor of the rodent-associated Bartonella clade (lineage 4). These three genomes showed <95% OrthoANI values compared to any other Bartonella genome, and therefore should be considered as novel species. In addition, our analyses suggest that the B. vinsonii complex should be revised, and all B. vinsonii subspecies need to be renamed and considered as full species. The phylogenomic clustering of the bat-associated Bartonella strains and their virulence factor profile (lack of the Vbh/TraG conjugation system remains of the T4SS) suggest that it should be considered as a new lineage clade (L5) within the Bartonella genus.}, }
@article{pmid36838222,
  author   = {Mughal, SR and Niazi, SA and Do, T and Gilbert, SC and Didelot, X and Radford, DR and Beighton, D},
  title    = {Genomic Diversity among {Actinomyces} naeslundii Strains and Closely Related Species.},
  journal  = {Microorganisms},
  year     = {2023},
  volume   = {11},
  number   = {2},
  doi      = {10.3390/microorganisms11020254},
  pmid     = {36838222},
  issn     = {2076-2607},
  abstract = {The aim of this study was to investigate and clarify the ambiguous taxonomy of Actinomyces naeslundii and its closely related species using state-of-the-art high-throughput sequencing techniques, and, furthermore, to determine whether sub-clusters identified within Actinomyces oris and Actinomyces naeslundii in a previous study by multi locus sequence typing (MLST) using concatenation of seven housekeeping genes should either be classified as subspecies or distinct species. The strains in this study were broadly classified under Actinomyces naeslundii group as A. naeslundii genospecies I and genospecies II. Based on MLST data analysis, these were further classified as A. oris and A. naeslundii. The whole genome sequencing of selected strains of A. oris (n = 17) and A. naeslundii (n = 19) was carried out using Illumina Genome Analyzer IIxe and Roche 454 allowing paired-end and single-reads sequencing, respectively. The sequences obtained were aligned using CLC Genomic workbench version 5.1 and annotated using RAST (Rapid Annotation using Subsystem Technology) release version 59 accessible online. Additionally, genomes of seven publicly available strains of Actinomyces (k20, MG1, c505, OT175, OT171, OT170, and A. johnsonii) were also included. Comparative genomic analysis (CGA) using Mauve, Progressive Mauve, gene-by-gene, Core, and Pan Genome, and finally Digital DNA-DNA homology (DDH) analysis was carried out. DDH values were obtained using in silico genome-genome comparison. Evolutionary analysis using ClonalFrame was also undertaken.
The mutation and recombination events were compared using chi-square test among A. oris and A. naeslundii isolates (analysis methods are not included in the study). CGA results were consistent with previous traditional classification using MLST. It was found that strains of Actinomyces k20, MG1, c505, and OT175 clustered in A. oris group of isolates, while OT171, OT170, and A. johnsonii appeared as separate branches. Similar clustering to MLST was observed for other isolates. The mutation and recombination events were significantly higher in A. oris than A. naeslundii, highlighting the diversity of A. oris strains in the oral cavity. These findings suggest that A. oris forms six distinct groups, whereas A. naeslundii forms three. The correct designation of isolates will help in the identification of clinical Actinomyces isolates found in dental plaque. Easily accessible online genomic sequence data will also accelerate the investigation of the biochemical characterisation and pathogenesis of this important group of micro-organisms.},
}
@article{pmid36836896,
  author   = {Jalal, K and Khan, K and Hayat, A and Alnasser, SM and Meshal, A and Basharat, Z},
  title    = {Pan-Genomics of {Escherichia} albertii for Antibiotic Resistance Profiling in Different Genome Fractions and Natural Product Mediated Intervention: In Silico Approach.},
  journal  = {Life (Basel, Switzerland)},
  year     = {2023},
  volume   = {13},
  number   = {2},
  doi      = {10.3390/life13020541},
  pmid     = {36836896},
  issn     = {2075-1729},
  abstract = {Escherichia albertii is an emerging, enteric pathogen of significance. It was first isolated in 2003 from a pediatric diarrheal sample from Bangladesh. In this study, a comprehensive in silico strategy was followed to first list out antibiotic-resistant genes from core, accessory and unique genome fractions of 95 available genomes of E. albertii. Then, 56 drug targets were identified from the core essential genome. Finally, ZipA, an essential cell division protein that stabilizes the FtsZ protofilaments by cross-linking them and serves as a cytoplasmic membrane anchor for the Z ring, was selected for further downstream processing. It was computationally modeled using a threading approach, followed by virtual screening of two phytochemical libraries, Ayurvedic (n = 2103 compounds) and Traditional Chinese Medicine (n = 36,043 compounds). ADMET profiling, followed by PBPK modeling in the central body compartment, in a population of 250 non-diseased, 250 cirrhotic and 250 renally impaired people was attempted. ZINC85624912 from Chinese medicinal library showed the highest bioavailability and plasma retention. This is the first attempt to simulate the fate of natural products in the body through PBPK. Dynamics simulation of 20 ns for the top three compounds from both libraries was also performed to validate the stability of the compounds.
The obtained information from the current study could aid wet-lab scientists to work on the scaffold of screened drug-like compounds from natural resources and could be useful in our quest for therapy against antibiotic-resistant E. albertii.},
}
@article{pmid36835570,
  author   = {Balabanova, L and Nedashkovskaya, O and Otstavnykh, N and Isaeva, M and Kolpakova, O and Pentehina, I and Seitkalieva, A and Noskova, Y and Stepochkina, V and Son, O and Tekutyeva, L},
  title    = {Computational Insight into Intraspecies Distinctions in {Pseudoalteromonas} distincta: Carotenoid-like Synthesis Traits and Genomic Heterogeneity.},
  journal  = {International journal of molecular sciences},
  year     = {2023},
  volume   = {24},
  number   = {4},
  doi      = {10.3390/ijms24044158},
  pmid     = {36835570},
  issn     = {1422-0067},
  abstract = {Advances in the computational annotation of genomes and the predictive potential of current metabolic models, based on more than thousands of experimental phenotypes, allow them to be applied to identify the diversity of metabolic pathways at the level of ecophysiology differentiation within taxa and to predict phenotypes, secondary metabolites, host-associated interactions, survivability, and biochemical productivity under proposed environmental conditions. The significantly distinctive phenotypes of members of the marine bacterial species Pseudoalteromonas distincta and an inability to use common molecular markers make their identification within the genus Pseudoalteromonas and prediction of their biotechnology potential impossible without genome-scale analysis and metabolic reconstruction. A new strain, KMM 6257, of a carotenoid-like phenotype, isolated from a deep-habituating starfish, emended the description of P. distincta, particularly in the temperature growth range from 4 to 37 °C. The taxonomic status of all available closely related species was elucidated by phylogenomics. P. distincta possesses putative methylerythritol phosphate pathway II and 4,4'-diapolycopenedioate biosynthesis, related to C30 carotenoids, and their functional analogues, aryl polyene biosynthetic gene clusters (BGC).
However, the yellow-orange pigmentation phenotypes in some strains coincide with the presence of a hybrid BGC encoding for aryl polyene esterified with resorcinol. The alginate degradation and glycosylated immunosuppressant production, similar to brasilicardin, streptorubin, and nucleocidines, are the common predicted features. Starch, agar, carrageenan, xylose, lignin-derived compound degradation, polysaccharide, folate, and cobalamin biosynthesis are all strain-specific.},
}
@article{pmid36834516,
  author   = {Li, H and Tahir Ul Qamar, M and Yang, L and Liang, J and You, J and Wang, L},
  title    = {Current Progress, Applications and Challenges of Multi-Omics Approaches in Sesame Genetic Improvement.},
  journal  = {International journal of molecular sciences},
  year     = {2023},
  volume   = {24},
  number   = {4},
  doi      = {10.3390/ijms24043105},
  pmid     = {36834516},
  issn     = {1422-0067},
  abstract = {Sesame is one of the important traditional oil crops in the world, and has high economic and nutritional value. Recently, due to the novel high throughput sequencing techniques and bioinformatical methods, the study of the genomics, methylomics, transcriptomics, proteomics and metabonomics of sesame has developed rapidly. Thus far, the genomes of five sesame accessions have been released, including white and black seed sesame. The genome studies reveal the function and structure of the sesame genome, and facilitate the exploitation of molecular markers, the construction of genetic maps and the study of pan-genomes. Methylomics focus on the study of the molecular level changes under different environmental conditions. Transcriptomics provide a powerful tool to study abiotic/biotic stress, organ development, and noncoding RNAs, and proteomics and metabonomics also provide some support in studying abiotic stress and important traits. In addition, the opportunities and challenges of multi-omics in sesame genetics breeding were also described. This review summarizes the current research status of sesame from the perspectives of multi-omics and hopes to provide help for further in-depth research on sesame.},
}
@article{pmid36833201,
  author   = {Liu, S and Jiao, J and Tian, CF},
  title    = {Adaptive Evolution of Rhizobial Symbiosis beyond Horizontal Gene Transfer: From Genome Innovation to Regulation Reconstruction.},
  journal  = {Genes},
  year     = {2023},
  volume   = {14},
  number   = {2},
  doi      = {10.3390/genes14020274},
  pmid     = {36833201},
  issn     = {2073-4425},
  abstract = {There are ubiquitous variations in symbiotic performance of different rhizobial strains associated with the same legume host in agricultural practices. This is due to polymorphisms of symbiosis genes and/or largely unexplored variations in integration efficiency of symbiotic function. Here, we reviewed cumulative evidence on integration mechanisms of symbiosis genes. Experimental evolution, in concert with reverse genetic studies based on pangenomics, suggests that gain of the same circuit of key symbiosis genes through horizontal gene transfer is necessary but sometimes insufficient for bacteria to establish an effective symbiosis with legumes. An intact genomic background of the recipient may not support the proper expression or functioning of newly acquired key symbiosis genes. Further adaptive evolution, through genome innovation and reconstruction of regulation networks, may confer the recipient of nascent nodulation and nitrogen fixation ability. Other accessory genes, either co-transferred with key symbiosis genes or stochastically transferred, may provide the recipient with additional adaptability in ever-fluctuating host and soil niches. Successful integrations of these accessory genes with the rewired core network, regarding both symbiotic and edaphic fitness, can optimize symbiotic efficiency in various natural and agricultural ecosystems. This progress also sheds light on the development of elite rhizobial inoculants using synthetic biology procedures.},
}
@article{pmid36831244,
  author   = {Apicella, C and Ruano, CSM and Thilaganathan, B and Khalil, A and Giorgione, V and Gascoin, G and Marcellin, L and Gaspar, C and Jacques, S and Murdoch, CE and Miralles, F and Méhats, C and Vaiman, D},
  title    = {Pan-Genomic Regulation of Gene Expression in Normal and Pathological Human Placentas.},
  journal  = {Cells},
  year     = {2023},
  volume   = {12},
  number   = {4},
  doi      = {10.3390/cells12040578},
  pmid     = {36831244},
  issn     = {2073-4409},
  abstract = {In this study, we attempted to find genetic variants affecting gene expression (eQTL = expression Quantitative Trait Loci) in the human placenta in normal and pathological situations. The analysis of gene expression in placental diseases (Pre-eclampsia and Intra-Uterine Growth Restriction) is hindered by the fact that diseased placental tissue samples are generally taken at earlier gestations compared to control samples. The difference in gestational age is considered a major confounding factor in the transcriptome regulation of the placenta. To alleviate this significant problem, we propose here a novel approach to pinpoint disease-specific cis-eQTLs. By statistical correction for gestational age at sampling as well as other confounding/surrogate variables systematically searched and identified, we found 43 e-genes for which proximal SNPs influence expression level. Then, we performed the analysis again, removing the disease status from the covariates, and we identified 54 e-genes, 16 of which are identified de novo and, thus, possibly related to placental disease. We found a highly significant overlap with previous studies for the list of 43 e-genes, validating our methodology and findings. Among the 16 disease-specific e-genes, several are intrinsic to trophoblast biology and, therefore, constitute novel targets of interest to better characterize placental pathology and its varied clinical consequences.
The approach that we used may also be applied to the study of other human diseases where confounding factors have hampered a better understanding of the pathology.},
}
@article{pmid36830307,
  author   = {Liu, H and Liu, X and He, J and Zhang, L and Zhao, F and Zhou, Z and Hua, X and Yu, Y},
  title    = {Emergence and Evolution of {OXA-23}-Producing {ST46Pas-ST462Oxf-KL28-OCL1} Carbapenem-Resistant {Acinetobacter} baumannii Mediated by a Novel {ISAba1}-Based {Tn7534} Transposon.},
  journal  = {Antibiotics (Basel, Switzerland)},
  year     = {2023},
  volume   = {12},
  number   = {2},
  doi      = {10.3390/antibiotics12020396},
  pmid     = {36830307},
  issn     = {2079-6382},
  abstract = {Carbapenem-resistant Acinetobacter baumannii (CRAB) isolates of global clone 1 (GC1) and global clone 2 (GC2) have been widely reported. Nevertheless, non-GC1 and non-GC2 CRAB strains have been studied less. In particular, no reports concerning sequence type 46 (ST46Pas) CRAB strains have been described thus far. In this work, the genomic features and possible evolution mechanism of ST46Pas OXA-23-producing CRAB isolates from clinical specimens are reported for the first time. Antimicrobial susceptibility testing of three ST46Pas strains revealed identical resistance profiles (resistance to imipenem, meropenem, ciprofloxacin and the combination of cefoperazone/sulbactam at a 2:1 ratio). They were found to belong to ST46Pas and ST462Oxf with capsular polysaccharide 28 (KL28) and lipooligosaccharide 1 (OCL1), respectively. Whole-genome sequencing (WGS) revealed that all contained one copy of chromosomal blaOXA-23, which was located in a novel ISAba1-based Tn7534 composite transposon. In particular, another copy of the Tn7534 composite transposon was identified in an Hgz_103-type plasmid with 9 bp target site duplications (TSDs, ACAACATGC) in the A. baumannii ZHOU strain. As the strains originated from two neighboring intensive care units (ICUs), ST46Pas OXA-23-producing CRAB strains may have evolved via transposition events or a pdif module.
Based on the GenBank database, ST46Pas strains were collected from various sources; however, most were collected in Hangzhou (China) from 2014 to 2021. Pan-genome analysis revealed 3276 core genes, 0 soft-core genes, 768 shell genes and 443 cloud genes shared among all ST46Pas strains. In conclusion, the emergence of ST46Pas CRAB strains might present a new threat to healthcare settings; therefore, effective surveillance is required to prevent further dissemination.},
}
@article{pmid36828537,
  author   = {Parker, K and Wood, H and Russell, JA and Yarmosh, D and Shteyman, A and Bagnoli, J and Knight, B and Aspinwall, JR and Jacobs, J and Werking, K and Winegar, R},
  title    = {Development and Optimization of an Unbiased, Metagenomics-Based Pathogen Detection Workflow for Infectious Disease and Biosurveillance Applications.},
  journal  = {Tropical medicine and infectious disease},
  year     = {2023},
  volume   = {8},
  number   = {2},
  doi      = {10.3390/tropicalmed8020121},
  pmid     = {36828537},
  issn     = {2414-6366},
  abstract = {Rapid, specific, and sensitive identification of microbial pathogens is critical to infectious disease diagnosis and surveillance. Classical culture-based methods can be applied to a broad range of pathogens but have long turnaround times. Molecular methods, such as PCR, are time-effective but are not comprehensive and may not detect novel strains. Metagenomic shotgun next-generation sequencing (NGS) promises specific identification and characterization of any pathogen (viruses, bacteria, fungi, and protozoa) in a less biased way. Despite its great potential, NGS has yet to be widely adopted by clinical microbiology laboratories due in part to the absence of standardized workflows. Here, we describe a sample-to-answer workflow called PanGIA (Pan-Genomics for Infectious Agents) that includes simplified, standardized wet-lab procedures and data analysis with an easy-to-use bioinformatics tool. PanGIA is an end-to-end, multi-use workflow that can be used for pathogen detection and related applications, such as biosurveillance and biothreat detection. We performed a comprehensive survey and assessment of current, commercially available wet-lab technologies and open-source bioinformatics tools for each workflow component.
The workflow includes total nucleic acid extraction from clinical human whole blood and environmental microbial forensic swabs as sample inputs, host nucleic acid depletion, dual DNA and RNA library preparation, shotgun sequencing on an Illumina MiSeq, and sequencing data analysis. The PanGIA workflow can be completed within 24 h and is currently compatible with bacteria and viruses. Here, we present data from the development and application of the clinical and environmental workflows, enabling the specific detection of pathogens associated with bloodstream infections and environmental biosurveillance, without the need for targeted assay development.},
}
@article{pmid36824763,
  author   = {Joubert, PM and Krasileva, KV},
  title    = {Distinct genomic contexts predict gene presence-absence variation in different pathotypes of a fungal plant pathogen.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2023},
  doi      = {10.1101/2023.02.17.529015},
  pmid     = {36824763},
  abstract = {BACKGROUND: Fungi use the accessory segments of their pan-genomes to adapt to their environments. While gene presence-absence variation (PAV) contributes to shaping these accessory gene reservoirs, whether these events happen in specific genomic contexts remains unclear. Additionally, since pan-genome studies often group together all members of the same species, it is uncertain whether genomic or epigenomic features shaping pan-genome evolution are consistent across populations within the same species. Fungal plant pathogens are useful models for answering these questions because members of the same species often infect distinct hosts, and they frequently rely on gene PAV to adapt to these hosts.
RESULTS: We analyzed gene PAV in the rice and wheat blast fungus, Magnaporthe oryzae, and found that PAV of disease-causing effectors, antibiotic production, and non-self-recognition genes may drive the adaptation of the fungus to its environment. We then analyzed genomic and epigenomic features and data from available datasets for patterns that might help explain these PAV events. We observed that proximity to transposable elements (TEs), gene GC content, gene length, expression level in the host, and histone H3K27me3 marks were different between PAV genes and conserved genes, among other features. We used these features to construct a random forest classifier that was able to predict whether a gene is likely to experience PAV with high precision (86.06%) and recall (92.88%) in rice-infecting M. oryzae. Finally, we found that PAV in wheat- and rice-infecting pathotypes of M. oryzae differed in their number and their genomic context.
CONCLUSIONS: Our results suggest that genomic and epigenomic features of gene PAV can be used to better understand and even predict fungal pan-genome evolution. We also show that substantial intra-species variation can exist in these features.},
}
@article{pmid36824272,
  author   = {Gao, Y and Xu, J and Li, Z and Zhang, Y and Riera, N and Xiong, Z and Ouyang, Z and Liu, X and Lu, Z and Seymour, D and Zhong, B and Wang, N},
  title    = {Citrus genomic resources unravel putative genetic determinants of {Huanglongbing} pathogenicity.},
  journal  = {iScience},
  year     = {2023},
  volume   = {26},
  number   = {2},
  pages    = {106024},
  doi      = {10.1016/j.isci.2023.106024},
  pmid     = {36824272},
  issn     = {2589-0042},
  abstract = {Citrus HLB caused by Candidatus Liberibacter asiaticus is a pathogen-triggered immune disease. Here, we identified putative genetic determinants of HLB pathogenicity by integrating citrus genomic resources to characterize the pan-genome of accessions that differ in their response to HLB. Genome-wide association mapping and analysis of allele-specific expression between susceptible, tolerant, and resistant accessions further refined candidates underlying the response to HLB. We first developed a phased diploid assembly of Citrus sinensis 'Newhall' genome and produced resequencing data for 91 citrus accessions that differ in their response to HLB. These data were combined with previous resequencing data from 356 accessions for genome-wide association mapping of the HLB response. Genes determinants for HLB pathogenicity were associated with host immune response, ROS production, and antioxidants. Overall, this study has provided a significant resource of citrus genomic data and identified candidate genes to be further explored to understand the genetic determinants of HLB pathogenicity.},
}
@article{pmid36823453,
  author   = {Webb, EA and Held, NA and Zhao, Y and Graham, ED and Conover, AE and Semones, J and Lee, MD and Feng, Y and Fu, FX and Saito, MA and Hutchins, DA},
  title    = {Importance of mobile genetic element immunity in numerically abundant {Trichodesmium} clades.},
  journal  = {ISME communications},
  year     = {2023},
  volume   = {3},
  number   = {1},
  pages    = {15},
  pmid     = {36823453},
  issn     = {2730-6151},
  abstract = {The colony-forming cyanobacteria Trichodesmium spp. are considered one of the most important nitrogen-fixing genera in the warm, low nutrient ocean. Despite this central biogeochemical role, many questions about their evolution, physiology, and trophic interactions remain unanswered. To address these questions, we describe Trichodesmium pangenomic potential via significantly improved genomic assemblies from two isolates and 15 new >50% complete Trichodesmium metagenome-assembled genomes from hand-picked, Trichodesmium colonies spanning the Atlantic Ocean. Phylogenomics identified ~four N2 fixing clades of Trichodesmium across the transect, with T. thiebautii dominating the colony-specific reads. Pangenomic analyses showed that all T. thiebautii MAGs are enriched in COG defense mechanisms and encode a vertically inherited Type III-B Clustered Regularly Interspaced Short Palindromic Repeats and associated protein-based immunity system (CRISPR-Cas). Surprisingly, this CRISPR-Cas system was absent in all T. erythraeum genomes, vertically inherited by T. thiebautii, and correlated with increased signatures of horizontal gene transfer. Additionally, the system was expressed in metaproteomic and transcriptomic datasets and CRISPR spacer sequences with 100% identical hits to field-assembled, putative phage genome fragments were identified. While the currently CO2-limited T. erythraeum is expected to be a 'winner' of anthropogenic climate change, their genomic dearth of known phage resistance mechanisms, compared to T.
thiebautii, could put this outcome in question. Thus, the clear demarcation of T. thiebautii maintaining CRISPR-Cas systems, while T. erythraeum does not, identifies Trichodesmium as an ecologically important CRISPR-Cas model system, and highlights the need for more research on phage-Trichodesmium interactions.},
}
@article{pmid36824593,
  author   = {Braich, S and Baillie, RC and Spangenberg, GC and Cogan, NOI},
  title    = {A new and improved genome sequence of {Cannabis} sativa.},
  journal  = {GigaByte (Hong Kong, China)},
  year     = {2020},
  volume   = {2020},
  pages    = {gigabyte10},
  doi      = {10.46471/gigabyte.10},
  pmid     = {36824593},
  issn     = {2709-4715},
  abstract = {Cannabis is a diploid species (2n = 20), the estimated haploid genome sizes of the female and male plants using flow cytometry are 818 and 843 Mb respectively. Although the genome of Cannabis has been sequenced (from hemp, wild and high-THC strains), all assemblies have significant gaps. In addition, there are inconsistencies in the chromosome numbering which limits their use. A new comprehensive draft genome sequence assembly (∼900 Mb) has been generated from the medicinal cannabis strain Cannbio-2, that produces a balanced ratio of cannabidiol and delta-9-tetrahydrocannabinol using long-read sequencing. The assembly was subsequently analysed for completeness by ordering the contigs into chromosome-scale pseudomolecules using a reference genome assembly approach, annotated and compared to other existing reference genome assemblies. The Cannbio-2 genome sequence assembly was found to be the most complete genome sequence available based on nucleotides assembled and BUSCO evaluation in Cannabis sativa with a comprehensive genome annotation. The new draft genome sequence is an advancement in Cannabis genomics permitting pan-genome analysis, genomic selection as well as genome editing.},
}
@article{pmid36819029,
  author   = {Liu, B and Ren, YS and Su, CY and Abe, Y and Zhu, DH},
  title    = {Pangenomic analysis of {Wolbachia} provides insight into the evolution of host adaptation and cytoplasmic incompatibility factor genes.},
  journal  = {Frontiers in microbiology},
  year     = {2023},
  volume   = {14},
  pages    = {1084839},
  pmid     = {36819029},
  issn     = {1664-302X},
  abstract = {INTRODUCTION: The genus Wolbachia provides a typical example of intracellular bacteria that infect the germline of arthropods and filarial nematodes worldwide. Their importance as biological regulators of invertebrates, so it is particularly important to study the evolution, divergence and host adaptation of these bacteria at the genome-wide level.
METHODS: Here, we used publicly available Wolbachia genomes to reconstruct their evolutionary history and explore their adaptation under host selection.
RESULTS: Our findings indicate that segmental and single-gene duplications, such as DNA methylase, bZIP transcription factor, heat shock protein 90, in single monophyletic Wolbachia lineages (including supergroups A and B) may be responsible for improving the ability to adapt to a broad host range in arthropod-infecting strains. In contrast to A strains, high genetic diversity and rapidly evolving gene families occur in B strains, which may promote the ability of supergroup B strains to adapt to new hosts and their large-scale spreading. In addition, we hypothesize that there might have been two independent horizontal transfer events of cif genes in two sublineages of supergroup A strains. Interestingly, during the independent evolution of supergroup A and B strains, the rapid evolution of cif genes in supergroup B strains resulted in the loss of their functional domain, reflected in a possible decrease in the proportion of induced cytoplasmic incompatibility (CI) strains.
DISCUSSION: This present study highlights for reconstructing of evolutionary history, addressing host adaptation-related evolution and exploring the origin and divergence of CI genes in each Wolbachia supergroup. Our results thus not only provide a basis for further exploring the evolutionary history of Wolbachia adaptation under host selection but also reveal a new research direction for studying the molecular regulation of Wolbachia-induced cytoplasmic incompatibility.},
}
@article{pmid36817109,
  author   = {Dereeper, A and Allouch, N and Guerlais, V and Garnier, M and Ma, L and De Jonckheere, JF and Joseph, SJ and Ali, IKM and Talarmin, A and Marcelino, I},
  title    = {Naegleria genus pangenome reveals new structural and functional insights into the versatility of these free-living amoebae.},
  journal  = {Frontiers in microbiology},
  year     = {2022},
  volume   = {13},
  pages    = {1056418},
  pmid     = {36817109},
  issn     = {1664-302X},
  abstract = {INTRODUCTION: Free-living amoebae of the Naegleria genus belong to the major protist clade Heterolobosea and are ubiquitously distributed in soil and freshwater habitats. Of the 47 Naegleria species described, N. fowleri is the only one being pathogenic to humans, causing a rare but fulminant primary amoebic meningoencephalitis. Some Naegleria genome sequences are publicly available, but the genetic basis for Naegleria diversity and ability to thrive in diverse environments (including human brain) remains unclear.
METHODS: Herein, we constructed a high-quality Naegleria genus pangenome to obtain a comprehensive catalog of genes encoded by these amoebae. For this, we first sequenced, assembled, and annotated six new Naegleria genomes.
RESULTS AND DISCUSSION: Genome architecture analyses revealed that Naegleria may use genome plasticity features such as ploidy/aneuploidy to modulate their behavior in different environments. When comparing 14 near-to-complete genome sequences, our results estimated the theoretical Naegleria pangenome as a closed genome, with 13,943 genes, including 3,563 core and 10,380 accessory genes. The functional annotations revealed that a large fraction of Naegleria genes show significant sequence similarity with those already described in other kingdoms, namely Animalia and Plantae. Comparative analyses highlighted a remarkable genomic heterogeneity, even for closely related strains and demonstrate that Naegleria harbors extensive genome variability, reflected in different metabolic repertoires. If Naegleria core genome was enriched in conserved genes essential for metabolic, regulatory and survival processes, the accessory genome revealed the presence of genes involved in stress response, macromolecule modifications, cell signaling and immune response. Commonly reported N. fowleri virulence-associated genes were present in both core and accessory genomes, suggesting that N. fowleri's ability to infect human brain could be related to its unique species-specific genes (mostly of unknown function) and/or to differential gene expression. The construction of Naegleria first pangenome allowed us to move away from a single reference genome (that does not necessarily represent each species as a whole) and to identify essential and dispensable genes in Naegleria evolution, diversity and biology, paving the way for further genomic and post-genomic studies.},
}
@article{pmid36815495,
  author   = {Favaro, L and Campanaro, S and Fugaban, JII and Treu, L and Jung, ES and d'Ovidio, L and de Oliveira, DP and Liong, MT and Ivanova, IV and Todorov, SD},
  title    = {Genomic, metabolomic, and functional characterisation of beneficial properties of Pediococcus pentosaceus ST58, isolated from human oral cavity.},
  journal  = {Beneficial microbes},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {1--16},
  doi      = {10.3920/BM2022.0067},
  pmid     = {36815495},
  issn     = {1876-2891},
  abstract = {Bacteriocins produced by lactic acid bacteria are proteinaceous antibacterial metabolites that normally exhibit bactericidal or bacteriostatic activity against genetically closely related bacteria. In this work, the bacteriocinogenic potential of Pediococcus pentosaceus strain ST58, isolated from oral cavity of a healthy volunteer was evaluated. To better understand the biological role of this strain, its technological and safety traits were deeply investigated through a combined approach considering physiological, metabolomic and genomic properties. Three out of 14 colonies generating inhibition zones were confirmed to be bacteriocin producers and, according to repPCR and RAPD-PCR, differentiation assays, and 16S rRNA sequencing it was confirmed to be replicates of the same strain, identified as P. pentosaceus, named ST58. Based on multiple isolation of the same strain (P. pentosaceus ST58) over the 26 weeks in screening process for the potential bacteriocinogenic strains from the oral cavity of the same volunteer, strain ST58 can be considered a persistent component of oral cavity microbiota. Genomic analysis of P. pentosaceus ST58 revealed the presence of operons encoding for bacteriocins pediocin PA-1 and penocin A. The produced bacteriocin(s) inhibited the growth of Listeria monocytogenes, Enterococcus spp. and some Lactobacillus spp. used to determine the activity spectrum. The highest levels of production (6400 AU/ml) were recorded against L. monocytogenes strains after 24 h of incubation and the antimicrobial activity was inhibited after treatment of the cell-free supernatants with proteolytic enzymes. Noteworthy, P. pentosaceus ST58 also presented antifungal activity and key metabolites potentially involved in these properties were identified. Overall, this strain can be of great biotechnological interest towards the development of effective bio-preservation cultures as well as potential health promoting microbes.},
}
@article{pmid36814455,
  author   = {Tranchant-Dubreuil, C and Chenal, C and Blaison, M and Albar, L and Klein, V and Mariac, C and Wing, RA and Vigouroux, Y and Sabot, F},
  title    = {FrangiPANe, a tool for creating a panreference using left behind reads.},
  journal  = {NAR genomics and bioinformatics},
  year     = {2023},
  volume   = {5},
  number   = {1},
  pages    = {lqad013},
  pmid     = {36814455},
  issn     = {2631-9268},
  abstract = {We present here FrangiPANe, a pipeline developed to build panreference using short reads through a map-then-assemble strategy. Applying it to 248 African rice genomes using an improved CG14 reference genome, we identified an average of 8 Mb of new sequences and 5290 new contigs per individual. In total, 1.4 G of new sequences, consisting of 1 306 676 contigs, were assembled. We validated 97.7% of the contigs of the TOG5681 cultivar individual assembly from short reads on a newly long reads genome assembly of the same TOG5681 cultivar. FrangiPANe also allowed the anchoring of 31.5% of the new contigs within the CG14 reference genome, with a 92.5% accuracy at 2 kb span. We annotated in addition 3252 new genes absent from the reference. FrangiPANe was developed as a modular and interactive application to simplify the construction of a panreference using the map-then-assemble approach. It is available as a Docker image containing (i) a Jupyter notebook centralizing codes, documentation and interactive visualization of results, (ii) python scripts and (iii) all the software and libraries requested for each step of the analysis. We foreseen our approach will help leverage large-scale illumina dataset for pangenome studies in GWAS or detection of selection.},
}
@article{pmid36807539,
  author   = {Wang, ZF and Rouard, M and Droc, G and Heslop-Harrison, PJS and Ge, XJ},
  title    = {Genome assembly of Musa beccarii shows extensive chromosomal rearrangements and genome expansion during evolution of Musaceae genomes.},
  journal  = {GigaScience},
  year     = {2022},
  volume   = {12},
  number   = {},
  pages    = {},
  doi      = {10.1093/gigascience/giad005},
  pmid     = {36807539},
  issn     = {2047-217X},
  abstract = {BACKGROUND: Musa beccarii (Musaceae) is a banana species native to Borneo, sometimes grown as an ornamental plant. The basic chromosome number of Musa species is x = 7, 10, or 11; however, M. beccarii has a basic chromosome number of x = 9 (2n = 2x = 18), which is the same basic chromosome number of species in the sister genera Ensete and Musella. Musa beccarii is in the section Callimusa, which is sister to the section Musa. We generated a high-quality chromosome-scale genome assembly of M. beccarii to better understand the evolution and diversity of genomes within the family Musaceae.
FINDINGS: The M. beccarii genome was assembled by long-read and Hi-C sequencing, and genes were annotated using both long Iso-seq and short RNA-seq reads. The size of M. beccarii was the largest among all known Musaceae assemblies (∼570 Mbp) due to the expansion of transposable elements and increased 45S ribosomal DNA sites. By synteny analysis, we detected extensive genome-wide chromosome fusions and fissions between M. beccarii and the other Musa and Ensete species, far beyond those expected from differences in chromosome number. Within Musaceae, M. beccarii showed a reduced number of terpenoid synthase genes, which are related to chemical defense, and enrichment in lipid metabolism genes linked to the physical defense of the cell wall. Furthermore, type III polyketide synthase was the most abundant biosynthetic gene cluster (BGC) in M. beccarii. BGCs were not conserved in Musaceae genomes.
CONCLUSIONS: The genome assembly of M. beccarii is the first chromosome-scale genome assembly in the Callimusa section in Musa, which provides an important genetic resource that aids our understanding of the evolution of Musaceae genomes and enhances our knowledge of the pangenome.},
}
@article{pmid36797493,
  author   = {Rautiainen, M and Nurk, S and Walenz, BP and Logsdon, GA and Porubsky, D and Rhie, A and Eichler, EE and Phillippy, AM and Koren, S},
  title    = {Telomere-to-telomere assembly of diploid chromosomes with Verkko.},
  journal  = {Nature biotechnology},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  pmid     = {36797493},
  issn     = {1546-1696},
  abstract = {The Telomere-to-Telomere consortium recently assembled the first truly complete sequence of a human genome. To resolve the most complex repeats, this project relied on manual integration of ultra-long Oxford Nanopore sequencing reads with a high-resolution assembly graph built from long, accurate PacBio high-fidelity reads. We have improved and automated this strategy in Verkko, an iterative, graph-based pipeline for assembling complete, diploid genomes. Verkko begins with a multiplex de Bruijn graph built from long, accurate reads and progressively simplifies this graph by integrating ultra-long reads and haplotype-specific markers. The result is a phased, diploid assembly of both haplotypes, with many chromosomes automatically assembled from telomere to telomere. Running Verkko on the HG002 human genome resulted in 20 of 46 diploid chromosomes assembled without gaps at 99.9997% accuracy. The complete assembly of diploid genomes is a critical step towards the construction of comprehensive pangenome databases and chromosome-scale comparative genomics.},
}
@article{pmid36795789,
  author   = {Mohamed, F and Ruiz Rodriguez, LG and Zorzoli, A and Dorfmueller, HC and Raya, RR and Mozzi, F},
  title    = {Genomic diversity in Fructobacillus spp. isolated from fructose-rich niches.},
  journal  = {PloS one},
  year     = {2023},
  volume   = {18},
  number   = {2},
  pages    = {e0281839},
  doi      = {10.1371/journal.pone.0281839},
  pmid     = {36795789},
  issn     = {1932-6203},
  abstract = {The Fructobacillus genus is a group of obligately fructophilic lactic acid bacteria (FLAB) that requires the use of fructose or another electron acceptor for their growth. In this work, we performed a comparative genomic analysis within the genus Fructobacillus by using 24 available genomes to evaluate genomic and metabolic differences among these organisms. In the genome of these strains, which varies between 1.15- and 1.75-Mbp, nineteen intact prophage regions, and seven complete CRISPR-Cas type II systems were found. Phylogenetic analyses located the studied genomes in two different clades. A pangenome analysis and a functional classification of their genes revealed that genomes of the first clade presented fewer genes involved in the synthesis of amino acids and other nitrogen compounds. Moreover, the presence of genes strictly related to the use of fructose and electron acceptors was variable within the genus, although these variations were not always related to the phylogeny.},
}
@article{pmid36794816,
  author   = {Derrien, M and Mikulic, N and Uyoga, MA and Chenoll, E and Climent, E and Howard-Varona, A and Nyilima, S and Stoffel, NU and Karanja, S and Kottler, R and Stahl, B and Zimmermann, MB and Bourdet-Sicard, R},
  title    = {Gut microbiome function and composition in infants from rural Kenya and association with human milk oligosaccharides.},
  journal  = {Gut microbes},
  year     = {2023},
  volume   = {15},
  number   = {1},
  pages    = {2178793},
  doi      = {10.1080/19490976.2023.2178793},
  pmid     = {36794816},
  issn     = {1949-0984},
  abstract = {The gut microbiota evolves rapidly after birth, responding dynamically to environmental factors and playing a key role in short- and long-term health. Lifestyle and rurality have been shown to contribute to differences in the gut microbiome, including Bifidobacterium levels, between infants. We studied the composition, function and variability of the gut microbiomes of 6- to 11-month-old Kenyan infants (n = 105). Shotgun metagenomics showed Bifidobacterium longum to be the dominant species. A pangenomic analysis of B. longum in gut metagenomes revealed a high prevalence of B. longum subsp. infantis (B. infantis) in Kenyan infants (80%), and possible co-existence of this subspecies with B. longum subsp. longum. Stratification of the gut microbiome into community (GMC) types revealed differences in composition and functional features. GMC types with a higher prevalence of B. infantis and abundance of B. breve also had a lower pH and a lower abundance of genes encoding pathogenic features. An analysis of human milk oligosaccharides (HMOs) classified the human milk (HM) samples into four groups defined on the basis of secretor and Lewis polymorphisms revealed a higher prevalence of HM group III (Se+, Le-) (22%) than in most previously studied populations, with an enrichment in 2'-fucosyllactose. 
Our results show that the gut microbiome of partially breastfed Kenyan infants over the age of six months is enriched in bacteria from the Bifidobacterium community, including B. infantis, and that the high prevalence of a specific HM group may indicate a specific HMO-gut microbiome association. This study sheds light on gut microbiome variation in an understudied population with limited exposure to modern microbiome-altering factors.},
}
@article{pmid36792708,
  author   = {Fudge, JB},
  title    = {Capturing haplotype variation in populations using pangenome references.},
  journal  = {Nature biotechnology},
  year     = {2023},
  volume   = {41},
  number   = {2},
  pages    = {194},
  doi      = {10.1038/s41587-023-01691-1},
  pmid     = {36792708},
  issn     = {1546-1696},
}
@article{pmid36792019,
  author   = {Lekired, A and Cherif-Silini, H and Silini, A and Yahia, HB and Ouzari, IH},
  title    = {Comparative genomics reveals the acquisition of mobile genetic elements by the plant growth-promoting Pantoea eucrina OB49 in polluted environments.},
  journal  = {Genomics},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {110579},
  doi      = {10.1016/j.ygeno.2023.110579},
  pmid     = {36792019},
  issn     = {1089-8646},
  abstract = {Heavy metal-tolerant plant growth-promoting bacteria (PGPB) have gained popularity in bioremediation in recent years. A genome-assisted study of a heavy metal-tolerant PGPB Pantoea eucrina OB49 isolated from the rhizosphere of wheat grown on a heavy metal-contaminated site is presented. Comparative pan-genome analysis indicated that OB49 acquired heavy metal resistance genes through horizontal gene transfer. On contigs S10 and S12, OB49 has two arsRBCH operons that give arsenic resistance. On the S12 contig, an arsRBCH operon was discovered in conjunction with the merRTPCADE operon, which provides mercury resistance. P. eucrina OB49 may be involved in an ecological alternative for heavy metal remediation and growth promotion of wheat grown in metal-polluted soils. Our results suggested the detection of mobile genetic elements that harbour the ars operon and the fluoride resistance genes adjacent to the mer operon.},
}
@article{pmid36781662,
  author   = {Thomas, WJW and Zhang, Y and Amas, JC and Cantila, AY and Zandberg, JD and Harvie, SL and Batley, J},
  title    = {Innovative Advances in Plant Genotyping.},
  journal  = {Methods in molecular biology (Clifton, N.J.)},
  year     = {2023},
  volume   = {2638},
  number   = {},
  pages    = {451--465},
  pmid     = {36781662},
  issn     = {1940-6029},
  abstract = {Over the past decade, advances in plant genotyping have been critical in enabling the identification of genetic diversity, in understanding evolution, and in dissecting important traits in both crops and native plants. The widespread popularity of single-nucleotide polymorphisms (SNPs) has prompted significant improvements to SNP-based genotyping, including SNP arrays, genotyping by sequencing, and whole-genome resequencing. More recent approaches, including genotyping structural variants, utilizing pangenomes to capture species-wide genetic diversity and exploiting machine learning to analyze genotypic data sets, are pushing the boundaries of what plant genotyping can offer. In this chapter, we highlight these innovations and discuss how they will accelerate and advance future genotyping efforts.},
}
@article{pmid36778393,
  author   = {Bonnie, JK and Ahmed, O and Langmead, B},
  title    = {DandD: efficient measurement of sequence growth and similarity.},
  journal  = {bioRxiv : the preprint server for biology},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1101/2023.02.02.526837},
  pmid     = {36778393},
  abstract = {Genome assembly databases are growing rapidly. The sequence content in each new assembly can be largely redundant with previous ones, but this is neither conceptually nor algorithmically easy to measure. We propose new methods and a new tool called DandD that addresses the question of how much new sequence is gained when a sequence collection grows. DandD can describe how much human structural variation is being discovered in each new human genome assembly and when discoveries will level off in the future. DandD uses a measure called δ ("delta"), developed initially for data compression. Computing δ directly requires counting k-mers, but DandD can rapidly estimate it using genomic sketches. We also propose δ as an alternative to k-mer-specific cardinalities when computing the Jaccard coefficient, avoiding the pitfalls of a poor choice of k. We demonstrate the utility of DandD's functions for estimating δ, characterizing the rate of pangenome growth, and computing allpairs similarities using k-independent Jaccard. DandD is open source software available at: https://github.com/jessicabonnie/dandd .},
}
@article{pmid36777875,
  author   = {Grimplet, J},
  title    = {Genomic and Bioinformatic Resources for Perennial Fruit Species.},
  journal  = {Current genomics},
  year     = {2022},
  volume   = {23},
  number   = {4},
  pages    = {217--233},
  pmid     = {36777875},
  issn     = {1389-2029},
  abstract = {In the post-genomic era, data management and development of bioinformatic tools are critical for the adequate exploitation of genomics data. In this review, we address the actual situation for the subset of crops represented by the perennial fruit species. The agronomical singularity of these species compared to plant and crop model species provides significant challenges on the implementation of good practices generally not addressed in other species. Studies are usually performed over several years in non-controlled environments, usage of rootstock is common, and breeders heavily rely on vegetative propagation. A reference genome is now available for all the major species as well as many members of the economically important genera for breeding purposes. Development of pangenome for these species is beginning to gain momentum which will require a substantial effort in term of bioinformatic tool development. The available tools for genome annotation and functional analysis will also be presented.},
}
@article{pmid36764870,
  author   = {Dwivedi, SL and Heslop-Harrison, P and Spillane, C and McKeown, PC and Edwards, D and Goldman, I and Ortiz, R},
  title    = {Evolutionary dynamics and adaptive benefits of deleterious mutations in crop gene pools.},
  journal  = {Trends in plant science},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1016/j.tplants.2023.01.006},
  pmid     = {36764870},
  issn     = {1878-4372},
  abstract = {Mutations with deleterious consequences in nature may be conditionally deleterious in crop plants. That is, while some genetic variants may reduce fitness under wild conditions and be subject to purifying selection, they can be under positive selection in domesticates. Such deleterious alleles can be plant breeding targets, particularly for complex traits. The difficulty of distinguishing favorable from unfavorable variants reduces the power of selection, while favorable trait variation and heterosis may be attributable to deleterious alleles. Here, we review the roles of deleterious mutations in crop breeding and discuss how they can be used as a new avenue for crop improvement with emerging genomic tools, including HapMaps and pangenome analysis, aiding the identification, removal, or exploitation of deleterious mutations.},
}
@article{pmid35072136,
  author   = {Rehm, HL and Page, AJH and Smith, L and Adams, JB and Alterovitz, G and Babb, LJ and Barkley, MP and Baudis, M and Beauvais, MJS and Beck, T and Beckmann, JS and Beltran, S and Bernick, D and Bernier, A and Bonfield, JK and Boughtwood, TF and Bourque, G and Bowers, SR and Brookes, AJ and Brudno, M and Brush, MH and Bujold, D and Burdett, T and Buske, OJ and Cabili, MN and Cameron, DL and Carroll, RJ and Casas-Silva, E and Chakravarty, D and Chaudhari, BP and Chen, SH and Cherry, JM and Chung, J and Cline, M and Clissold, HL and Cook-Deegan, RM and Courtot, M and Cunningham, F and Cupak, M and Davies, RM and Denisko, D and Doerr, MJ and Dolman, LI and Dove, ES and Dursi, LJ and Dyke, SOM and Eddy, JA and Eilbeck, K and Ellrott, KP and Fairley, S and Fakhro, KA and Firth, HV and Fitzsimons, MS and Fiume, M and Flicek, P and Fore, IM and Freeberg, MA and Freimuth, RR and Fromont, LA and Fuerth, J and Gaff, CL and Gan, W and Ghanaim, EM and Glazer, D and Green, RC and Griffith, M and Griffith, OL and Grossman, RL and Groza, T and Auvil, JMG and Guigó, R and Gupta, D and Haendel, MA and Hamosh, A and Hansen, DP and Hart, RK and Hartley, DM and Haussler, D and Hendricks-Sturrup, RM and Ho, CWL and Hobb, AE and Hoffman, MM and Hofmann, OM and Holub, P and Hsu, JS and Hubaux, JP and Hunt, SE and Husami, A and Jacobsen, JO and Jamuar, SS and Janes, EL and Jeanson, F and Jené, A and Johns, AL and Joly, Y and Jones, SJM and Kanitz, A and Kato, K and Keane, TM and Kekesi-Lafrance, K and Kelleher, J and Kerry, G and Khor, SS and Knoppers, BM and Konopko, MA and Kosaki, K and Kuba, M and Lawson, J and Leinonen, R and Li, S and Lin, MF and Linden, M and Liu, X and Udara Liyanage, I and Lopez, J and Lucassen, AM and Lukowski, M and Mann, AL and Marshall, J and Mattioni, M and Metke-Jimenez, A and Middleton, A and Milne, RJ and Molnár-Gábor, F and Mulder, N and Munoz-Torres, MC and Nag, R and Nakagawa, H and Nasir, J and Navarro, A 
and Nelson, TH and Niewielska, A and Nisselle, A and Niu, J and Nyrönen, TH and O'Connor, BD and Oesterle, S and Ogishima, S and Wang, VO and Paglione, LAD and Palumbo, E and Parkinson, HE and Philippakis, AA and Pizarro, AD and Prlic, A and Rambla, J and Rendon, A and Rider, RA and Robinson, PN and Rodarmer, KW and Rodriguez, LL and Rubin, AF and Rueda, M and Rushton, GA and Ryan, RS and Saunders, GI and Schuilenburg, H and Schwede, T and Scollen, S and Senf, A and Sheffield, NC and Skantharajah, N and Smith, AV and Sofia, HJ and Spalding, D and Spurdle, AB and Stark, Z and Stein, LD and Suematsu, M and Tan, P and Tedds, JA and Thomson, AA and Thorogood, A and Tickle, TL and Tokunaga, K and Törnroos, J and Torrents, D and Upchurch, S and Valencia, A and Guimera, RV and Vamathevan, J and Varma, S and Vears, DF and Viner, C and Voisin, C and Wagner, AH and Wallace, SE and Walsh, BP and Williams, MS and Winkler, EC and Wold, BJ and Wood, GM and Woolley, JP and Yamasaki, C and Yates, AD and Yung, CK and Zass, LJ and Zaytseva, K and Zhang, J and Goodhand, P and North, K and Birney, E},
  title    = {GA4GH: International policies and standards for data sharing across genomic research and healthcare.},
  journal  = {Cell genomics},
  year     = {2021},
  volume   = {1},
  number   = {2},
  pages    = {},
  pmid     = {35072136},
  issn     = {2666-979X},
  support  = {OT3 HL142478/HL/NHLBI NIH HHS/United States ; U13 CA221044/CA/NCI NIH HHS/United States ; 75N91019D00024/CA/NCI NIH HHS/United States ; U54 HG006542/HG/NHGRI NIH HHS/United States ; U41 HG006834/HG/NHGRI NIH HHS/United States ; R00 HG010157/HG/NHGRI NIH HHS/United States ; U24 HG011025/HG/NHGRI NIH HHS/United States ; R24 OD011883/OD/NIH HHS/United States ; U01 CA242954/CA/NCI NIH HHS/United States ; 220544/WT_/Wellcome Trust/United Kingdom ; HHSN261200800001E/CA/NCI NIH HHS/United States ; UM1 HG009443/HG/NHGRI NIH HHS/United States ; U24 HG006941/HG/NHGRI NIH HHS/United States ; R35 HG011949/HG/NHGRI NIH HHS/United States ; RM1 HG010461/HG/NHGRI NIH HHS/United 
States ; U2C OD023196/OD/NIH HHS/United States ; K99 HG010157/HG/NHGRI NIH HHS/United States ; MC_PC_19024/MRC_/Medical Research Council/United Kingdom ; U24 CA231877/CA/NCI NIH HHS/United States ; R35 GM128636/GM/NIGMS NIH HHS/United States ; 206194/WT_/Wellcome Trust/United Kingdom ; U24 HG010262/HG/NHGRI NIH HHS/United States ; R00 HG007940/HG/NHGRI NIH HHS/United States ; HHSN261201400008C/CA/NCI NIH HHS/United States ; U24 TR002306/TR/NCATS NIH HHS/United States ; 108749/WT_/Wellcome Trust/United Kingdom ; HHSN261201500003I/CA/NCI NIH HHS/United States ; U24 CA237719/CA/NCI NIH HHS/United States ; U54 HG007990/HG/NHGRI NIH HHS/United States ; /WT_/Wellcome Trust/United Kingdom ; R35 HG011899/HG/NHGRI NIH HHS/United States ; HHSN261200800001C/RC/CCR NIH HHS/United States ; MR/S003703/1/MRC_/Medical Research Council/United Kingdom ; R01 CA237118/CA/NCI NIH HHS/United States ; RM1 HG010860/HG/NHGRI NIH HHS/United States ; 201535/WT_/Wellcome Trust/United Kingdom ; U41 HG006627/HG/NHGRI NIH HHS/United States ; },
  abstract = {The Global Alliance for Genomics and Health (GA4GH) aims to accelerate biomedical advances by enabling the responsible sharing of clinical and genomic data through both harmonized data aggregation and federated approaches. The decreasing cost of genomic sequencing (along with other genome-wide molecular assays) and increasing evidence of its clinical utility will soon drive the generation of sequence data from tens of millions of humans, with increasing levels of diversity. In this perspective, we present the GA4GH strategies for addressing the major challenges of this data revolution. We describe the GA4GH organization, which is fueled by the development efforts of eight Work Streams and informed by the needs of 24 Driver Projects and other key stakeholders. 
We present the GA4GH suite of secure, interoperable technical standards and policy frameworks and review the current status of standards, their relevance to key domains of research and clinical care, and future plans of GA4GH. Broad international participation in building, adopting, and deploying GA4GH standards and frameworks will catalyze an unprecedented effort in data sharing that will be critical to advancing genomic medicine and ensuring that all populations can access its benefits.},
}
@article{pmid36760124,
  author   = {Jin, S and Han, Z and Hu, Y and Si, Z and Dai, F and He, L and Cheng, Y and Li, Y and Zhao, T and Fang, L and Zhang, T},
  title    = {Structural variation (SV)-based pan-genome and GWAS reveal the impacts of SVs on the speciation and diversification of allotetraploid cottons.},
  journal  = {Molecular plant},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1016/j.molp.2023.02.004},
  pmid     = {36760124},
  issn     = {1752-9867},
  abstract = {Structural variations (SVs) have long been described as involved in the origin, adaption, and domestication of species. However, the genetic and genomic mechanisms of that involvement are poorly understood. Here, we assembled a high-quality genome of Gossypium barbadense acc. Tanguis, a landrace which is closely connected to the formation of extra-long‒staple (ELS) cultivated cotton. A SV-based pan-genome (Pan-SV) was constructed using a total of 182,593 non-redundant SVs, including 2,236 inversions, 97,398 insertions, and 82,959 deletions from 11 assembled genomes of allopolyploid cotton. The utility of this Pan-SV was then demonstrated through population structure analysis and genome-wide association studies (GWAS). Using segregation mapping populations produced through crossing ELS cotton and the landrace along with a SV-based GWAS, certain SVs responsible for speciation, domestication, and improvement in tetraploid cottons were identified. Importantly, some of the SVs presently identified as relating to yield and fiber quality improvement had not been identified in previous SNP-based GWAS. In particular, a 9-bp indel was found to associate with elimination of the interspecific reproductive isolation between G. hirsutum and G. barbadense. This study provides insights into genome-wide, gene-scale SVs linked to important agronomic traits in a major crop species and highlights the importance of SVs during the speciation, domestication, and improvement of cultivated crop species.},
}
@article{pmid36753700,
  author   = {Tanwar, AS and Shruptha, P and Paul, B and Murali, TS and Brand, A and Satyamoorthy, K},
  title    = {How Can Omics Inform Diabetic Foot Ulcer Clinical Management? A Whole Genome Comparison of Four Clinical Strains of Staphylococcus aureus.},
  journal  = {Omics : a journal of integrative biology},
  year     = {2023},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1089/omi.2022.0184},
  pmid     = {36753700},
  issn     = {1557-8100},
  abstract = {Foot ulcers and associated infections significantly contribute to morbidity and mortality in diabetes. While diverse pathogens are found in the diabetes-related infected ulcers, Staphylococcus aureus remains one of the most virulent and widely prevalent pathogens. The high prevalence of S. aureus in chronic wound infections, especially in clinical settings, is attributed to its ability to evolve and acquire resistance against common antibiotics and to elicit an array of virulence factors. In this study, whole genome comparison of four strains of S. aureus (MUF168, MUF256, MUM270, and MUM475) isolated from diabetic foot ulcer (DFU) infections showing varying resistance patterns was carried out to study the genomic similarity, antibiotic resistance profiling, associated virulence factors, and sequence variations in drug targets. The comparative genome analysis showed strains MUM475 and MUM270 to be highly resistant, MUF256 with moderate levels of resistance, and MUF168 to be the least resistant. Strain MUF256 and MUM475 harbored more virulence factors compared with other two strains. Deleterious sequence variants were observed suggesting potential role in altering drug targets and drug efficacy. This comparative whole genome study offers new molecular insights that may potentially inform evidence-based diagnosis and treatment of DFUs in the clinic.},
}
@article{pmid36753463,
  author   = {Hulin, MT and Hill, L and Jones, JDG and Ma, W},
  title    = {Pangenomic analysis reveals plant NAD[+] manipulation as an important virulence activity of bacterial pathogen effectors.},
  journal  = {Proceedings of the National Academy of Sciences of the United States of America},
  year     = {2023},
  volume   = {120},
  number   = {7},
  pages    = {e2217114120},
  doi      = {10.1073/pnas.2217114120},
  pmid     = {36753463},
  issn     = {1091-6490},
  abstract = {Nicotinamide adenine dinucleotide (NAD[+]) has emerged as a key component in prokaryotic and eukaryotic immune systems. The recent discovery that Toll/interleukin-1 receptor (TIR) proteins function as NAD[+] hydrolases (NADase) links NAD[+]-derived small molecules with immune signaling. We investigated pathogen manipulation of host NAD[+] metabolism as a virulence strategy. Using the pangenome of the model bacterial pathogen Pseudomonas syringae, we conducted a structure-based similarity search from 35,000 orthogroups for type III effectors (T3Es) with potential NADase activity. Thirteen T3Es, including five newly identified candidates, were identified that possess domain(s) characteristic of seven NAD[+]-hydrolyzing enzyme families. Most Pseudomonas syringae strains that depend on the type III secretion system to cause disease, encode at least one NAD[+]-manipulating T3E, and many have several. We experimentally confirmed the type III-dependent secretion of a novel T3E, named HopBY, which shows structural similarity to both TIR and adenosine diphosphate ribose (ADPR) cyclase. Homologs of HopBY were predicted to be type VI effectors in diverse bacterial species, indicating potential recruitment of this activity by microbial proteins secreted during various interspecies interactions. HopBY efficiently hydrolyzes NAD[+] and specifically produces 2'cADPR, which can also be produced by TIR immune receptors of plants and by other bacteria. 
Intriguingly, this effector promoted bacterial virulence, indicating that 2'cADPR may not be the signaling molecule that directly initiates immunity. This study highlights a host-pathogen battleground centered around NAD[+] metabolism and provides insight into the NAD[+]-derived molecules involved in plant immunity.},
}
@article {pmid36749783, year = {2023}, author = {Jirakkakul, J and Khoiri, AN and Duangfoo, T and Dulsawat, S and Sutheeworapong, S and Petsong, K and Wattanachaisaereekul, S and Paenkaew, P and Tachaleat, A and Cheevadhanarak, S and Prommeenate, P}, title = {Insights into the genome of Methylobacterium sp. NMS14P, a novel bacterium for growth promotion of maize, chili, and sugarcane.}, journal = {PloS one}, volume = {18}, number = {2}, pages = {e0281505}, doi = {10.1371/journal.pone.0281505}, pmid = {36749783}, issn = {1932-6203}, abstract = {A novel methylotrophic bacterium designated as NMS14P was isolated from the root of an organic coffee plant (Coffea arabica) in Thailand. The 16S rRNA sequence analysis revealed that this new isolate belongs to the genus Methylobacterium, and its novelty was clarified by genomic and comparative genomic analyses, in which NMS14P exhibited low levels of relatedness with other Methylobacterium-type strains. NMS14P genome consists of a 6,268,579 bp chromosome, accompanied by a 542,519 bp megaplasmid and a 66,590 bp plasmid, namely pNMS14P1 and pNMS14P2, respectively. Several genes conferring plant growth promotion are aggregated on both chromosome and plasmids, including phosphate solubilization, indole-3-acetic acid (IAA) biosynthesis, cytokinins (CKs) production, 1-aminocyclopropane-1-carboxylate (ACC) deaminase activity, sulfur-oxidizing activity, trehalose synthesis, and urea metabolism. Furthermore, pangenome analysis showed that NMS14P possessed the highest number of strain-specific genes accounting for 1408 genes, particularly those that are essential for colonization and survival in a wide array of host environments, such as ABC transporter, chemotaxis, quorum sensing, biofilm formation, and biosynthesis of secondary metabolites. In vivo tests have supported that NMS14P significantly promoted the growth and development of maize, chili, and sugarcane. 
Collectively, NMS14P is proposed as a novel plant growth-promoting Methylobacterium that could potentially be applied to a broad range of host plants as Methylobacterium-based biofertilizers to reduce and ultimately substitute the use of synthetic agrochemicals for sustainable agriculture.}, }
@article {pmid36748949, year = {2023}, author = {Reddy, TS and Zomer, R and Mantri, N}, title = {Nanoformulations as a strategy to overcome the delivery limitations of cannabinoids.}, journal = {Phytotherapy research : PTR}, volume = {}, number = {}, pages = {}, doi = {10.1002/ptr.7742}, pmid = {36748949}, issn = {1099-1573}, abstract = {Medical cannabis has received significant interest in recent years due to its promising benefits in the management of pain, anxiety, depression and neurological and movement disorders. Specifically, the major phytocannabinoids derived from the cannabis plant such as (-)-trans-Δ[9]-tetrahydrocannabinol (THC) and cannabidiol (CBD), have been shown to be responsible for the pharmacological and therapeutic properties. Recently, these phytocannabinoids have also attracted special attention in cancer treatment due to their well-known palliative benefits in chemotherapy-induced nausea, vomiting, pain and loss of appetite along with their anticancer activities. Despite the enormous pharmacological benefits, the low aqueous solubility, high instability (susceptibility to extensive first pass metabolism) and poor systemic bioavailability restrict their utilization at clinical perspective. Therefore, drug delivery strategies based on nanotechnology are emerging to improve pharmacokinetic profile and bioavailability of cannabinoids as well as enhance their targeted delivery. Here, we critically review the nano-formulation systems engineered for overcoming the delivery limitations of native phytocannabinoids including polymeric and lipid-based nanoparticles (lipid nano capsules (LNCs), nanostructured lipid carriers (NLCs), nanoemulsions (NE) and self-emulsifying drug delivery systems (SEDDS)), ethosomes and cyclodextrins as well as their therapeutic applications.}, }
@article {pmid36748707, year = {2022}, author = {Worden, PJ and Bogema, DR and Micallef, ML and Go, J and Deutscher, AT and Labbate, M and Green, TJ and King, WL and Liu, M and Seymour, JR and Jenkins, C}, title = {Phylogenomic diversity of Vibrio species and other Gammaproteobacteria isolated from Pacific oysters (Crassostrea gigas) during a summer mortality outbreak.}, journal = {Microbial genomics}, volume = {8}, number = {12}, pages = {}, doi = {10.1099/mgen.0.000883}, pmid = {36748707}, issn = {2057-5858}, abstract = {The Pacific oyster (PO), Crassostrea gigas, is an important commercial marine species but periodically experiences large stock losses due to disease events known as summer mortality. Summer mortality has been linked to environmental perturbations and numerous viral and bacterial agents, indicating this disease is multifactorial in nature. In 2013 and 2014, several summer mortality events occurred within the Port Stephens estuary (NSW, Australia). Extensive culture and molecular-based investigations were undertaken and several potentially pathogenic Vibrio species were identified. To improve species identification and genomically characterise isolates obtained from this outbreak, whole-genome sequencing (WGS) and subsequent genomic analyses were performed on 48 bacterial isolates, as well as a further nine isolates from other summer mortality studies using the same batch of juveniles. Average nucleotide identity (ANI) identified most isolates to the species level and included members of the Photobacterium, Pseudoalteromonas, Shewanella and Vibrio genera, with Vibrio species making up more than two-thirds of all species identified. Construction of a phylogenomic tree, ANI analysis, and pan-genome analysis of the 57 isolates represents the most comprehensive culture-based phylogenomic survey of Vibrios during a PO summer mortality event in Australian waters and revealed large genomic diversity in many of the identified species. 
Our analysis revealed limited and inconsistent associations between isolate species and their geographical origins, or host health status. Together with ANI and pan-genome results, these inconsistencies suggest that to determine the role that microbes may have in Pacific oyster summer mortality events, isolate identification must be at the taxonomic level of strain. Our WGS data (specifically, the accessory genomes) differentiated bacterial strains, and coupled with associated metadata, highlight the possibility of predicting a strain's environmental niche and level of pathogenicity.}, }
@article {pmid36748586, year = {2023}, author = {Rai, A and Suresh, G and Ria, B and L, V and Pk, S and Ipsita, S and Sasikala, C and Venkata Ramana, C}, title = {Phylogenomic analysis of the genus Alcanivorax: proposal for division of this genus into the emended genus Alcanivorax and two novel genera Alloalcanivorax gen. nov. and Isoalcanivorax gen. nov.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {73}, number = {1}, pages = {}, doi = {10.1099/ijsem.0.005672}, pmid = {36748586}, issn = {1466-5034}, abstract = {The members of the genus Alcanivorax are key players in the removal of petroleum hydrocarbons from polluted marine environments. More than half of the species were described in the last decade using 16S rRNA gene phylogeny and genomic-based metrics. However, the 16S rRNA gene identity (<94 %) between some members of the genus Alcanivorax suggested their imprecise taxonomic status. In this study, we examined the taxonomic positions of Alcanivorax species using 16S rRNA phylogeny and further validated them using phylogenomic-related indexes such as digital DNA-DNA hybridization (dDDH), average nucleotide identity (ANI), average amino acid identity (AAI), percentage of conserved proteins (POCP) and comparative genomic studies. ANI and dDDH values confirmed that all the Alcanivorax species were well described at the species level. The phylotaxogenomic analysis showed that Alcanivorax species formed three clades. The inter-clade values of AAI and POCP were less than 70 %. The pan-genome evaluation depicted that the members shared 1223 core genes and its number increased drastically when analysed clade-wise. Therefore, these results necessitate the transfer of clade II and clade III members into Isoalcanivorax gen. nov. and Alloalcanivorax gen. nov., respectively, along with the emended description of the genus Alcanivorax sensu stricto.}, }
@article {pmid36748580, year = {2022}, author = {Wietz, M and López-Pérez, M and Sher, D and Biller, SJ and Rodriguez-Valera, F}, title = {Microbe Profile: Alteromonas macleodii - a widespread, fast-responding, 'interactive' marine bacterium.}, journal = {Microbiology (Reading, England)}, volume = {168}, number = {11}, pages = {}, doi = {10.1099/mic.0.001236}, pmid = {36748580}, issn = {1465-2080}, abstract = {Alteromonas macleodii is a marine heterotrophic bacterium with widespread distribution - from temperate to tropical oceans, and from surface to deep waters. Strains of A. macleodii exhibit considerable genomic and metabolic variability, and can grow rapidly on diverse organic compounds. A. macleodii is a model organism for the study of population genomics, physiological adaptations and microbial interactions, with individual genomes encoding diverse phenotypic traits influenced by recombination and horizontal gene transfer.}, }
@article {pmid36748558, year = {2022}, author = {Cummins, EA and Hall, RJ and Connor, C and McInerney, JO and McNally, A}, title = {Distinct evolutionary trajectories in the Escherichia coli pangenome occur within sequence types.}, journal = {Microbial genomics}, volume = {8}, number = {11}, pages = {}, doi = {10.1099/mgen.0.000903}, pmid = {36748558}, issn = {2057-5858}, }
@article {pmid36748494, year = {2022}, author = {Li, BB and Zhang, XJ and Wu, D and Zhang, DD and Fang, BZ and Liu, HC and Zhou, YG and Cai, M and Li, WJ and Nie, GX}, title = {Devosia ureilytica sp. nov., isolated from Kuche River in China.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {72}, number = {12}, pages = {}, doi = {10.1099/ijsem.0.005663}, pmid = {36748494}, issn = {1466-5034}, abstract = {Two novel strains, designated XJ19-45[T] and XJ19-1, were isolated from water of Kuche River in Xinjiang Uygur Autonomous Region, China. Their cells were Gram-stain-negative, aerobic and motile rods. The phylogenetic analyses based on 16S rRNA genes and genomes showed that the two isolates belonged to the genus Devosia and the closest relative was Devosia subaequoris HST3-14[T]. The 16S rRNA genes sequences pairwise similarities, average nucleotide identities, digital DNA-DNA hybridizations and average amino acid identities between type strain XJ19-45[T] and other relatives were all less than 98.3, 80.3, 23.6 and 85.7 %, respectively, all below the species delineation thresholds. Pan-genomic analysis indicated that the novel isolate XJ19-45[T] shared 1594 core gene clusters with the 11 closely related type strains in Devosia, and the number of strain-specific clusters was 390. The major cellular fatty acids (>10 %) of the two isolates were summed feature 8, C18 : 1 ω7c 11-methyl and C16 : 0. Diphosphatidylglycerol, phosphatidylglycerol and glycolipids were the major polar lipids, and Q10 was the detected respiratory quinone. Based on the results of phenotypic, physiological, chemotaxonomic and genotypic characterizations, we propose that the isolates represent a novel species, for which the name Devosia ureilytica sp. nov. is proposed. The type strain is XJ19-45[T] (=CGMCC 1.19388[T]=KCTC 92263[T]).}, }
@article {pmid36747706, year = {2023}, author = {Hoover, RL and Keffer, JL and Polson, SW and Chan, CS}, title = {Gallionellaceae pangenomic analysis reveals insight into phylogeny, metabolic flexibility, and iron oxidation mechanisms.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.01.26.525709}, pmid = {36747706}, abstract = {UNLABELLED: The iron-oxidizing Gallionellaceae drive a wide variety of biogeochemical cycles through their metabolisms and biominerals. To better understand the environmental impacts of Gallionellaceae, we need to improve our knowledge of their diversity and metabolisms, especially any novel iron oxidation mechanisms. Here, we used a pangenomic analysis of 103 genomes to resolve Gallionellaceae phylogeny and explore the range of genomic potential. Using a concatenated ribosomal protein tree and key gene patterns, we determined Gallionellaceae has four genera, divided into two groups—iron-oxidizing bacteria (FeOB) Gallionella, Sideroxydans, and Ferriphaselus with known iron oxidases (Cyc2, MtoA) and nitrite-oxidizing bacteria (NOB) Candidatus Nitrotoga with nitrite oxidase (Nxr). The FeOB and NOB have similar electron transport chains, including genes for reverse electron transport and carbon fixation. Auxiliary energy metabolisms including S oxidation, denitrification, and organotrophy were scattered throughout the Gallionellaceae FeOB. Within FeOB, we found genes that may represent adaptations for iron oxidation, including a variety of extracellular electron uptake (EEU) mechanisms. FeOB genomes encoded more predicted c-type cytochromes overall, notably more multiheme c-type cytochromes (MHCs) with >10 CXXCH motifs. These include homologs of several predicted outer membrane porin-MHC complexes, including MtoAB and Uet. 
MHCs are known to efficiently conduct electrons across longer distances and function across a wide range of redox potentials that overlap with mineral redox potentials, which can help expand the range of usable iron substrates. Overall, the results of pangenome analyses suggest that the Gallionellaceae genera Gallionella, Sideroxydans, and Ferriphaselus are primarily iron oxidizers, capable of oxidizing dissolved Fe[2+] as well as a range of solid iron or other mineral substrates.
IMPORTANCE: Neutrophilic iron-oxidizing bacteria (FeOB) produce copious iron (oxyhydr)oxides that can profoundly influence biogeochemical cycles, notably the fate of carbon and many metals. To fully understand environmental microbial iron oxidation, we need a thorough accounting of iron oxidation mechanisms. In this study we show the Gallionellaceae FeOB have both known iron oxidases as well as uncharacterized multiheme cytochromes (MHCs). MHCs are predicted to transfer electrons from extracellular substrates and likely confer metabolic capabilities that help Gallionellaceae occupy a range of different iron- and mineral-rich niches. Gallionellaceae appear to specialize in iron oxidation, so it makes sense that they would have multiple mechanisms to oxidize various forms of iron, given the many iron minerals on Earth, as well as the physiological and kinetic challenges faced by FeOB. The multiple iron/mineral oxidation mechanisms may help drive the widespread ecological success of Gallionellaceae.}, }
@article {pmid36747219, year = {2023}, author = {Chen, H and King, R and Smith, D and Bayon, C and Ashfield, T and Torriani, S and Kanyuka, K and Hammond-Kosack, K and Bieri, S and Rudd, J}, title = {Combined pangenomics and transcriptomics reveals core and redundant virulence processes in a rapidly evolving fungal plant pathogen.}, journal = {BMC biology}, volume = {21}, number = {1}, pages = {24}, pmid = {36747219}, issn = {1741-7007}, support = {BB/J/00426X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/C000I0250/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {BACKGROUND: Studying genomic variation in rapidly evolving pathogens potentially enables identification of genes supporting their "core biology", being present, functional and expressed by all strains or "flexible biology", varying between strains. Genes supporting flexible biology may be considered to be "accessory", whilst the "core" gene set is likely to be important for common features of a pathogen species biology, including virulence on all host genotypes. The wheat-pathogenic fungus Zymoseptoria tritici represents one of the most rapidly evolving threats to global food security and was the focus of this study.
RESULTS: We constructed a pangenome of 18 European field isolates, with 12 also subjected to RNAseq transcription profiling during infection. Combining this data, we predicted a "core" gene set comprising 9807 sequences which were (1) present in all isolates, (2) lacking inactivating polymorphisms and (3) expressed by all isolates. A large accessory genome, consisting of 45% of the total genes, was also defined. We classified genetic and genomic polymorphism at both chromosomal and individual gene scales. Proteins required for essential functions including virulence had lower-than average sequence variability amongst core genes. Both core and accessory genomes encoded many small, secreted candidate effector proteins that likely interact with plant immunity. Viral vector-mediated transient in planta overexpression of 88 candidates failed to identify any which induced leaf necrosis characteristic of disease. However, functional complementation of a non-pathogenic deletion mutant lacking five core genes demonstrated that full virulence was restored by re-introduction of the single gene exhibiting least sequence polymorphism and highest expression.
CONCLUSIONS: These data support the combined use of pangenomics and transcriptomics for defining genes which represent core, and potentially exploitable, weaknesses in rapidly evolving pathogens.}, }
@article {pmid36747211, year = {2023}, author = {Jia, Y and Xu, M and Hu, H and Chapman, B and Watt, C and Buerte, B and Han, N and Zhu, M and Bian, H and Li, C and Zeng, Z}, title = {Comparative gene retention analysis in barley, wild emmer, and bread wheat pangenome lines reveals factors affecting gene retention following gene duplication.}, journal = {BMC biology}, volume = {21}, number = {1}, pages = {25}, doi = {10.1186/s12915-022-01503-z}, pmid = {36747211}, issn = {1741-7007}, abstract = {BACKGROUND: Gene duplication is a prevalent phenomenon and a major driving force underlying genome evolution. The process leading to the fixation of gene duplicates following duplication is critical to understand how genome evolves but remains fragmentally understood. Most previous studies on gene retention are based on gene duplicate analyses in single reference genome. No population-based comparative gene retention analysis has been performed to date.
RESULTS: Taking advantage of recently published genomic data in Triticeae, we dissected a divergent homogentisate phytyltransferase (HPT2) lineage caught in the middle stage of gene fixation following duplication. The presence/absence of HPT2 in barley (diploid), wild emmer (tetraploid), and bread wheat (hexaploid) pangenome lines appears to be associated with gene dosage constraint and environmental adaption. Based on these observations, we adopted a phylogeny-based orthology inference approach and performed comparative gene retention analyses across barley, wild emmer, and bread wheat. This led to the identification of 326 HPT2-pattern-like genes at whole genome scale, representing a pool of gene duplicates in the middle stage of gene fixation. Majority of these HPT2-pattern-like genes were identified as small-scale duplicates, such as dispersed, tandem, and proximal duplications. Natural selection analyses showed that HPT2-pattern-like genes have experienced relaxed selection pressure, which is generally accompanied with partial positive selection and transcriptional divergence. Functional enrichment analyses showed that HPT2-pattern-like genes are over-represented with molecular-binding and defense response functions, supporting the potential role of environmental adaption during gene retention. We also observed that gene duplicates from larger gene family are more likely to be lost, implying a gene dosage constraint effect. Further comparative gene retention analysis in barley and bread wheat pangenome lines revealed combined effects of species-specific selection and gene dosage constraint.
CONCLUSIONS: Comparative gene retention analyses at the population level support gene dosage constraint, environmental adaption, and species-specific selection as three factors that may affect gene retention following gene duplication. Our findings shed light on the evolutionary process leading to the retention of newly formed gene duplicates and will greatly improve our understanding on genome evolution via duplication.}, }
@article {pmid36746216, year = {2023}, author = {Jeong, BR and Jang, J and Jin, E}, title = {Genome engineering via gene editing technologies in microalgae.}, journal = {Bioresource technology}, volume = {}, number = {}, pages = {128701}, doi = {10.1016/j.biortech.2023.128701}, pmid = {36746216}, issn = {1873-2976}, abstract = {CRISPR-Cas has revolutionized genetic modification with its comparative simplicity and accuracy, and it can be used even at the genomic level. Microalgae are excellent feedstocks for biofuels and nutraceuticals because they contain high levels of fatty acids, carotenoids, and other metabolites; however, genome engineering for microalgae is not yet as developed as for other model organisms. Microalgal engineering at the genetic and metabolic levels is relatively well established, and a few genomic resources are available. Their genomic information was used for a "safe harbor" site for stable transgene expression in microalgae. This review proposes further genome engineering schemes including the construction of sgRNA libraries, pan-genomic and epigenomic resources, and mini-genomes, which can together be developed into synthetic biology for carbon-based engineering in microalgae. Acetyl-CoA is at the center of carbon metabolic pathways and is further reviewed for the production of molecules including terpenoids in microalgae.}, }
@article {pmid36741902, year = {2022}, author = {Srinivas, K and Ghatak, S and Pyngrope, DA and Angappan, M and Milton, AAP and Das, S and Lyngdoh, V and Lamare, JP and Prasad, MCB and Sen, A}, title = {Avian strains of emerging pathogen Escherichia fergusonii are phylogenetically diverse and harbor the greatest AMR dissemination potential among different sources: Comparative genomic evidence.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1080677}, pmid = {36741902}, issn = {1664-302X}, abstract = {INTRODUCTION: Escherichia fergusonii is regarded as an emerging pathogen with zoonotic potential. In the current study, we undertook source-wise comparative genomic analyses (resistome, virulome, mobilome and pangenome) to understand the antimicrobial resistance, virulence, mobile genetic elements and phylogenetic diversity of E. fergusonii.
METHODS: Six E. fergusonii strains (5 multidrug resistant strains and 1 biofilm former) were isolated from poultry (duck faeces and retail chicken samples). Following confirmation by phenotypic and molecular methods, the isolates were further characterized and their genomes were sequenced. Comparative resisto-virulo-mobilome analyses and pangenomics were performed for E. fergusonii genomes, while including 125 other E. fergusonii genomes available from NCBI database.
RESULTS AND DISCUSSION: Avian and porcine strains of E. fergusonii were found to carry significantly higher number of antimicrobial resistance genes (p < 0.05) and mobile genetic elements (plasmids, transposons and integrons) (p < 0.05), while the pathogenic potential of bovine strains was significantly higher compared to other strains (p < 0.05). Pan-genome development trends indicated open pan-genome for all strains (0 < γ < 1). Genomic diversity of avian strains was found to be greater than that from other sources. Phylogenetic analysis revealed close clustering among isolates of similar isolation source and geographical location. Indian isolates of E. fergusonii clustered closely with those from Chinese and a singleton Australian isolate. Overall, being the first pangenomic study on E. fergusonii, our analysis provided important cues on genomic features of the emerging pathogen E. fergusonii while highlighting the potential role of avian strains in dissemination of AMR.}, }
@article {pmid36739346, year = {2023}, author = {Lanclos, VC and Rasmussen, AN and Kojima, CY and Cheng, C and Henson, MW and Faircloth, BC and Francis, CA and Thrash, JC}, title = {Ecophysiology and genomics of the brackish water adapted SAR11 subclade IIIa.}, journal = {The ISME journal}, volume = {}, number = {}, pages = {}, doi = {10.1038/s41396-023-01376-2}, pmid = {36739346}, issn = {1751-7370}, abstract = {The Order Pelagibacterales (SAR11) is the most abundant group of heterotrophic bacterioplankton in global oceans and comprises multiple subclades with unique spatiotemporal distributions. Subclade IIIa is the primary SAR11 group in brackish waters and shares a common ancestor with the dominant freshwater IIIb (LD12) subclade. Despite its dominance in brackish environments, subclade IIIa lacks systematic genomic or ecological studies. Here, we combine closed genomes from new IIIa isolates, new IIIa MAGS from San Francisco Bay (SFB), and 460 highly complete publicly available SAR11 genomes for the most comprehensive pangenomic study of subclade IIIa to date. Subclade IIIa represents a taxonomic family containing three genera (denoted as subgroups IIIa.1, IIIa.2, and IIIa.3) that had distinct ecological distributions related to salinity. The expansion of taxon selection within subclade IIIa also established previously noted metabolic differentiation in subclade IIIa compared to other SAR11 subclades such as glycine/serine prototrophy, mosaic glyoxylate shunt presence, and polyhydroxyalkanoate synthesis potential. Our analysis further shows metabolic flexibility among subgroups within IIIa. Additionally, we find that subclade IIIa.3 bridges the marine and freshwater clades based on its potential for compatible solute transport, iron utilization, and bicarbonate management potential. Pure culture experimentation validated differential salinity ranges in IIIa.1 and IIIa.3 and provided detailed IIIa cell size and volume data. 
This study is an important step forward for understanding the genomic, ecological, and physiological differentiation of subclade IIIa and the overall evolutionary history of SAR11.}, }
@article {pmid36728698, year = {2022}, author = {Saikia, J and Kotoky, R and Debnath, R and Kumar, N and Gogoi, P and Yadav, A and Saikia, R}, title = {De novo genomic analysis of Enterobacter asburiae EBRJ12, a plant growth-promoting rhizobacteria isolated from the rhizosphere of Phaseolus vulgaris L.}, journal = {Journal of applied microbiology}, volume = {}, number = {}, pages = {}, doi = {10.1093/jambio/lxac090}, pmid = {36728698}, issn = {1365-2672}, abstract = {AIM: Environmental stresses such as water deficit induced stress are one of the major limiting factors in crop production. However, some plant growth-promoting rhizobacteria (PGPR) can promote plant growth in such adverse condition. Therefore, the objective was to isolate rhizospheric bacteria from Phaseolus vulgaris L. growing in a drought-affected soil and to analyze its plant growth promoting (PGP) efficacy to black gram (Vigna mungo L.) and Bhut jolokia (Capsicum chinense Jacq.). Whole-genome sequencing of the potential bacteria was targeted to analyze the genetic potential of the isolate as a plant growth-promoting agent.
METHODS AND RESULTS: The isolate Enterobacter asburiae EBRJ12 was selected based on its PGP efficacy, which significantly improved plant growth and development. The genomic analysis revealed the presence of one circular chromosome of size 4.8 Mb containing 16 genes for osmotic stress regulation including osmotically inducible protein osmY, outer membrane protein A precursor ompA, aquaporin Z, and an operon for osmoprotectant ABC transporter yehZYXW. Moreover, the genome has a complete genetic cluster for biosynthesis of siderophore Enterobactin and siderophore Aerobactin. The PGP effects were verified with black gram and Bhut jolokia in pot experiments. The isolate significantly increased the shoot length by 35.0% and root length by 58.0% of black gram, while 41.0% and 57.0% of elevation in shoot and root length were observed in Bhut jolokia compared to non-inoculated plants.
CONCLUSIONS: The EBRJ12 has PGP features that could improve the growth in host plants, and the genomic characterization revealed the presence of genetic potential for plant growth promotion.}, }
@article {pmid36726175, year = {2023}, author = {Petersen, C and Sørensen, T and Nielsen, MR and Sondergaard, TE and Sørensen, JL and Fitzpatrick, DA and Frisvad, JC and Nielsen, KL}, title = {Comparative genomic study of the Penicillium genus elucidates a diverse pangenome and 15 lateral gene transfer events.}, journal = {IMA fungus}, volume = {14}, number = {1}, pages = {3}, pmid = {36726175}, issn = {2210-6340}, abstract = {The Penicillia are known to produce a wide range of natural products - some with devastating outcome for the agricultural industry and others with unexploited potential in different applications. However, a large-scale overview of the biosynthetic potential of different species has been lacking. In this study, we sequenced 93 Penicillium isolates and, together with eleven published genomes that hold similar assembly characteristics, we established a species phylogeny as well as defining a Penicillium pangenome. A total of 5612 genes were shared between ≥ 98 isolates corresponding to approximately half of the average number of genes a Penicillium genome holds. We further identified 15 lateral gene transfer events that have occurred in this collection of Penicillium isolates, which might have played an important role, such as niche adaption, in the evolution of these fungi. The comprehensive characterization of the genomic diversity in the Penicillium genus supersedes single-reference genomes, which do not necessarily capture the entire genetic variation.}, }
@article{pmid36718535,
  author   = {Lu, Y and Luo, J and An, E and Lu, B and Wei, Y and Chen, X and Lu, K and Liang, S and Hu, H and Han, M and He, S and Shen, J and Guo, D and Bu, N and Yang, L and Xu, W and Lu, C and Xiang, Z and Tong, X and Dai, F},
  title    = {Deciphering the genetic basis of silkworm cocoon colors provides new insights into biological coloration and phenotypic diversification},
  journal  = {Molecular Biology and Evolution},
  year     = {2023},
  doi      = {10.1093/molbev/msad017},
  pmid     = {36718535},
  issn     = {1537-1719},
  abstract = {The genetic basis of phenotypic variation is a long-standing concern of evolutionary biology. Coloration has proven to be a visual, easily quantifiable, and highly tractable system for genetic analysis and is an ever-evolving focus of biological research. Compared with the homogenized brown-yellow cocoons of wild silkworms, the cocoons of domestic silkworms are spectacularly diverse in color, such as white, green, and yellow-red; this provides an outstanding model for exploring the phenotypic diversification and biological coloration. Herein, the molecular mechanism underlying silkworm green cocoon formation was investigated, which was not fully understood. We demonstrated that five of the seven members of a sugar transporter gene cluster were specifically duplicated in the Bombycidae and evolved new spatial expression patterns predominantly expressed in silk glands, accompanying complementary temporal expression; they synergistically facilitate the uptake of flavonoids, thus determining the green cocoon. Subsequently, polymorphic cocoon coloring landscape involving multiple loci and the evolution of cocoon color from wild to domestic silkworms were analyzed based on the pan-genome sequencing data.
It was found that cocoon coloration involved epistatic interaction between loci; all the identified cocoon color-related loci existed in wild silkworms; the genetic segregation, recombination, and variation of these loci shaped the multi-colored cocoons of domestic silkworms. This study revealed a new mechanism for flavonoids-based biological coloration that highlights the crucial role of gene duplication followed by functional diversification in acquiring new genetic functions; furthermore, the results in this work provide insight into phenotypic innovation during domestication.},
}
@article{pmid36707768,
  author   = {Sun, Y and Xiao, W and Wang, QN and Wang, J and Kong, XD and Ma, WH and Liu, SX and Ren, P and Xu, LN and Zhang, YJ},
  title    = {Multiple variation patterns of terpene synthases in 26 maize genomes},
  journal  = {BMC Genomics},
  year     = {2023},
  volume   = {24},
  number   = {1},
  pages    = {46},
  pmid     = {36707768},
  issn     = {1471-2164},
  abstract = {Terpenoids are important compounds associated with the pest and herbivore resistance mechanisms of plants; consequently, it is essential to identify and explore terpene synthase (TPS) genes in maize. In the present study, we identified 31 TPS genes based on a pan-genome of 26 high-quality maize genomes containing 20 core genes (present in all 26 lines), seven dispensable genes (present in 2 to 23 lines), three near-core genes (present in 24 to 25 lines), and one private gene (present in only 1 line). Evaluation of ka/ks values of TPS in 26 varieties revealed that TPS25 was subjected to positive selection in some varieties. Six ZmTPS had ka/ks values less than 1, indicating that they were subjected to purifying selection. In 26 genomes, significant differences were observed in ZmTPS25 expression between genes affected by structural variation (SV) and those not affected by SV. In some varieties, SV altered the conserved structural domains resulting in a considerable number of atypical genes. The analysis of RNA-seq data of maize Ostrinia furnacalis feeding revealed 10 differentially expressed ZmTPS, 9 of which were core genes. However, many atypical genes for these responsive genes were identified in several genomes. These findings provide a novel resource for functional studies of ZmTPS.},
}
@article{pmid36706753,
  author   = {Younginger, BS and Mayba, O and Reeder, J and Nagarkar, DR and Modrusan, Z and Albert, ML and Byrd, AL},
  title    = {Enrichment of oral-derived bacteria in inflamed colorectal tumors and distinct associations of {Fusobacterium} in the mesenchymal subtype},
  journal  = {Cell Reports Medicine},
  year     = {2023},
  pages    = {100920},
  doi      = {10.1016/j.xcrm.2023.100920},
  pmid     = {36706753},
  issn     = {2666-3791},
  abstract = {While the association between colorectal cancer (CRC) features and Fusobacterium has been extensively studied, less is known of other intratumoral bacteria. Here, we leverage whole transcriptomes from 807 CRC samples to dually characterize tumor gene expression and 74 intratumoral bacteria. Seventeen of these species, including 4 Fusobacterium spp., are classified as orally derived and are enriched among right-sided, microsatellite instability-high (MSI-H), and BRAF-mutant tumors. Across consensus molecular subtypes (CMSs), integration of Fusobacterium animalis (Fa) presence and tumor expression reveals that Fa has the most significant associations in mesenchymal CMS4 tumors despite a lower prevalence than in immune CMS1. Within CMS4, the prevalence of Fa is uniquely associated with collagen- and immune-related pathways. Additional Fa pangenome analysis reveals that stress response genes and the adhesion FadA are commonly expressed intratumorally. Overall, this study identifies oral-derived bacteria as enriched in inflamed tumors, and the associations of bacteria and tumor expression are context and species specific.},
}
@article{pmid36703158,
  author   = {Wang, J and Yang, W and Zhang, S and Hu, H and Yuan, Y and Dong, J and Chen, L and Ma, Y and Yang, T and Zhou, L and Chen, J and Liu, B and Li, C and Edwards, D and Zhao, J},
  title    = {A pangenome analysis pipeline provides insights into functional gene identification in rice},
  journal  = {Genome Biology},
  year     = {2023},
  volume   = {24},
  number   = {1},
  pages    = {19},
  pmid     = {36703158},
  issn     = {1474-760X},
  abstract = {BACKGROUND: A pangenome aims to capture the complete genetic diversity within a species and reduce bias in genetic analysis inherent in using a single reference genome. However, the current linear format of most plant pangenomes limits the presentation of position information for novel sequences. Graph pangenomes have been developed to overcome this limitation. However, bioinformatics analysis tools for graph format genomes are lacking.
RESULTS: To overcome this problem, we develop a novel strategy for pangenome construction and a downstream pangenome analysis pipeline (PSVCP) that captures genetic variants' position information while maintaining a linearized layout. Using PSVCP, we construct a high-quality rice pangenome using 12 representative rice genomes and analyze an international rice panel with 413 diverse accessions using the pangenome as the reference. We show that PSVCP successfully identifies causal structural variations for rice grain weight and plant height. Our results provide insights into rice population structure and genomic diversity. We characterize a new locus (qPH8-1) associated with plant height on chromosome 8 undetected by the SNP-based genome-wide association study (GWAS).
CONCLUSIONS: Our results demonstrate that the pangenome constructed by our pipeline combined with a presence and absence variation-based GWAS can provide additional power for genomic and genetic analysis. The pangenome constructed in this study and the associated genome sequence and genetic variants data provide valuable genomic resources for rice genomics research and improvement in future.},
}
@article{pmid36699832,
  author   = {Lee, G and Choi, H and Liu, H and Han, YH and Paul, NC and Han, GH and Kim, H and Kim, PI and Seo, SI and Song, J and Sang, H},
  title    = {Biocontrol of the causal brown patch pathogen {Rhizoctonia} solani by {Bacillus} velezensis {GH1-13} and development of a bacterial strain specific detection method},
  journal  = {Frontiers in Plant Science},
  year     = {2022},
  volume   = {13},
  pages    = {1091030},
  pmid     = {36699832},
  issn     = {1664-462X},
  abstract = {Brown patch caused by the basidiomycete fungus Rhizoctonia solani is an economically important disease of cool-season turfgrasses. In order to manage the disease, different types of fungicides have been applied, but the negative impact of fungicides on the environment continues to rise. In this study, the beneficial bacteria Bacillus velezensis GH1-13 was characterized as a potential biocontrol agent to manage brown patch disease. The strain GH1-13 strongly inhibited the mycelial growth of turf pathogens including different anastomosis groups of R. solani causing brown patch and large patch. R. solani AG2-2(IIIB) hyphae were morphologically changed, and fungal cell death resulted from exposure to the strain GH1-13. In addition, the compatibility of fungicides with the bacterial strain, and the combined application of fungicide azoxystrobin and the strain in brown patch control on creeping bentgrass indicated that the strain could serve as a biocontrol agent. To develop strain-specific detection method, two unique genes from chromosome and plasmid of GH1-13 were found using pan-genome analysis of 364 Bacillus strains. The unique gene from chromosome was successfully detected using both SYBR Green and TaqMan qPCR methods in bacterial DNA or soil DNA samples. This study suggests that application of GH1-13 offers an environmentally friendly approach via reducing fungicide application rates.
Furthermore, the developed pipeline of strain-specific detection method could be a useful tool for detecting and studying the dynamics of specific biocontrol agents.},
}
@article{pmid36699320,
  author   = {Hanafy, M and Hansen, C and Phanse, Y and Wu, CW and Nelson, K and Aschenbroich, SA and Talaat, AM},
  title    = {Characterization of early immune responses elicited by live and inactivated vaccines against {Johne's} disease in goats},
  journal  = {Frontiers in Veterinary Science},
  year     = {2022},
  volume   = {9},
  pages    = {1046704},
  pmid     = {36699320},
  issn     = {2297-1769},
  abstract = {Mycobacterium avium subspecies paratuberculosis (M. paratuberculosis) is the causative agent of Johne's disease, a chronic debilitating condition affecting ruminants causing significant economic losses to the dairy industry. Available inactivated vaccines are not effective in controlling the disease and vaccinated animals can continue to infect newly born calves. Recently, we have shown that a live-attenuated vaccine candidate (pgsN) is protective in goats and calves following challenge with virulent strains of M. paratuberculosis. To decipher the dynamics of the immune responses elicited by both live-attenuated and inactivated vaccines, we analyzed key immunological parameters of goats immunized through different routes when a marker-less pgsN vaccine was used. Within a few weeks, the inactivated vaccine triggered the formation of granulomas both at the site of inoculation and in regional lymph nodes, that increased in size over time and persisted until the end of the experiment. In contrast, granulomas induced by the pgsN vaccine were small and subsided during the study. Interestingly, in this vaccine group, histology demonstrated an initial abundance of intra-histiocytic mycobacterial bacilli at the site of inoculation, with recruitment of very minimal T lymphocytes to poorly organized granulomas. Over time, granulomas became more organized, with recruitment of greater numbers of T and B lymphocytes, which coincided with a lack of mycobacteria.
For the inactivated vaccine group, mycobacterial bacilli were identified extracellularly within the center of caseating granulomas, with relatively equal proportions of B- and T-lymphocytes maintained across both early and late times. Despite the differences in granuloma-specific lymphocyte recruitment, markers for cell-mediated immunity (e.g., IFN-γ release) were robust in both injected pgsN and inactivated vaccine groups. In contrast, the intranasal live-attenuated vaccine did not elicit any reaction at site of inoculation, nor cell-mediated immune responses. Finally, 80% of animals in the inactivated vaccine group significantly reacted to purified protein derivatives from M. bovis, while reactivity was detected in only 20% of animals receiving pgsN vaccine, suggesting a higher level of cross reactivity for bovine tuberculosis when inactivated vaccine is used. Overall, these results depict the cellular recruitment strategies driving immune responses elicited by both live-attenuated and inactivated vaccines that target Johne's disease.},
}
@article{pmid36698972,
  author   = {Yang, MR and Wu, YW},
  title    = {A {Cross-Validated} {Feature} {Selection} ({CVFS}) approach for extracting the most parsimonious feature sets and discovering potential antimicrobial resistance ({AMR}) biomarkers},
  journal  = {Computational and Structural Biotechnology Journal},
  year     = {2023},
  volume   = {21},
  pages    = {769--779},
  pmid     = {36698972},
  issn     = {2001-0370},
  abstract = {Understanding genes and their underlying mechanisms is critical in deciphering how antimicrobial-resistant (AMR) bacteria withstand detrimental effects of antibiotic drugs. At the same time the genes related to AMR phenotypes may also serve as biomarkers for predicting whether a microbial strain is resistant to certain antibiotic drugs. We developed a Cross-Validated Feature Selection (CVFS) approach for robustly selecting the most parsimonious gene sets for predicting AMR activities from bacterial pan-genomes. The core idea behind the CVFS approach is interrogating features among non-overlapping sub-parts of the datasets to ensure the representativeness of the features. By randomly splitting the dataset into disjoint sub-parts, conducting feature selection within each sub-part, and intersecting the features shared by all sub-parts, the CVFS approach is able to achieve the goal of extracting the most representative features for yielding satisfactory AMR activity prediction accuracy. By testing this idea on bacterial pan-genome datasets, we showed that this approach was able to extract the most succinct feature sets that predicted AMR activities very well, indicating the potential of these genes as AMR biomarkers. The functional analysis demonstrated that the CVFS approach was able to extract both known AMR genes and novel ones, suggesting the capabilities of the algorithm in selecting relevant features and highlighting the potential of the novel genes in expanding the antimicrobial resistance gene databases.},
}
@article{pmid36698060,
  author   = {Sivakumar, R and Pranav, PS and Annamanedi, M and Chandrapriya, S and Isloor, S and Rajendhran, J and Hegde, NR},
  title    = {Genome sequencing and comparative genomic analysis of bovine mastitis-associated {Staphylococcus} aureus strains from {India}},
  journal  = {BMC Genomics},
  year     = {2023},
  volume   = {24},
  number   = {1},
  pages    = {44},
  pmid     = {36698060},
  issn     = {1471-2164},
  abstract = {BACKGROUND: Bovine mastitis accounts for significant economic losses to the dairy industry worldwide. Staphylococcus aureus is the most common causative agent of bovine mastitis. Investigating the prevalence of virulence factors and antimicrobial resistance would provide insight into the molecular epidemiology of mastitis-associated S. aureus strains. The present study is focused on the whole genome sequencing and comparative genomic analysis of 41 mastitis-associated S. aureus strains isolated from India.
RESULTS: The results elucidate explicit knowledge of 15 diverse sequence types (STs) and five clonal complexes (CCs). The clonal complexes CC8 and CC97 were found to be the predominant genotypes comprising 21 and 10 isolates, respectively. The mean genome size was 2.7 Mbp with a 32.7% average GC content. The pan-genome of the Indian strains of mastitis-associated S. aureus is almost closed. The genome-wide SNP-based phylogenetic analysis differentiated 41 strains into six major clades. Sixteen different spa types were identified, and eight isolates were untypeable. The cgMLST analysis of all S. aureus genome sequences reported from India revealed that S. aureus strain MUF256, isolated from wound fluids of a diabetic patient, was the common ancestor. Further, we observed that all the Indian mastitis-associated S. aureus isolates belonging to the CC97 are mastitis-associated. We identified 17 different antimicrobial resistance (AMR) genes among these isolates, and all the isolates used in this study were susceptible to methicillin. We also identified 108 virulence-associated genes and discuss their associations with different genotypes.
CONCLUSION: This is the first study presenting a comprehensive whole genome analysis of bovine mastitis-associated S. aureus isolates from India. Comparative genomic analysis revealed the genome diversity, major genotypes, antimicrobial resistome, and virulome of clinical and subclinical mastitis-associated S. aureus strains.},
}
@article{pmid36695592,
  author   = {Giacomini, JJ and Torres-Morales, J and Dewhirst, FE and Borisy, GG and Mark Welch, JL},
  title    = {Site Specialization of Human Oral {Veillonella} Species},
  journal  = {Microbiology Spectrum},
  year     = {2023},
  pages    = {e0404222},
  doi      = {10.1128/spectrum.04042-22},
  pmid     = {36695592},
  issn     = {2165-0497},
  abstract = {Veillonella species are abundant members of the human oral microbiome with multiple interspecies commensal relationships. Examining the distribution patterns of Veillonella species across the oral cavity is fundamental to understanding their oral ecology. In this study, we used a combination of pangenomic analysis and oral metagenomic information to clarify Veillonella taxonomy and to test the site specialist hypothesis for the Veillonella genus, which contends that most oral bacterial species are adapted to live at specific oral sites. Using isolate genome sequences combined with shotgun metagenomic sequence data, we showed that Veillonella species have clear, differential site specificity: Veillonella parvula showed strong preference for supra- and subgingival plaque, while closely related V. dispar, as well as more distantly related V. atypica, preferred the tongue dorsum, tonsils, throat, and hard palate. In addition, the provisionally named Veillonella sp. Human Microbial Taxon 780 showed strong site specificity for keratinized gingiva. Using comparative genomic analysis, we identified genes associated with thiamine biosynthesis and the reductive pentose phosphate cycle that may enable Veillonella species to occupy their respective habitats. IMPORTANCE Understanding the microbial ecology of the mouth is fundamental for understanding human physiology. In this study, metapangenomics demonstrated that different Veillonella species have clear ecological preferences in the oral cavity of healthy humans, validating the site specialist hypothesis.
Furthermore, the gene pool of different Veillonella species was found to be reflective of their ecology, illuminating the potential role of vitamins and carbohydrates in determining Veillonella distribution patterns and interspecies interactions.},
}
@article{pmid36693839,
  author   = {Gao, Y and Guitton-Sert, L and Dessapt, J and Coulombe, Y and Rodrigue, A and Milano, L and Blondeau, A and Larsen, NB and Duxin, JP and Hussein, S and Fradet-Turcotte, A and Masson, JY},
  title    = {A {CRISPR-Cas9} screen identifies {EXO1} as a formaldehyde resistance gene},
  journal  = {Nature Communications},
  year     = {2023},
  volume   = {14},
  number   = {1},
  pages    = {381},
  pmid     = {36693839},
  issn     = {2041-1723},
  abstract = {Fanconi Anemia (FA) is a rare, genome instability-associated disease characterized by a deficiency in repairing DNA crosslinks, which are known to perturb several cellular processes, including DNA transcription, replication, and repair. Formaldehyde, a by-product of metabolism, is thought to drive FA by generating DNA interstrand crosslinks (ICLs) and DNA-protein crosslinks (DPCs). However, the impact of formaldehyde on global cellular pathways has not been investigated thoroughly. Herein, using a pangenomic CRISPR-Cas9 screen, we identify EXO1 as a critical regulator of formaldehyde-induced DNA lesions. We show that EXO1 knockout cell lines exhibit formaldehyde sensitivity leading to the accumulation of replicative stress, DNA double-strand breaks, and quadriradial chromosomes, a typical feature of FA. After formaldehyde exposure, EXO1 is recruited to chromatin, protects DNA replication forks from degradation, and functions in parallel with the FA pathway to promote cell survival. In vitro, EXO1-mediated exonuclease activity is proficient in removing DPCs. Collectively, we show that EXO1 limits replication stress and DNA damage to counteract formaldehyde-induced genome instability.},
}
@article{pmid36691844,
  author   = {Hu, J and Chen, L and Li, G and Pan, Y and Lu, Y and Chen, J and Xiong, W and Zeng, Z},
  title    = {Prevalence and genetic characteristics of {fosB-positive} {Staphylococcus} aureus in duck farms in {Guangdong}, {China} in 2020},
  journal  = {The Journal of Antimicrobial Chemotherapy},
  year     = {2023},
  doi      = {10.1093/jac/dkad014},
  pmid     = {36691844},
  issn     = {1460-2091},
  abstract = {OBJECTIVES: To investigate the epidemiology of fosB-positive Staphylococcus aureus in waterfowl farms in the Pearl River tributaries in Guangdong Province, China in 2020.
METHODS: A total of 63 S. aureus were recovered from 315 samples collected from six duck farms and one goose farm. PFGE, WGS and analysis were performed on 19 fosB-positive S. aureus.
RESULTS: The fosfomycin resistance rate of the strains was as high as 52.4% (33/63), and 30.1% (19/63) of the strains carried fosB. Resistance gene prediction results showed that duck farm environment-derived strains contained the oxazolidinone drug resistance gene optrA. All fosB-positive S. aureus were MRSA and most of them were MDR, mainly ST9-t899 and ST164-t899. PFGE showed that fosB-positive S. aureus from humans and ducks could be clustered into the same clade. In addition, core-genome SNP analysis showed that clonal transmission of S. aureus occurred between humans and water. Pan-genome analysis showed that S. aureus had an open pangenome. The fosB gene was located on 2610-2615 bp plasmids, which all contained a broad host-range plasmid replication protein family 13. Small plasmids carrying the fosB gene could be found in different multilocus STs of S. aureus.
CONCLUSIONS: This study indicated that duck farms in Guangdong, China could be an important reservoir of fosB-positive S. aureus. The spread of drug-resistant bacteria in waterfowl farms requires further monitoring.},
}
@article{pmid36688776,
  author   = {Basak, C and Chakraborty, R},
  title    = {A novel strain of {Shigella} isolated from the gut of {Lepidocephalichthys} guntea has in its genome a complete gene package for Type ll secretion system, and elaborate repertoire of genes responsible for multiple antibiotic-resistance and metal resistance via specific efflux channels},
  journal  = {Letters in Applied Microbiology},
  year     = {2023},
  volume   = {76},
  number   = {1},
  doi      = {10.1093/lambio/ovac049},
  pmid     = {36688776},
  issn     = {1472-765X},
  abstract = {The bacterial strain GCP5 was isolated from the gut of a bottom-dwelling fish Lepidocephalichthys guntea, that lives in the Magurmari River near North Bengal University in Siliguri, India. GCP5 was phylogenetically assigned to the Shigella genus using whole genome-based trees, k-mer analysis, the multilocus species tree (MLST), and single nucleotide polymorphism (SNP)-based trees, and the genetic makeup of the isolate was determined following assembly of the genome sequences and genome annotation with several bioinformatics tools. The presence of a complete package of general-secretory-pathway (gsp) genes, grouped in an operon identical to a well-characterized type II secretion system (T2SS), was confirmed by genome mining of Shigella sp. GCP5. The operon's gsp genes shared the most homology with Escherichia coli gsp genes. A few more high-pathogenicity islands (HPIs) in the GCP5 genome were validated using the pan-genomes analysis pipeline (PGAP) and island viewer. Several antibiotic-resistance genes were found in this genome, as well as the existence of key antibiotic efflux pump families, allowing for the creation of a gene network of several antibiotic efflux transporters. In addition, the genome contained genes specific for nickel transport, the nikABCD system, and the RND family transporter cusCFBA, which confers resistance to copper and silver by effluxing out Cu+ and Ag+ ions.},
}
@article{pmid36687647,
  author   = {Zhang, M and Yu, Y and Wang, Q and Chen, R and Wang, Y and Bai, Y and Song, Z and Lu, X and Hao, Y},
  title    = {Conjugation of plasmid harboring bla {NDM-1} in a clinical {Providencia} rettgeri strain through the formation of a fusion plasmid},
  journal  = {Frontiers in Microbiology},
  year     = {2022},
  volume   = {13},
  pages    = {1071385},
  pmid     = {36687647},
  issn     = {1664-302X},
  abstract = {Providencia rettgeri has recently gained increased importance owing to the New Delhi metallo-β-lactamase (NDM) and other β-lactamases produced by its clinical isolates. These enzymes reduce the efficiency of antimicrobial therapy. Herein, we reported the findings of whole-genome sequence analysis and a comprehensive pan-genome analysis performed on a multidrug-resistant P. rettgeri 18004577 clinical strain recovered from the urine of a hospitalized patient in Shandong, China, in 2018. Providencia rettgeri 18004577 was found to have a genome assembly size of 4.6 Mb with a G + C content of 41%; a circular plasmid p18004577_NDM of 273.3 Kb, harboring an accessory multidrug-resistant region; and a circular, stable IncT plasmid p18004577_Rts of 146.2 Kb. Additionally, various resistance genes were identified in its genome, including bla NDM-1, bla OXA-10, bla PER-4, aph(3')-VI, ant(2'')-Ia, ant(3')-Ia, sul1, catB8, catA1, mph(E), and tet. Conjugation experiments and whole-genome sequencing revealed that the bla NDM-1 gene could be transferred to the transconjugant via the formation of pJ18004577_NDM, a novel hybrid plasmid. Based on the genetic comparison, the main possible formation process for pJ18004577_NDM was the insertion of the [ΔISKox2-IS26-ΔISKox2]-aph(3')-VI-bla NDM-1 translocatable unit module from p18004577_NDM into plasmid p18004577_Rts in the Russian doll insertion structure (ΔISKox2-IS26-ΔISKox2), which played a role similar to that of IS26 using the "copy-in" route in the mobilization of [aph(3')-VI]-bla NDM-1.
The array, multiplicity, and diversity of the resistance and virulence genes in this strain necessitate stringent infection control, antibiotic stewardship, and periodic resistance surveillance/monitoring policies to preempt further horizontal and vertical spread of the resistance genes. Roary analysis based on 30 P. rettgeri strains pan genome identified 415 core, 756 soft core, 5,744 shell, and 12,967 cloud genes, highlighting the "close" nature of P. rettgeri pan-genome. After a comprehensive pan-genome analysis, representative biological information was revealed that included phylogenetic distances, presence or absence of genes across the P. rettgeri bacteria clade, and functional distribution of proteins. Moreover, pan-genome analysis has been shown to be an effective approach to better understand P. rettgeri bacteria because it helps develop various tailored therapeutic strategies based on their biological similarities and differences.},
}
@article{pmid36687645,
  author   = {Hurtado-Páez, U and Álvarez Zuluaga, N and Arango Isaza, RE and Contreras-Moreira, B and Rouzaud, F and Robledo, J},
  title    = {Pan-genome association study of {Mycobacterium} tuberculosis lineage-4 revealed specific genes related to the high and low prevalence of the disease in patients from the {North-Eastern} area of {Medellín}, {Colombia}},
  journal  = {Frontiers in Microbiology},
  year     = {2022},
  volume   = {13},
  pages    = {1076797},
  pmid     = {36687645},
  issn     = {1664-302X},
  abstract = {Mycobacterium tuberculosis (Mtb) lineage 4 is responsible for the highest burden of tuberculosis (TB) worldwide. This lineage has been the most prevalent lineage in Colombia, especially in the North-Eastern (NE) area of Medellin, where it has been shown to have a high prevalence of LAM9 SIT42 and Haarlem1 SIT62 sublineages. There is evidence that regardless of environmental factors and host genetics, differences among sublineages of Mtb strains play an important role in the course of infection and disease. Nevertheless, the genetic basis of the success of a sublineage in a specific geographic area remains uncertain. We used a pan-genome-wide association study (pan-GWAS) of 47 Mtb strains isolated from NE Medellin between 2005 and 2008 to identify the genes responsible for the phenotypic differences among high and low prevalence sublineages. Our results allowed the identification of 12 variants in 11 genes, of which 4 genes showed the strongest association to low prevalence (mmpL12, PPE29, Rv1419, and Rv1762c). The first three have been described as necessary for invasion and intracellular survival. Polymorphisms identified in low prevalence isolates may suggest related to a fitness cost of Mtb, which might reflect a decrease in their capacity to be transmitted or to cause an active infection. These results contribute to understanding the success of some sublineages of lineage-4 in a specific geographical area.},
}
@article{pmid36687572,
  author   = {Robinson, LA and Collins, ACZ and Murphy, RA and Davies, JC and Allsopp, LP},
  title    = {Diversity and prevalence of type {VI} secretion system effectors in clinical {Pseudomonas} aeruginosa isolates},
  journal  = {Frontiers in Microbiology},
  year     = {2022},
  volume   = {13},
  pages    = {1042505},
  pmid     = {36687572},
  issn     = {1664-302X},
  abstract = {Pseudomonas aeruginosa is an opportunistic pathogen and a major driver of morbidity and mortality in people with Cystic Fibrosis (CF). The Type VI secretion system (T6SS) is a molecular nanomachine that translocates effectors across the bacterial membrane into target cells or the extracellular environment enabling intermicrobial interaction. P. aeruginosa encodes three T6SS clusters, the H1-, H2- and H3-T6SS, and numerous orphan islands. Genetic diversity of T6SS-associated effectors in P. aeruginosa has been noted in reference strains but has yet to be explored in clinical isolates. Here, we perform a comprehensive bioinformatic analysis of the pangenome and T6SS effector genes in 52 high-quality clinical P. aeruginosa genomes isolated from CF patients and housed in the Personalised Approach to P. aeruginosa strain repository. We confirm that the clinical CF isolate pangenome is open and principally made up of accessory and unique genes that may provide strain-specific advantages. We observed genetic variability in some effector/immunity encoding genes and show that several well-characterised vgrG and PAAR islands are absent from numerous isolates. Our analysis shows clear evidence of disruption to T6SS genomic loci through transposon, prophage, and mobile genetic element insertions. We identified an orphan vgrG island in P. aeruginosa strain PAK and five clinical isolates using in silico analysis which we denote vgrG7, predicting a gene within this cluster to encode a Tle2 lipase family effector. Close comparison of T6SS loci in clinical isolates compared to reference P.
aeruginosa strain PAO1 revealed the presence of genes encoding eight new T6SS effectors with the following putative functions: cytidine deaminase, lipase, metallopeptidase, NADase, and pyocin. Finally, the prevalence of characterised and putative T6SS effectors were assessed in 532 publicly available P. aeruginosa genomes, which suggests the existence of accessory effectors. Our in silico study of the P. aeruginosa T6SS exposes a level of genetic diversity at T6SS genomic loci not seen to date within P. aeruginosa, particularly in CF isolates. As understanding the effector repertoire is key to identifying the targets of T6SSs and its efficacy, this comprehensive analysis provides a path for future experimental characterisation of these mediators of intermicrobial competition and host manipulation.},
}
@article {pmid36685843, year = {2022}, author = {Stuart, KC and Sherwin, WB and Edwards, RJ and Rollins, LA}, title = {Evolutionary genomics: Insights from the invasive {European} starlings}, journal = {Frontiers in genetics}, volume = {13}, pages = {1010456}, pmid = {36685843}, issn = {1664-8021}, abstract = {Two fundamental questions for evolutionary studies are the speed at which evolution occurs, and the way that this evolution may present itself within an organism's genome. Evolutionary studies on invasive populations are poised to tackle some of these pressing questions, including understanding the mechanisms behind rapid adaptation, and how it facilitates population persistence within a novel environment. Investigation of these questions are assisted through recent developments in experimental, sequencing, and analytical protocols; in particular, the growing accessibility of next generation sequencing has enabled a broader range of taxa to be characterised. In this perspective, we discuss recent genetic findings within the invasive European starlings in Australia, and outline some critical next steps within this research system. Further, we use discoveries within this study system to guide discussion of pressing future research directions more generally within the fields of population and evolutionary genetics, including the use of historic specimens, phenotypic data, non-SNP genetic variants (e.g., structural variants), and pan-genomes. In particular, we emphasise the need for exploratory genomics studies across a range of invasive taxa so we can begin understanding broad mechanisms that underpin rapid adaptation in these systems. Understanding how genetic diversity arises and is maintained in a population, and how this contributes to adaptability, requires a deep understanding of how evolution functions at the molecular level, and is of fundamental importance for the future studies and preservation of biodiversity across the globe.}, }
@article {pmid36685320, year = {2023}, author = {Liew, KJ and Zakaria, MR and Hong, CWL and Tan, MCY and Chong, CS}, title = {Draft genome sequence of {Joostella} atrarenae {M1-2[T]} with cellulolytic and hemicellulolytic ability}, journal = {3 Biotech}, volume = {13}, number = {2}, pages = {50}, pmid = {36685320}, issn = {2190-572X}, abstract = {The halophilic genus Joostella is one of the least-studied genera in the family of Flavobacteriaceae. So far, only two species were taxonomically identified with limited genomic analysis in the aspect of application has been reported. Joostella atrarenae M1-2[T] was previously isolated from a seashore sample and it is the second discovered species of the genus Joostella. In this project, the genome of J. atrarenae M1-2[T] was sequenced using NovaSeq 6000. The final assembled genome is comprised of 71 contigs, a total of 3,983,942 bp, a GC ratio of 33.2%, and encoded for 3,416 genes. The 16S rRNA gene sequence of J. atrarenae M1-2[T] shows 97.3% similarity against J. marina DSM 19592[T]. Genome-genome comparison between the two strains by ANI, dDDH, AAI, and POCP shows values of 80.8%, 23.3%, 83.4%, and 74.1% respectively. Pan-genome analysis shows that strain M1-2[T] and J. marina DSM 19592[T] shared a total of 248 core genes. Taken together, strain M-2[T] and J. marina DSM 19592[T] belong to the same genus but are two different species. CAZymes analysis revealed that strain M1-2[T] harbors 109 GHs, 40 GTs, 5 PLs, 9 CEs, and 6 AAs. Among these CAZymes, while 5 genes are related to cellulose degradation, 12 and 24 genes are found to encode for xylanolytic enzymes and other hemicellulases that involve majorly in the side chain removal of the lignocellulose structure, respectively. 
Furthermore, both the intracellular and extracellular crude extracts of strain M1-2[T] exhibited enzymatic activities against CMC, xylan, pNPG, and pNPX substrates, which corresponding to endoglucanase, xylanase, β-glucosidase, and β-xylosidase, respectively. Collectively, description of genome coupled with the enzyme assay results demonstrated that J. atrarenae M1-2[T] has a role in lignocellulosic biomass degradation, and the strain could be useful for lignocellulosic biorefining.}, }
@article {pmid36684744, year = {2022}, author = {Voelker, WG and Krishnan, K and Chougule, K and Alexander, LC and Lu, Z and Olson, A and Ware, D and Songsomboon, K and Ponce, C and Brenton, ZW and Boatwright, JL and Cooper, EA}, title = {Ten new high-quality genome assemblies for diverse bioenergy sorghum genotypes}, journal = {Frontiers in plant science}, volume = {13}, pages = {1040909}, pmid = {36684744}, issn = {1664-462X}, abstract = {INTRODUCTION: Sorghum (Sorghum bicolor (L.) Moench) is an agriculturally and economically important staple crop that has immense potential as a bioenergy feedstock due to its relatively high productivity on marginal lands. To capitalize on and further improve sorghum as a potential source of sustainable biofuel, it is essential to understand the genomic mechanisms underlying complex traits related to yield, composition, and environmental adaptations.
METHODS: Expanding on a recently developed mapping population, we generated de novo genome assemblies for 10 parental genotypes from this population and identified a comprehensive set of over 24 thousand large structural variants (SVs) and over 10.5 million single nucleotide polymorphisms (SNPs).
RESULTS: We show that SVs and nonsynonymous SNPs are enriched in different gene categories, emphasizing the need for long read sequencing in crop species to identify novel variation. Furthermore, we highlight SVs and SNPs occurring in genes and pathways with known associations to critical bioenergy-related phenotypes and characterize the landscape of genetic differences between sweet and cellulosic genotypes.
DISCUSSION: These resources can be integrated into both ongoing and future mapping and trait discovery for sorghum and its myriad uses including food, feed, bioenergy, and increasingly as a carbon dioxide removal mechanism.}, }
@article {pmid36683686, year = {2022}, author = {Bai, Z and Zhang, N and Jin, Y and Chen, L and Mao, Y and Sun, L and Fang, F and Liu, Y and Han, M and Li, G}, title = {Comprehensive analysis of 84 {Faecalibacterium} prausnitzii strains uncovers their genetic diversity, functional characteristics, and potential risks}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, pages = {919701}, pmid = {36683686}, issn = {2235-2988}, abstract = {Faecalibacterium prausnitzii is a beneficial human gut microbe and a candidate for next-generation probiotics. With probiotics now being used in clinical treatments, concerns about their safety and side effects need to be considered. Therefore, it is essential to obtain a comprehensive understanding of the genetic diversity, functional characteristics, and potential risks of different F. prausnitzii strains. In this study, we collected the genetic information of 84 F. prausnitzii strains to conduct a pan-genome analysis with multiple perspectives. Based on single-copy genes and the sequences of 16S rRNA and the compositions of the pan-genome, different phylogenetic analyses of F. prausnitzii strains were performed, which showed the genetic diversity among them. Among the proteins of the pan-genome, we found that the accessory clusters made a greater contribution to the primary genetic functions of F. prausnitzii strains than the core and specific clusters. The functional annotations of F. prausnitzii showed that only a very small number of proteins were related to human diseases and there were no secondary metabolic gene clusters encoding harmful products. At the same time, complete fatty acid metabolism was detected in F. prausnitzii. 
In addition, we detected harmful elements, including antibiotic resistance genes, virulence factors, and pathogenic genes, and proposed the probiotic potential risk index (PPRI) and probiotic potential risk score (PPRS) to classify these 84 strains into low-, medium-, and high-risk groups. Finally, 15 strains were identified as low-risk strains and prioritized for clinical application. Undoubtedly, our results provide a comprehensive understanding and insight into F. prausnitzii, and PPRI and PPRS can be applied to evaluate the potential risks of probiotics in general and to guide the application of probiotics in clinical application.}, }
@article {pmid36678781, year = {2022}, author = {Khan, MA and Amin, A and Farid, A and Ullah, A and Waris, A and Shinwari, K and Hussain, Y and Alsharif, KF and Alzahrani, KJ and Khan, H}, title = {Recent Advances in Genomics-Based Approaches for the Development of Intracellular Bacterial Pathogen Vaccines}, journal = {Pharmaceutics}, volume = {15}, number = {1}, doi = {10.3390/pharmaceutics15010152}, pmid = {36678781}, issn = {1999-4923}, abstract = {Infectious diseases continue to be a leading cause of morbidity and mortality worldwide. The majority of infectious diseases are caused by intracellular pathogenic bacteria (IPB). Historically, conventional vaccination drives have helped control the pathogenesis of intracellular bacteria and the emergence of antimicrobial resistance, saving millions of lives. However, in light of various limitations, many diseases that involve IPB still do not have adequate vaccines. In response to increasing demand for novel vaccine development strategies, a new area of vaccine research emerged following the advent of genomics technology, which changed the paradigm of vaccine development by utilizing the complete genomic data of microorganisms against them. It became possible to identify genes related to disease virulence, genetic patterns linked to disease virulence, as well as the genetic components that supported immunity and favorable vaccine responses. Complete genomic databases, and advancements in transcriptomics, metabolomics, structural genomics, proteomics, immunomics, pan-genomics, synthetic genomics, and population biology have allowed researchers to identify potential vaccine candidates and predict their effects in patients. New vaccines have been created against diseases for which previously there were no vaccines available, and existing vaccines have been improved. This review highlights the key issues and explores the evolution of vaccines. 
The increasing volume of IPB genomic data, and their application in novel genome-based techniques for vaccine development, were also examined, along with their characteristics, and the opportunities and obstacles involved. Critically, the application of genomics technology has helped researchers rapidly select and evaluate candidate antigens. Novel vaccines capable of addressing the limitations associated with conventional vaccines have been developed and pressing healthcare issues are being addressed.}, }
@article {pmid36677470, year = {2023}, author = {Charles, C and Conde, C and Vorimore, F and Cochard, T and Michelet, L and Boschiroli, ML and Biet, F}, title = {Features of {Mycobacterium} bovis Complete Genomes Belonging to 5 Different Lineages}, journal = {Microorganisms}, volume = {11}, number = {1}, doi = {10.3390/microorganisms11010177}, pmid = {36677470}, issn = {2076-2607}, abstract = {Mammalian tuberculosis (TB) is a zoonotic disease mainly due to Mycobacterium bovis (M. bovis). A current challenge for its eradication is understanding its transmission within multi-host systems. Improvements in long-read sequencing technologies have made it possible to obtain complete bacterial genomes that provide a comprehensive view of species-specific genomic features. In the context of TB, new genomic references based on complete genomes genetically close to field strains are also essential to perform precise field molecular epidemiological studies. A total of 10 M. bovis strains representing each genetic lineage identified in France and in other countries were selected for performing complete assembly of their genomes. Pangenome analysis revealed a "closed" pangenome composed of 3900 core genes and only 96 accessory genes. Whole genomes-based alignment using progressive Mauve showed remarkable conservation of the genomic synteny except that the genomes have a variable number of copies of IS6110. Characteristic genomic traits of each lineage were identified through the discovery of specific indels. Altogether, these results provide new genetic features that improve the description of M. bovis lineages. The availability of new complete representative genomes of M. bovis will be useful to epidemiological studies and better understand the transmission of this clonal-evolving pathogen.}, }
@article {pmid36677411, year = {2023}, author = {Thakur, P and Alaba, MO and Rauniyar, S and Singh, RN and Saxena, P and Bomgni, A and Gnimpieba, EZ and Lushbough, C and Goh, KM and Sani, RK}, title = {Text-Mining to Identify Gene Sets Involved in Biocorrosion by Sulfate-Reducing Bacteria: A Semi-Automated Workflow}, journal = {Microorganisms}, volume = {11}, number = {1}, doi = {10.3390/microorganisms11010119}, pmid = {36677411}, issn = {2076-2607}, abstract = {A significant amount of literature is available on biocorrosion, which makes manual extraction of crucial information such as genes and proteins a laborious task. Despite the fast growth of biology related corrosion studies, there is a limited number of gene collections relating to the corrosion process (biocorrosion). Text mining offers a potential solution by automatically extracting the essential information from unstructured text. We present a text mining workflow that extracts biocorrosion associated genes/proteins in sulfate-reducing bacteria (SRB) from literature databases (e.g., PubMed and PMC). This semi-automatic workflow is built with the Named Entity Recognition (NER) method and Convolutional Neural Network (CNN) model. With PubMed and PMCID as inputs, the workflow identified 227 genes belonging to several Desulfovibrio species. To validate their functions, Gene Ontology (GO) enrichment and biological network analysis was performed using UniprotKB and STRING-DB, respectively. The GO analysis showed that metal ion binding, sulfur binding, and electron transport were among the principal molecular functions. Furthermore, the biological network analysis generated three interlinked clusters containing genes involved in metal ion binding, cellular respiration, and electron transfer, which suggests the involvement of the extracted gene set in biocorrosion. 
Finally, the dataset was validated through manual curation, yielding a similar set of genes as our workflow; among these, hysB and hydA, and sat and dsrB were identified as the metal ion binding and sulfur metabolism genes, respectively. The identified genes were mapped with the pangenome of 63 SRB genomes that yielded the distribution of these genes across 63 SRB based on the amino acid sequence similarity and were further categorized as core and accessory gene families. SRB's role in biocorrosion involves the transfer of electrons from the metal surface via a hydrogen medium to the sulfate reduction pathway. Therefore, genes encoding hydrogenases and cytochromes might be participating in removing hydrogen from the metals through electron transfer. Moreover, the production of corrosive sulfide from the sulfur metabolism indirectly contributes to the localized pitting of the metals. After the corroboration of text mining results with SRB biocorrosion mechanisms, we suggest that the text mining framework could be utilized for genes/proteins extraction and significantly reduce the manual curation time.}, }
@article {pmid36677403, year = {2022}, author = {Romero-Calle, DX and Pedrosa-Silva, F and Tomé, LMR and Sousa, TJ and de Oliveira Santos, LTS and de Carvalho Azevedo, VA and Brenig, B and Benevides, RG and Venancio, TM and Billington, C and Góes-Neto, A}, title = {Hybrid Genomic Analysis of {Salmonella} enterica Serovar {Enteritidis} {SE3} Isolated from Polluted Soil in {Brazil}}, journal = {Microorganisms}, volume = {11}, number = {1}, doi = {10.3390/microorganisms11010111}, pmid = {36677403}, issn = {2076-2607}, abstract = {In Brazil, Salmonella enterica serovar Enteritidis is a significant health threat. Salmonella enterica serovar Enteritidis SE3 was isolated from soil at the Subaé River in Santo Amaro, Brazil, a region contaminated with heavy metals and organic waste. Illumina HiSeq and Oxford Nanopore Technologies MinION sequencing were used for de novo hybrid assembly of the Salmonella SE3 genome. This approach yielded 10 contigs with 99.98% identity with S. enterica serovar Enteritidis OLF-SE2-98984-6. Twelve Salmonella pathogenic islands, multiple virulence genes, multiple antimicrobial gene resistance genes, seven phage defense systems, seven prophages and a heavy metal resistance gene were encoded in the genome. Pangenome analysis of the S. enterica clade, including Salmonella SE3, revealed an open pangenome, with a core genome of 2137 genes. Our study showed the effectiveness of a hybrid sequence assembly approach for environmental Salmonella genome analysis using HiSeq and MinION data. This approach enabled the identification of key resistance and virulence genes, and these data are important to inform the control of Salmonella and heavy metal pollution in the Santo Amaro region of Brazil.}, }
@article {pmid36677357, year = {2022}, author = {Myintzaw, P and Pennone, V and McAuliffe, O and Begley, M and Callanan, M}, title = {Variability in Cold Tolerance of Food and Clinical {Listeria} monocytogenes Isolates}, journal = {Microorganisms}, volume = {11}, number = {1}, doi = {10.3390/microorganisms11010065}, pmid = {36677357}, issn = {2076-2607}, abstract = {The aim of this study was to investigate the level of strain variability amongst food and clinical Listeria monocytogenes isolates growing at low temperatures (4 and 7 °C) in both laboratory media and real food matrices. Isolates (n = 150) grown in laboratory media demonstrated a large variation in growth profiles measured using optical density. Overall, it was noted that clinical isolates exhibited a significantly higher growth rate (p ≤ 0.05) at 7 °C than the other isolates. Analysis of variance (ANOVA) tests of isolates grouped using Multi Locus Sequence Typing (MLST) revealed that clonal complex 18 (CC18) isolates were significantly (p ≤ 0.05) faster growing at 4 °C than other CC-type isolates while CC101, CC18, CC8, CC37 and CC14 were faster growing than other CC types at 7 °C. Euclidean distance and Ward method-based hierarchical clustering of mean growth rates classified 33.33% of isolates as faster growing. Fast and slow growing representative isolates were selected from the cluster analysis and growth rates were determined using plate count data in laboratory media and model food matrices. In agreement with the optical density experiments, CC18 isolates were faster and CC121 isolates were slower than other CC types in laboratory media, UHT milk and fish pie. The same trend was observed in chocolate milk but the differences were not statistically significant. Moreover, pan-genome analysis (Scoary) of isolate genome sequences only identified six genes of unknown function associated with increased cold tolerance while failing to identify any known cold tolerance genes. 
Overall, an association that was consistent in laboratory media and real food matrices was demonstrated between isolate CC type and increased cold tolerance.}, }
@article {pmid36675897, year = {2023}, author = {Bigey, F and Pasteur, E and Połomska, X and Thomas, S and Crutz-Le Coq, AM and Devillers, H and Neuvéglise, C}, title = {Insights into the Genomic and Phenotypic Landscape of the Oleaginous Yeast {Yarrowia} lipolytica}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {9}, number = {1}, doi = {10.3390/jof9010076}, pmid = {36675897}, issn = {2309-608X}, abstract = {Although Yarrowia lipolytica is a model yeast for the study of lipid metabolism, its diversity is poorly known, as studies generally consider only a few standard laboratory strains. To extend our knowledge of this biotechnological workhorse, we investigated the genomic and phenotypic diversity of 56 natural isolates. Y. lipolytica is classified into five clades with no correlation between clade membership and geographic or ecological origin. A low genetic diversity (π = 0.0017) and a pan-genome (6528 genes) barely different from the core genome (6315 genes) suggest Y. lipolytica is a recently evolving species. Large segmental duplications were detected, totaling 892 genes. With three new LTR-retrotransposons of the Gypsy family (Tyl4, Tyl9, and Tyl10), the transposable element content of genomes appeared diversified but still low (from 0.36% to 3.62%). We quantified 34 traits with substantial phenotypic diversity, but genome-wide association studies failed to evidence any associations. Instead, we investigated known genes and found four mutational events leading to XPR2 protease inactivation. Regarding lipid metabolism, most high-impact mutations were found in family-belonging genes, such as ALK or LIP, and therefore had a low phenotypic impact, suggesting that the huge diversity of lipid synthesis and accumulation is multifactorial or due to complex regulations.}, }
@article {pmid36671332, year = {2023}, author = {Fono-Tamo, EUK and Kamika, I and Dewar, JB and Lekota, KE}, title = {Comparative Genomics Revealed a Potential Threat of {Aeromonas} rivipollensis {G87} Strain and Its Antibiotic Resistance}, journal = {Antibiotics (Basel, Switzerland)}, volume = {12}, number = {1}, doi = {10.3390/antibiotics12010131}, pmid = {36671332}, issn = {2079-6382}, abstract = {Aeromonas rivipollensis is an emerging pathogen linked to a broad range of infections in humans. Due to the inability to accurately differentiate Aeromonas species using conventional techniques, in-depth comparative genomics analysis is imperative to identify them. This study characterized 4 A. rivipollensis strains that were isolated from river water in Johannesburg, South Africa, by whole-genome sequencing (WGS). WGS was carried out, and taxonomic classification was employed to profile virulence and antibiotic resistance (AR). The AR profiles of the A. rivipollensis genomes consisted of betalactams and cephalosporin-resistance genes, while the tetracycline-resistance gene (tetE) was only determined to be in the G87 strain. A mobile genetic element (MGE), transposons TnC, was determined to be in this strain that mediates tetracycline resistance MFS efflux tetE. A pangenomic investigation revealed the G87 strain's unique characteristic, which included immunoglobulin A-binding proteins, extracellular polysialic acid, and exogenous sialic acid as virulence factors. The identified polysialic acid and sialic acid genes can be associated with antiphagocytic and antibactericidal properties, respectively. MGEs such as transposases introduce virulence and AR genes in the A. rivipollensis G87 genome. This study showed that A. rivipollensis is generally resistant to a class of beta-lactams and cephalosporins. 
MGEs pose a challenge in some of the Aeromonas species strains and are subjected to antibiotics resistance and the acquisition of virulence genes in the ecosystem.}, }
@article {pmid36671226, year = {2022}, author = {Thakur, Z and Vaid, RK and Anand, T and Tripathi, BN}, title = {Comparative Genome Analysis of 19 {Trueperella} pyogenes Strains Originating from Different Animal Species Reveal a Genetically Diverse Open Pan-Genome}, journal = {Antibiotics (Basel, Switzerland)}, volume = {12}, number = {1}, doi = {10.3390/antibiotics12010024}, pmid = {36671226}, issn = {2079-6382}, abstract = {Trueperella pyogenes is a Gram-positive opportunistic pathogen that causes severe cases of mastitis, metritis, and pneumonia in a wide range of animals, resulting in significant economic losses. Although little is known about the virulence factors involved in the disease pathogenesis, a comprehensive comparative genome analysis of T. pyogenes genomes has not been performed till date. Hence, present investigation was carried out to characterize and compare 19 T. pyogenes genomes originating in different geographical origins including the draftgenome of the first Indian origin strain T. pyogenes Bu5. Additionally, candidate virulence determinants that could be crucial for their pathogenesis were also detected and analyzed by using various bioinformatics tools. The pan-genome calculations revealed an open pan-genome of T. pyogenes. In addition, an inventory of virulence related genes, 190 genomic islands, 31 prophage sequences, and 40 antibiotic resistance genes that could play a significant role in organism's pathogenicity were detected. The core-genome based phylogeny of T. pyogenes demonstrates a polyphyletic, host-associated group with a high degree of genomic diversity. The identified core-genome can be further used for screening of drug and vaccine targets. The investigation has provided unique insights into pan-genome, virulome, mobiliome, and resistome of T. pyogenes genomes and laid the foundation for future investigations.}, }
@article {pmid36669850, year = {2023}, author = {Tonkin-Hill, G and Gladstone, RA and Pöntinen, AK and Arredondo-Alonso, S and Bentley, SD and Corander, J}, title = {Robust analysis of prokaryotic pangenome gene gain and loss rates with {Panstripe}}, journal = {Genome research}, doi = {10.1101/gr.277340.122}, pmid = {36669850}, issn = {1549-5469}, abstract = {Horizontal gene transfer (HGT) plays a critical role in the evolution and diversification of many microbial species. The resulting dynamics of gene gain and loss can have important implications for the development of antibiotic resistance and the design of vaccine and drug interventions. Methods for the analysis of gene presence/absence patterns typically do not account for errors introduced in the automated annotation and clustering of gene sequences. In particular, methods adapted from ecological studies, including the pangenome gene accumulation curve, can be misleading as they may reflect the underlying diversity in the temporal sampling of genomes rather than a difference in the dynamics of HGT. Here, we introduce Panstripe, a method based on generalized linear regression that is robust to population structure, sampling bias, and errors in the predicted presence/absence of genes. We show using simulations that Panstripe can effectively identify differences in the rate and number of genes involved in HGT events, and illustrate its capability by analyzing several diverse bacterial genome data sets representing major human pathogens.}, }
@article {pmid36662619, year = {2023}, author = {Secomandi, S and Gallo, GR and Sozzoni, M and Iannucci, A and Galati, E and Abueg, L and Balacco, J and Caprioli, M and Chow, W and Ciofi, C and Collins, J and Fedrigo, O and Ferretti, L and Fungtammasan, A and Haase, B and Howe, K and Kwak, W and Lombardo, G and Masterson, P and Messina, G and Møller, AP and Mountcastle, J and Mousseau, TA and Ferrer Obiol, J and Olivieri, A and Rhie, A and Rubolini, D and Saclier, M and Stanyon, R and Stucki, D and Thibaud-Nissen, F and Torrance, J and Torroni, A and Weber, K and Ambrosini, R and Bonisoli-Alquati, A and Jarvis, ED and Gianfranceschi, L and Formenti, G}, title = {A chromosome-level reference genome and pangenome for barn swallow population genomics}, journal = {Cell reports}, volume = {42}, number = {1}, pages = {111992}, doi = {10.1016/j.celrep.2023.111992}, pmid = {36662619}, issn = {2211-1247}, abstract = {Insights into the evolution of non-model organisms are limited by the lack of reference genomes of high accuracy, completeness, and contiguity. Here, we present a chromosome-level, karyotype-validated reference genome and pangenome for the barn swallow (Hirundo rustica). We complement these resources with a reference-free multialignment of the reference genome with other bird genomes and with the most comprehensive catalog of genetic markers for the barn swallow. We identify potentially conserved and accelerated genes using the multialignment and estimate genome-wide linkage disequilibrium using the catalog. We use the pangenome to infer core and accessory genes and to detect variants using it as a reference. Overall, these resources will foster population genomics studies in the barn swallow, enable detection of candidate genes in comparative genomics studies, and help reduce bias toward a single reference genome.}, }
@article {pmid36646895, year = {2023}, author = {Sibbesen, JA and Eizenga, JM and Novak, AM and Sirén, J and Chang, X and Garrison, E and Paten, B}, title = {Haplotype-aware pantranscriptome analyses using spliced pangenome graphs}, journal = {Nature methods}, pmid = {36646895}, issn = {1548-7105}, abstract = {Pangenomics is emerging as a powerful computational paradigm in bioinformatics. This field uses population-level genome reference structures, typically consisting of a sequence graph, to mitigate reference bias and facilitate analyses that were challenging with previous reference-based methods. In this work, we extend these methods into transcriptomics to analyze sequencing data using the pantranscriptome: a population-level transcriptomic reference. Our toolchain, which consists of additions to the VG toolkit and a standalone tool, RPVG, can construct spliced pangenome graphs, map RNA sequencing data to these graphs, and perform haplotype-aware expression quantification of transcripts in a pantranscriptome. We show that this workflow improves accuracy over state-of-the-art RNA sequencing mapping methods, and that it can efficiently quantify haplotype-specific transcript expression without needing to characterize the haplotypes of a sample beforehand.}, }
@article {pmid36646262, year = {2023}, author = {Mishra, A and Kesarwani, S and Jaiswal, TP and Bhattacharjee, S and Chakraborty, S and Mishra, AK and Singh, SS}, title = {Decoding whole genome of {Anoxybacillus} rupiensis {TPH1} isolated from {Tatapani} hot spring, {India} and giving insight into bioremediation ability of {TPH1} via heavy metals and azo dyes}, journal = {Research in microbiology}, pages = {104027}, doi = {10.1016/j.resmic.2023.104027}, pmid = {36646262}, issn = {1769-7123}, abstract = {A moderately thermophilic, gram-positive genomospecies Anoxybacillus rupiensis TPH1 was isolated from Tatapani hot spring, Chhattisgarh, India. Genome of 3.70 Mb with 42.3% GC subsumed 4131 CDSs, 65 tRNA, 5 rRNA, 35 AMR and 19 drug target genes. Further, comparative genomics of 19 Anoxybacillus spp. exhibited an open pan genome of 13102 genes along with core (10.62%), unique (43.5%) and accessory (45.9%) genes. Moreover, phylogenomic tree displayed clustering of Anoxybacillus spp. into two distinct clades where clade A species harbored larger genomes, more unique genes, CDS and hypothetical proteins than clade B species. Further, distribution of azoreductases showed FMN-binding NADPH azoreductase (AzoRed1) presence in clade A species only and FMN-binding NADH azoreductase (AzoRed2) harboring by species of both clades. Heavy metal resistance genes distribution showed omnipresence of znuA, copZ and arsC in both clades, dispersed presence of cbiM, czcD, merA and feoB over both clades and harboring of nikA and acr3 by few species of clade A only. Additionally, molecular docking of AzoRed1, AzoRed2, ZnuA, CopZ, Acr3, CbiM, CzcD, MerA and NikA with their respective ligands indicated high affinity and stable binding. Conclusively, present study provided insight into gene repertoire of genus Anoxybacillus and a basis for the potential application of this thermophile in bioremediation of azo dyes and heavy metals.}, }
@article{pmid36644533,
  year     = {2022},
  author   = {Pang, M. and Tu, T. and Wang, Y. and Zhang, P. and Ren, M. and Yao, X. and Luo, Y. and Yang, Z.},
  title    = {Design of a multi-epitope vaccine against {Haemophilus} parasuis based on pan-genome and immunoinformatics approaches},
  journal  = {Frontiers in veterinary science},
  volume   = {9},
  pages    = {1053198},
  pmid     = {36644533},
  issn     = {2297-1769},
  abstract = {BACKGROUND: Glässer's disease, caused by Haemophilus parasuis (HPS), is responsible for economic losses in the pig industry worldwide. However, the existing commercial vaccines offer poor protection and there are significant barriers to the development of effective vaccines.
METHODS: In the current study, we aimed to identify potential vaccine candidates and design a multi-epitope vaccine against HPS by performing pan-genomic analysis of 121 strains and using a reverse vaccinology approach.
RESULTS: The designed vaccine constructs consist of predicted epitopes of B and T cells derived from the outer membrane proteins of the HPS core genome. The vaccine was found to be highly immunogenic, non-toxic, and non-allergenic as well as have stable physicochemical properties. It has a high binding affinity to Toll-like receptor 2. In addition, in silico immune simulation results showed that the vaccine elicited an effective immune response. Moreover, the mouse polyclonal antibody obtained by immunizing the vaccine protein can be combined with different serotypes and non-typable Haemophilus parasuis in vitro.
CONCLUSION: The overall results of the study suggest that the designed multi-epitope vaccine is a promising candidate for pan-prophylaxis against different strains of HPS.},
}
@article{pmid36326658,
  year     = {2022},
  author   = {Kittiwan, N. and Calland, J. K. and Mourkas, E. and Hitchings, M. D. and Murray, S. and Tadee, P. and Tadee, P. and Duangsonk, K. and Meric, G. and Sheppard, S. K. and Patchanee, P. and Pascoe, B.},
  title    = {Genetic diversity and variation in antimicrobial-resistance determinants of non-serotype 2 {Streptococcus} suis isolates from healthy pigs},
  journal  = {Microbial genomics},
  volume   = {8},
  number   = {11},
  pmid     = {36326658},
  issn     = {2057-5858},
  support  = {MR/T030062/1//Medical Research Council/United Kingdom ; MR/L015080/1//Medical Research Council/United Kingdom ; },
  mesh     = {Swine ; Animals ; *Streptococcus suis/genetics ; *Streptococcal Infections/veterinary/genetics ; Anti-Bacterial Agents/pharmacology ; Genetic Variation ; },
  abstract = {Streptococcus suis is a leading cause of bacterial meningitis in South-East Asia, with frequent zoonotic transfer to humans associated with close contact with pigs. A small number of invasive lineages are responsible for endemic infection in the swine industry, causing considerable global economic losses. A lack of surveillance and a rising trend in clinical treatment failure has raised concerns of growing antimicrobial resistance (AMR) among invasive S. suis . Gene flow between healthy and disease isolates is poorly understood and, in this study, we sample and sequence a collection of isolates predominantly from healthy pigs in Chiang Mai province, Northern Thailand. Pangenome characterization identified extensive genetic diversity and frequent AMR carriage in isolates from healthy pigs. Multiple AMR genes were identified, conferring resistance to aminoglycosides, lincosamides, tetracycline and macrolides. All isolates were non-susceptible to three or more different antimicrobial classes, and 75 % of non-serotype 2 isolates were non-susceptible to six or more classes (compared to 37.5 % of serotype 2 isolates).
AMR genes were found on integrative and conjugative elements previously observed in other species, suggesting a mobile gene pool that can be accessed by invasive disease isolates. This article contains data hosted by Microreact.},
}
@article{pmid36638170,
  year     = {2023},
  author   = {Cai, H. and McLimans, C. J. and Beyer, J. E. and Krumholz, L. R. and Hambright, K. D.},
  title    = {{Microcystis} pangenome reveals cryptic diversity within and across morphospecies},
  journal  = {Science advances},
  volume   = {9},
  number   = {2},
  pages    = {eadd3783},
  doi      = {10.1126/sciadv.add3783},
  pmid     = {36638170},
  issn     = {2375-2548},
  abstract = {Microcystis, a common harmful algal bloom (HAB) taxon, threatens water supplies and human health, yet species delimitation is contentious in this taxon, leading to challenges in research and management of this threat. Historical and common morphology-based classifications recognize multiple morphospecies, most with variable and diverse ecologies, while DNA sequence-based classifications indicate a single species with multiple ecotypes. To better delimit Microcystis species, we conducted a pangenome analysis of 122 genomes. Core- and non-core gene phylogenetic analyses placed 113 genomes into 23 monophyletic clusters containing at least two genomes. Overall, genome-related indices revealed that Microcystis contains at least 16 putative genospecies. Fifteen genospecies included at least one Microcystis aeruginosa morphospecies, and 10 genospecies included two or more morphospecies. This classification system will enable consistent taxonomic identification of Microcystis and thereby aid in resolving some of the complexities and controversies that have long characterized eco-evolutionary research and management of this important HAB taxon.},
}
@article{pmid36630500,
  year     = {2023},
  author   = {Konno, N. and Iwasaki, W.},
  title    = {Machine learning enables prediction of metabolic system evolution in bacteria},
  journal  = {Science advances},
  volume   = {9},
  number   = {2},
  pages    = {eadc9130},
  doi      = {10.1126/sciadv.adc9130},
  pmid     = {36630500},
  issn     = {2375-2548},
  abstract = {Evolution prediction is a long-standing goal in evolutionary biology, with potential impacts on strategic pathogen control, genome engineering, and synthetic biology. While laboratory evolution studies have shown the predictability of short-term and sequence-level evolution, that of long-term and system-level evolution has not been systematically examined. Here, we show that the gene content evolution of metabolic systems is generally predictable by applying ancestral gene content reconstruction and machine learning techniques to ~3000 bacterial genomes. Our framework, Evodictor, successfully predicted gene gain and loss evolution at the branches of the reference phylogenetic tree, suggesting that evolutionary pressures and constraints on metabolic systems are universally shared. Investigation of pathway architectures and meta-analysis of metagenomic datasets confirmed that these evolutionary patterns have physiological and ecological bases as functional dependencies among metabolic reactions and bacterial habitat changes. Last, pan-genomic analysis of intraspecies gene content variations proved that even "ongoing" evolution in extant bacterial species is predictable in our framework.},
}
@article{pmid36627554,
  year     = {2023},
  author   = {Forgacova, N. and Holesova, Z. and Hekel, R. and Sedlackova, T. and Pos, Z. and Krivosikova, L. and Janega, P. and Kuracinova, K. M. and Babal, P. and Radvak, P. and Radvanszky, J. and Gazdarica, J. and Budis, J. and Szemes, T.},
  title    = {Evaluation and limitations of different approaches among {COVID-19} fatal cases using whole-exome sequencing data},
  journal  = {BMC genomics},
  volume   = {24},
  number   = {1},
  pages    = {12},
  pmid     = {36627554},
  issn     = {1471-2164},
  abstract = {BACKGROUND: COVID-19 caused by the SARS-CoV-2 infection may result in various disease symptoms and severity, ranging from asymptomatic, through mildly symptomatic, up to very severe and even fatal cases. Although environmental, clinical, and social factors play important roles in both susceptibility to the SARS-CoV-2 infection and progress of COVID-19 disease, it is becoming evident that both pathogen and host genetic factors are important too. In this study, we report findings from whole-exome sequencing (WES) of 27 individuals who died due to COVID-19, especially focusing on frequencies of DNA variants in genes previously associated with the SARS-CoV-2 infection and the severity of COVID-19.
RESULTS: We selected the risk DNA variants/alleles or target genes using four different approaches: 1) aggregated GWAS results from the GWAS Catalog; 2) selected publications from PubMed; 3) the aggregated results of the Host Genetics Initiative database; and 4) a commercial DNA variant annotation/interpretation tool providing its own knowledgebase. We divided these variants/genes into those reported to influence the susceptibility to the SARS-CoV-2 infection and those influencing the severity of COVID-19. Based on the above, we compared the frequencies of alleles found in the fatal COVID-19 cases to the frequencies identified in two population control datasets (non-Finnish European population from the gnomAD database and genomic frequencies specific for the Slovak population from our own database). When compared to both control population datasets, our analyses indicated a trend of higher frequencies of severe COVID-19 associated risk alleles among fatal COVID-19 cases. This trend reached statistical significance specifically when using the HGI-derived variant list. We also analysed other approaches to WES data evaluation, demonstrating its utility as well as limitations.
CONCLUSIONS: Although our results proved the likely involvement of host genetic factors pointed out by previous studies looking into severity of COVID-19 disease, careful considerations of the molecular-testing strategies and the evaluated genomic positions may have a strong impact on the utility of genomic testing.},
}
@article{pmid36627170,
  year     = {2023},
  author   = {Nii, T. and Maeda, Y. and Motooka, D. and Naito, M. and Matsumoto, Y. and Ogawa, T. and Oguro-Igashira, E. and Kishikawa, T. and Yamashita, M. and Koizumi, S. and Kurakawa, T. and Okumura, R. and Kayama, H. and Murakami, M. and Sakaguchi, T. and Das, B. and Nakamura, S. and Okada, Y. and Kumanogoh, A. and Takeda, K.},
  title    = {Genomic repertoires linked with pathogenic potency of arthritogenic {Prevotella} copri isolated from the gut of patients with rheumatoid arthritis},
  journal  = {Annals of the rheumatic diseases},
  doi      = {10.1136/ard-2022-222881},
  pmid     = {36627170},
  issn     = {1468-2060},
  abstract = {OBJECTIVES: Prevotella copri is considered to be a contributing factor in rheumatoid arthritis (RA). However, in some non-Westernised countries, healthy individuals also harbour an abundance of P. copri in the intestine. This study investigated the pathogenicity of RA patient-derived P. copri (P. copri RA) compared with healthy control-derived P. copri (P. copri HC).
METHODS: We obtained 13 P. copri strains from the faeces of patients with RA and healthy controls. Following whole genome sequencing, the sequences of P. copri RA and P. copri HC were compared. To analyse the arthritis-inducing ability of P. copri, we examined two arthritis models (1) a collagen-induced arthritis model harbouring P. copri under specific-pathogen-free conditions and (2) an SKG mouse arthritis model under P. copri-monocolonised conditions. Finally, to evaluate the ability of P. copri to activate innate immune cells, we performed in vitro stimulation of bone marrow-derived dendritic cells (BMDCs) by P. copri RA and P. copri HC.
RESULTS: Comparative genomic analysis revealed no apparent differences in the core gene contents between P. copri RA and P. copri HC, but pangenome analysis revealed the high genome plasticity of P. copri. We identified a P. copri RA-specific genomic region as a conjugative transposon. In both arthritis models, P. copri RA-induced more severe arthritis than P. copri HC. In vitro BMDC stimulation experiments revealed the upregulation of IL-17 and Th17-related cytokines (IL-6, IL-23) by P. copri RA.
CONCLUSION: Our findings reveal the genetic diversity of P. copri, and the genomic signatures associated with strong arthritis-inducing ability of P. copri RA. Our study contributes towards elucidation of the complex pathogenesis of RA.},
}
@article{pmid36623869,
  year     = {2022},
  author   = {Ruggieri, A. A. and Livraghi, L. and Lewis, J. J. and Evans, E. and Cicconardi, F. and Hebberecht, L. and Ortiz-Ruiz, Y. and Montgomery, S. H. and Ghezzi, A. and Rodriguez-Martinez, J. A. and Jiggins, C. D. and McMillan, W. O. and Counterman, B. A. and Papa, R. and Van Belleghem, S. M.},
  title    = {Erratum: A butterfly pan-genome reveals that a large amount of structural variation underlies the evolution of chromatin accessibility},
  journal  = {Genome research},
  volume   = {32},
  number   = {11--12},
  pages    = {2145},
  doi      = {10.1101/gr.277534.122},
  pmid     = {36623869},
  issn     = {1549-5469},
}
@article{pmid36622155,
  year     = {2023},
  author   = {Saak, C. C. and Pierce, E. C. and Dinh, C. B. and Portik, D. and Hall, R. and Ashby, M. and Dutton, R. J.},
  title    = {Longitudinal, Multi-Platform Metagenomics Yields a High-Quality Genomic Catalog and Guides an In Vitro Model for Cheese Communities},
  journal  = {mSystems},
  pages    = {e0070122},
  doi      = {10.1128/msystems.00701-22},
  pmid     = {36622155},
  issn     = {2379-5077},
  abstract = {Microbiomes are intricately intertwined with human health, geochemical cycles, and food production. While many microbiomes of interest are highly complex and experimentally intractable, cheese rind microbiomes have proven to be powerful model systems for the study of microbial interactions. To provide a more comprehensive view of the genomic potential and temporal dynamics of cheese rind communities, we combined longitudinal, multi-platform metagenomics of three ripening washed-rind cheeses with whole-genome sequencing of community isolates. Sequencing-based approaches revealed a highly reproducible microbial succession in each cheese and the coexistence of closely related Psychrobacter species and enabled the prediction of plasmid and phage diversity and their host associations. In combination with culture-based approaches, we established a genomic catalog and a paired 16-member in vitro washed-rind cheese system. The combination of multi-platform metagenomic time-series data and an in vitro model provides a rich resource for further investigation of cheese rind microbiomes both computationally and experimentally. IMPORTANCE Metagenome sequencing can provide great insights into microbiome composition and function and help researchers develop testable hypotheses. Model microbiomes, such as those composed of cheese rind bacteria and fungi, allow the testing of these hypotheses in a controlled manner. Here, we first generated an extensive longitudinal metagenomic data set.
This data set reveals successional dynamics, yields a phyla-spanning bacterial genomic catalog, associates mobile genetic elements with their hosts, and provides insights into functional enrichment of Psychrobacter in the cheese environment. Next, we show that members of the washed-rind cheese microbiome lend themselves to in vitro community reconstruction. This paired metagenomic data and in vitro system can thus be used as a platform for generating and testing hypotheses related to the dynamics within, and the functions associated with, cheese rind microbiomes.},
}
@article{pmid36621865,
  year     = {2023},
  author   = {Zhang, Z. and Li, K. and Zhang, H. and Wang, Q. and Zhao, L. and Liu, J. and Chen, H.},
  title    = {A single silk and multiple pollen-expressed {PMEs} at the {Ga1} locus modulate maize unilateral cross-incompatibility},
  journal  = {Journal of integrative plant biology},
  doi      = {10.1111/jipb.13445},
  pmid     = {36621865},
  issn     = {1744-7909},
  abstract = {The Gametophyte factor1 (Ga1) locus in maize confers unilateral cross-incompatibility (UCI), and it is controlled by both pollen and silk-specific determinants. Although the Ga1 locus has been reported for more than a century and is widely utilized in maize breeding programs, only the pollen-specific ZmGa1P has been shown to function as a male determinant; thus, the genomic structure of the Ga1 locus and all the determinants that control UCI at this locus have not yet been fully characterized. Here, we used map-based cloning to confirm the determinants of UCI at the Ga1 locus and maize pan-genome sequence data to characterize the genomic structure of the Ga1 locus. The Ga1 locus comprises one silk-expressed PME (ZmGa1F) and eight pollen-expressed PMEs (ZmGa1P and ZmGa1PL1-7). Knockout of ZmGa1F in Ga1/Ga1 lines leads to the complete loss of the female barrier function. The expression of individual ZmGa1PL genes in a ga1/ga1 background endows ga1 pollen with the ability to overcome the female barrier of the Ga1 locus. These findings, combined with genomic data and genetic analyses, indicate that the Ga1 locus is modulated by a single female determinant and multiple male determinants, which are tightly linked. The results of this study provide valuable insights into the genomic structure of the Ga2 and Tcb1 loci and will aid applications of these loci in maize breeding programs. This article is protected by copyright. All rights reserved.},
}
@article{pmid36619820,
  year     = {2023},
  author   = {{Khushboo} and Singhvi, N. and Gupta, V. and Dhaka, N. and Dubey, K. K.},
  title    = {Draft genome sequence of {Streptomyces} sp. {KD18}, isolated from industrial soil},
  journal  = {3 Biotech},
  volume   = {13},
  number   = {1},
  pages    = {34},
  doi      = {10.1007/s13205-022-03453-3},
  pmid     = {36619820},
  issn     = {2190-572X},
  abstract = {UNLABELLED: The present study scrutinizes the presence of Streptomyces strains in the soil sample collected from industrial area of Bahadurgarh (Haryana) India. The morphological approach manifested the isolated strain belong to Streptomyces species and named as Streptomyces sp. KD18. Sequencing of Streptomyces sp. KD18 genome was performed by Illumina Nextseq500 platform. 65 contigs were generated via SPAdes v3.11.1 and harboured genome size of 7.2 Mb. AntiSMASH server revealed the presence of 25 biosynthetic gene clusters in KD18 genome where BGC of lipstatin was of more interest from industrial and pharmaceutical purpose. The draft genome sequence represented via ANI values claimed that the KD18 strain belongs to Streptomyces toxytricini and finally named as S. toxytricini KD18. The LC-MS analysis of the extracted metabolite confirmed the production of lipstatin. The genome sequence data have been deposited to NCBI under the accession number of GCA_014748315.1.
SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s13205-022-03453-3.},
}
@article{pmid36618639,
  year     = {2022},
  author   = {Parakkunnel, R. and Naik K, B. and Vanishree, G. and C, S. and Purru, S. and Bhaskar K, U. and Bhat, K. V. and Kumar, S.},
  title    = {Gene fusions, micro-exons and splice variants define stress signaling by {AP2/ERF} and {WRKY} transcription factors in the sesame pan-genome},
  journal  = {Frontiers in plant science},
  volume   = {13},
  pages    = {1076229},
  pmid     = {36618639},
  issn     = {1664-462X},
  abstract = {Evolutionary dynamics of AP2/ERF and WRKY genes, the major components of defense response were studied extensively in the sesame pan-genome. Massive variation was observed for gene copy numbers, genome location, domain structure, exon-intron structure and protein parameters. In the pan-genome, 63% of AP2/ERF members were devoid of introns whereas >99% of WRKY genes contained multiple introns. AP2 subfamily was found to be micro-exon rich with the adjoining intronic sequences sharing sequence similarity to many stress-responsive and fatty acid metabolism genes. WRKY family included extensive multi-domain gene fusions where the additional domains significantly enhanced gene and exonic sizes as well as gene copy numbers. The fusion genes were found to have roles in acquired immunity, stress response, cell and membrane integrity as well as ROS signaling. The individual genomes shared extensive synteny and collinearity although ecological adaptation was evident among the Chinese and Indian accessions. Significant positive selection effects were noticed for both micro-exon and multi-domain genes. Splice variants with changes in acceptor, donor and branch sites were common and 6-7 splice variants were detected per gene. The study ascertained vital roles of lipid metabolism and chlorophyll biosynthesis in the defense response and stress signaling pathways. 60% of the studied genes localized in the nucleus while 20% preferred chloroplast.
Unique cis-element distribution was noticed in the upstream promoter region with MYB and STRE in WRKY genes while MYC was present in the AP2/ERF genes. Intron-less genes exhibited great diversity in the promoter sequences wherein the predominance of dosage effect indicated variable gene expression levels. Mimicking the NBS-LRR genes, a chloroplast localized WRKY gene, Swetha_24868, with additional domains of chorismate mutase, cAMP and voltage-dependent potassium channel was found to act as a master regulator of defense signaling, triggering immunity and reducing ROS levels.},
}
@article{pmid36614303,
  year     = {2023},
  author   = {Schanknecht, E. and Bachari, A. and Nassar, N. and Piva, T. and Mantri, N.},
  title    = {Phytochemical Constituents and Derivatives of {Cannabis} sativa; Bridging the Gap in Melanoma Treatment},
  journal  = {International journal of molecular sciences},
  volume   = {24},
  number   = {1},
  doi      = {10.3390/ijms24010859},
  pmid     = {36614303},
  issn     = {1422-0067},
  abstract = {Melanoma is deadly, physically impairing, and has ongoing treatment deficiencies. Current treatment regimens include surgery, targeted kinase inhibitors, immunotherapy, and combined approaches. Each of these treatments face pitfalls, with diminutive five-year survival in patients with advanced metastatic invasion of lymph and secondary organ tissues. Polyphenolic compounds, including cannabinoids, terpenoids, and flavonoids; both natural and synthetic, have emerging evidence of nutraceutical, cosmetic and pharmacological potential, including specific anti-cancer, anti-inflammatory, and palliative utility. Cannabis sativa is a wellspring of medicinal compounds whose direct and adjunctive application may offer considerable relief for melanoma suffers worldwide. This review aims to address the diverse applications of C. sativa's biocompounds in the scope of melanoma and suggest it as a strong candidate for ongoing pharmacological evaluation.},
}
@article{pmid36608657,
  year     = {2023},
  author   = {Hackl, T. and Laurenceau, R. and Ankenbrand, M. J. and Bliem, C. and Cariani, Z. and Thomas, E. and Dooley, K. D. and Arellano, A. A. and Hogle, S. L. and Berube, P. and Leventhal, G. E. and Luo, E. and Eppley, J. M. and Zayed, A. A. and Beaulaurier, J. and Stepanauskas, R. and Sullivan, M. B. and DeLong, E. F. and Biller, S. J. and Chisholm, S. W.},
  title    = {Novel integrative elements and genomic plasticity in ocean ecosystems},
  journal  = {Cell},
  volume   = {186},
  number   = {1},
  pages    = {47--62.e16},
  doi      = {10.1016/j.cell.2022.12.006},
  pmid     = {36608657},
  issn     = {1097-4172},
  abstract = {Horizontal gene transfer accelerates microbial evolution. The marine picocyanobacterium Prochlorococcus exhibits high genomic plasticity, yet the underlying mechanisms are elusive. Here, we report a novel family of DNA transposons-"tycheposons"-some of which are viral satellites while others carry cargo, such as nutrient-acquisition genes, which shape the genetic variability in this globally abundant genus. Tycheposons share distinctive mobile-lifecycle-linked hallmark genes, including a deep-branching site-specific tyrosine recombinase. Their excision and integration at tRNA genes appear to drive the remodeling of genomic islands-key reservoirs for flexible genes in bacteria. In a selection experiment, tycheposons harboring a nitrate assimilation cassette were dynamically gained and lost, thereby promoting chromosomal rearrangements and host adaptation. Vesicles and phage particles harvested from seawater are enriched in tycheposons, providing a means for their dispersal in the wild. Similar elements are found in microbes co-occurring with Prochlorococcus, suggesting a common mechanism for microbial diversification in the vast oligotrophic oceans.},
}
@article{pmid36607068,
  year     = {2023},
  author   = {Wong, E. D. and Miyasato, S. R. and Aleksander, S. and Karra, K. and Nash, R. S. and Skrzypek, M. S. and Weng, S. and Engel, S. R. and Cherry, J. M.},
  title    = {{Saccharomyces Genome Database} Update: Server Architecture, Pan-Genome Nomenclature, and External Resources},
  journal  = {Genetics},
  doi      = {10.1093/genetics/iyac191},
  pmid     = {36607068},
  issn     = {1943-2631},
  abstract = {As one of the first model organism knowledgebases, Saccharomyces Genome Database (SGD) has been supporting the scientific research community since 1993. As technologies and research evolve, so does SGD: from updates in software architecture, to curation of novel data types, to incorporation of data from, and collaboration with, other knowledgebases. We are continuing to make steps toward providing the community with an S. cerevisiae pan-genome. Here we describe software upgrades, a new nomenclature system for genes not found in the reference strain, and additions to gene pages. With these improvements, we aim to remain a leading resource for students, researchers, and the broader scientific community.},
}
@article{pmid36605514,
  year     = {2022},
  author   = {Dong, C. and Wei, L. and Wang, J. and Lai, Q. and Huang, Z. and Shao, Z.},
  title    = {Genome-based taxonomic rearrangement of {Oceanobacter}-related bacteria including the description of {Thalassolituus} hydrocarbonoclasticus sp. nov. and {Thalassolituus} pacificus sp. nov. and emended description of the genus {Thalassolituus}},
  journal  = {Frontiers in microbiology},
  volume   = {13},
  pages    = {1051202},
  pmid     = {36605514},
  issn     = {1664-302X},
  abstract = {Oceanobacter-related bacteria (ORB) are a group of oligotrophic marine bacteria play an underappreciated role in carbon cycling. They have been frequently described as one of the dominant bacterial groups with a wide distribution in coastal and deep seawater of global oceans. To clarify their taxonomic affiliation in relation to alkane utilization, phylogenomic and comparative genomics analyses were performed based on currently available genomes from GenBank and four newly isolated strains, in addition to phenotypic and chemotaxonomic characteristics. Consistently, phylogenomic analysis robustly separated them into two groups, which are accordingly hydrocarbon-degrading (HD, Thalassolituus and Oleibacter) and non-HD (NHD, Oceanobacter). In addition, the two groups can also be readily distinguished by several polyphasic taxonomic characteristics. Furthermore, both AAI and POCP genomic indices within the HD group support the conclusion that the members of the genus Oleibacter should be transferred into the genus Thalassolituus. Moreover, HD and NHD bacteria differed significantly in terms of genome size, G + C content and genes involved in alkane utilization. All HD bacteria contain the key gene alkB encoding an alkane monooxygenase, which can be used as a marker gene to distinguish the members of closely related genera Oceanobacter and Thalassolituus.
Pangenome analysis revealed that the larger accessory genome may endow Thalassolituus with the flexibility to cope with the dynamics of marine environments and thrive therein, although they possess smaller pan, core- and unique-genomes than Oceanobacter. Within the HD group, twelve species were clearly distinguished from each other by both dDDH and ANI genomic indices, including two novel species represented by the newly isolated strains alknpb1M-1 [T] and 59MF3M-4 [T] , for which the names Thalassolituus hydrocarbonoclasticus sp. nov. and Thalassolituus pacificus sp. nov. are proposed. Collectively, these findings build a phylogenetic framework for the ORB and contribute to understanding of their role in marine carbon cycling.},
}
@article{pmid36605106,
  year     = {2022},
  author   = {Ali, A. and Khatoon, A. and Mirza, T. and Ahmad, F.},
  title    = {Intensification in Genetic Information and Acquisition of Resistant Genes in Genome of {Acinetobacter} baumannii: A Pan-Genomic Analysis},
  journal  = {BioMed research international},
  volume   = {2022},
  pages    = {3186343},
  pmid     = {36605106},
  issn     = {2314-6141},
  abstract = {Acinetobacter baumannii (A. baumannii) attributes 26% of the mortality rate in hospitalized patients, and the percentage can rise to 46 in patients admitted to ICU as it is a major cause of ventilator-associated pneumonia. It has been nominated as the critical priority organism by WHO for which new therapeutic drugs are urgently required. To understand the genomic identification of different strains, antimicrobial resistance patterns, and epidemiological typing of organisms, whole-genome sequencing (WGS) analysis provides insight to explore new epitopes to develop new drugs against the organism. Therefore, the study is aimed at investigating the whole genome sequence of A. baumannii strains to report the new intensifications in its genomic profile. The genome sequences were retrieved from the NCBI database system. Pan-genome BPGA (Bacterial Pan-genome Analysis Tool) was used to analyze the core, pan, and species-specific genome analysis. The pan and core genome curves were extrapolated using the empirical power law equation f(x) = a.xb and the exponential equation f1(x) = c.e (d.x). To identify the resistant genes with resistant mutations against antibiotics, ResFinder and Galaxy Community hub bioinformatics tools were used. According to pan-genome analysis, there were 2227 core genes present in each species of the A. baumannii genome. Furthermore, the number of accessory genes ranged from 1182 to 1460, and the unique genes in the genome were 931. There were 325 exclusively absent genes in the genome of Acinetobacter baumannii.
The pan-genome analysis showed that there is a 5-fold increase in the genome of A. baumannii in 5 years, and the genome is still open. There is the addition of multiple unique genes; among them, genes participating in the function of information and processing are increased.},
}
@article{pmid36598708,
  year     = {2023},
  author   = {Karthik, K. and Anbazhagan, S. and Chitra, M. A. and Sridhar, R.},
  title    = {Comparative phylogenomics of {Trueperella} pyogenes reveals host-based distinction of strains},
  journal  = {Antonie van Leeuwenhoek},
  pmid     = {36598708},
  issn     = {1572-9699},
  abstract = {Trueperella pyogenes, an opportunistic pathogen causes various ailments in different animals. Different strains from different animals have distinct characters phenotypically and genotypically. Hence understanding the strains in a particular geographical location helps in framing the preventive measures. Comparative genomics of all the available T. pyogenes genome in the NCBI was conducted to understand the relatedness among strains. Whole genome phylogeny showed host associated clustering of strains recovered from swine lungs. Core genome phylogeny also showed host associated clustering mimicking whole genome phylogeny results. MLST analysis showed that there was higher diversity among cattle strains. Multidimensional scaling revealed five swine clusters, two cattle and buffalo clusters. Pangenome analysis also showed that T. pyogenes had an open genome with 57.09% accessory genome. Host specific genes were identified by pangenome analysis, and (R)-citramalate synthase was specific for swine strains of Asian origin. Host specifc genes identified by pangenome analysis can be exploited for developing a molecular assay to specifically identify the strains. The study shows that MLST having higher discriminatory power can be used as an epidemiological tool for strain discrimination of T. pyogenes.},
}
@article{pmid36598279,
  author   = {Xu, C. and Rao, J. and Xie, Y. and Lu, J. and Li, Z. and Dong, C. and Wang, L. and Jiang, J. and Chen, C. and Chen, S.},
  title    = {The {DNA} Phosphorothioation Restriction-Modification System Influences the Antimicrobial Resistance of Pathogenic Bacteria},
  journal  = {Microbiology spectrum},
  year     = {2023},
  pages    = {e0350922},
  doi      = {10.1128/spectrum.03509-22},
  pmid     = {36598279},
  issn     = {2165-0497},
  abstract = {Bacterial defense barriers, such as DNA methylation-associated restriction-modification (R-M) and the CRISPR-Cas system, play an important role in bacterial antimicrobial resistance (AMR). Recently, a novel R-M system based on DNA phosphorothioate (PT) modification has been shown to be widespread in the kingdom of Bacteria as well as Archaea. However, the potential role of the PT R-M system in bacterial AMR remains unclear. In this study, we explored the role of PT R-Ms in AMR with a series of common clinical pathogenic bacteria. By analyzing the distribution of AMR genes related to mobile genetic elements (MGEs), it was shown that the presence of PT R-M effectively reduced the distribution of horizontal gene transfer (HGT)-derived AMR genes in the genome, even in the bacteria that did not tend to acquire AMR genes by HGT. In addition, unique gene variation analysis based on pangenome analysis and MGE prediction revealed that the presence of PT R-M could suppress HGT frequency. Thus, this is the first report showing that the PT R-M system has the potential to repress HGT-derived AMR gene acquisition by reducing the HGT frequency. IMPORTANCE In this study, we demonstrated the effect of DNA PT modification-based R-M systems on horizontal gene transfer of AMR genes in pathogenic bacteria. We show that there is no apparent association between the genetic background of the strains harboring PT R-Ms and the number of AMR genes or the kinds of gene families.
The strains equipped with PT R-M harbor fewer plasmid-derived, prophage-derived, or integrating mobile genetic element (iMGE)-related AMR genes and have a lower HGT frequency, but the degree of inhibition varies among different bacteria. In addition, compared with Salmonella enterica and Escherichia coli, Klebsiella pneumoniae prefers to acquire MGE-derived AMR genes, and there is no coevolution between PT R-M clusters and bacterial core genes.},
}
@article{pmid36589110,
  author   = {Liang, L. and Zhang, J. and Xiao, J. and Li, X. and Xie, Y. and Tan, H. and Song, X. and Zhu, L. and Xue, X. and Xu, L. and Zhou, P. and Ran, J. and Sun, B. and Huang, Z. and Tang, Y. and Lin, L. and Sun, G. and Lai, Y. and Li, H.},
  title    = {Genome and pan-genome assembly of asparagus bean ({Vigna} unguiculata ssp. sesquipedialis) reveal the genetic basis of cold adaptation},
  journal  = {Frontiers in plant science},
  year     = {2022},
  volume   = {13},
  pages    = {1059804},
  pmid     = {36589110},
  issn     = {1664-462X},
  abstract = {Asparagus bean (Vigna unguiculata ssp. sesquipedialis) is an important cowpea subspecies. We assembled the genomes of Ningjiang 3 (NJ, 550.31 Mb) and Dubai bean (DB, 564.12 Mb) for comparative genomics analysis. The whole-genome duplication events of DB and NJ occurred at 64.55 and 64.81 Mya, respectively, while the divergence between soybean and Vigna occurred in the Paleogene period. NJ genes underwent positive selection and amplification in response to temperature and abiotic stress. In species-specific gene families, NJ is mainly enriched in response to abiotic stress, while DB is primarily enriched in respiration and photosynthesis. We established the pan-genomes of four accessions (NJ, DB, IT97K-499-35 and Xiabao II) and identified 20,336 (70.5%) core genes present in all the accessions, 6,507 (55.56%) variable genes in two individuals, and 2,004 (6.95%) unique genes. The final pan genome is 616.35 Mb, and the core genome is 399.78 Mb. The variable genes are manifested mainly in stress response functions, ABC transporters, seed storage, and dormancy control. In the pan-genome sequence variation analysis, genes affected by presence/absence variants were enriched in biological processes associated with defense responses, immune system processes, signal transduction, and agronomic traits.
The results of the present study provide genetic data that could facilitate efficient asparagus bean genetic improvement, especially in producing cold-adapted asparagus bean.},
}
@article{pmid36586056,
  author   = {Tanwar, U. K. and Stolarska, E. and Rudy, E. and Paluch-Lubawa, E. and Grabsztunowicz, M. and Arasimowicz-Jelonek, M. and Sobieszczuk-Nowicka, E.},
  title    = {Metal tolerance gene family in barley: an in silico comprehensive analysis},
  journal  = {Journal of applied genetics},
  year     = {2022},
  pmid     = {36586056},
  issn     = {2190-3883},
  abstract = {Metal-tolerance proteins (MTPs) are divalent cation transporters that play critical roles in metal tolerance and ion homeostasis in plants. However, a comprehensive study of MTPs is still lacking in crop plants. The current study aimed to comprehensively identify and characterize the MTP gene family in barley (Hordeum vulgare, Hv), an important crop. In total, 12 HvMTPs were identified in the barley genome in this study. They were divided into three phylogenetic groups (Zn-cation diffusion facilitator proteins [CDFs], Fe/Zn-CDFs, and Mn-CDFs) and further subdivided into seven groups (G1, G5, G6, G7, G8, G9, and G12). The majority of MTPs were hydrophobic proteins found in the vacuolar membrane. Gene duplication analysis of HvMTPs revealed one pair of segmental-like duplications in the barley genome. Evolutionary analysis suggested that barley MTPs underwent purifying natural selection. Additionally, the HvMTPs were analyzed in the pan-genome sequences of barley (20 accessions), which suggests that HvMTPs are highly conserved in barley evolution. Cis-acting regulatory elements, microRNA target sites, and protein-protein interaction analysis indicated the role of HvMTPs in a variety of biological processes. Expression profiling suggests that HvMTPs play an active role in maintaining barley nutrient homeostasis throughout its life cycle, and their expression levels were not significantly altered by abiotic stresses like cold, drought, or heat.
The expression of barley HvMTP genes in the presence of heavy metals such as Zn[2+], Cu[2+], As[3+], and Cd[2+] revealed that these MTPs were induced by at least one metal ion, implying their involvement in metal tolerance or transportation. The identification and comprehensive investigation of MTP gene family members will provide important gene resources for the genetic improvement of crops for metal tolerance, bioremediation, or biofortification of staple crops.},
}
@article{pmid36585993,
  author   = {Bordel, S. and Martín-González, D. and Muñoz, R. and Santos-Beneit, F.},
  title    = {Genome sequence analysis and characterization of {Bacillus} altitudinis {B12}, a polylactic acid- and keratin-degrading bacterium},
  journal  = {Molecular genetics and genomics : MGG},
  year     = {2022},
  pmid     = {36585993},
  issn     = {1617-4623},
  abstract = {Keratin-rich wastes, mainly in the form of feathers, are recalcitrant residues generated in high amounts as by-products in chicken farms and food industry. Polylactic acid (PLA) is the second most common biodegradable polymer found in commercial plastics, which is not easily degraded by microbial activity. This work reports the 3.8-Mb genome of Bacillus altitudinis B12, a highly efficient PLA- and keratin-degrading bacterium, with potential for environmental friendly biotechnological applications in the feed, fertilizer, detergent, leather, and pharmaceutical industries. The whole genome sequence of B. altitudinis B12 revealed that this strain (which had been previously misclassified as Bacillus pumilus B12) is closely related to the B. altitudinis strains ER5, W3, and GR-8. A total of 4056 coding sequences were annotated using the RAST server, of which 2484 are core genes of the pan genome of B. altitudinis and 171 are unique to this strain. According to the sequence analysis, B. pumilus B12 has a predicted secretome of 353 proteins, among which a keratinase and a PLA depolymerase were identified by sequence analysis. The presence of these two enzymes could explain the characterized PLA and keratin biodegradation capability of the strain.},
}
@article{pmid36579850,
  author   = {Javkar, K. and Rand, H. and Strain, E. and Pop, M.},
  title    = {{PRAWNS}: Compact pan-genomic features for whole-genome population genomics},
  journal  = {Bioinformatics (Oxford, England)},
  year     = {2022},
  doi      = {10.1093/bioinformatics/btac844},
  pmid     = {36579850},
  issn     = {1367-4811},
  abstract = {MOTIVATION: Scientists seeking to understand the genomic basis of bacterial phenotypes, such as antibiotic resistance, today have access to an unprecedented number of complete and nearly-complete genomes. Making sense of these data requires computational tools able to perform multiple-genome comparisons efficiently, yet currently available tools cannot scale beyond several tens of genomes.
RESULTS: We describe PRAWNS, an efficient and scalable tool for multiple-genome analysis. PRAWNS defines a concise set of genomic features (metablocks), as well as pairwise relationships between them, which can be used as a basis for large-scale genotype-phenotype association studies. We demonstrate the effectiveness of PRAWNS by identifying genomic regions associated with antibiotic resistance in Acinetobacter baumannii.
AVAILABILITY: PRAWNS is implemented in C ++ and Python3, licensed under the GPLv3 license, and freely downloadable from GitHub (https://github.com/KiranJavkar/PRAWNS.git).
SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
}
@article{pmid36577205,
  author   = {Kadiri, M. and Sevugapperumal, N. and Nallusamy, S. and Ragunathan, J. and Ganesan, M. V. and Alfarraj, S. and Ansari, M. J. and Sayyed, R. Z. and Lim, H. R. and Show, P. L.},
  title    = {Pan-genome analysis and molecular docking unveil the biocontrol potential of {Bacillus} velezensis {VB7} against {Phytophthora} infestans},
  journal  = {Microbiological research},
  year     = {2022},
  volume   = {268},
  pages    = {127277},
  doi      = {10.1016/j.micres.2022.127277},
  pmid     = {36577205},
  issn     = {1618-0623},
  abstract = {Management of late blight of potato incited by Phytophthora infestans remains a major challenge. Coevolution of pathogen with resistant strains and the rise of fungicide resistance have made it more challenging to prevent the spread of P. infestans. Here, the anti-oomycete potential of Bacillus velezensis VB7 against P. infestans through pan-genome analysis and molecular docking were explored. The Biocontrol potential of VB7 against P. infestans was assessed using a confrontational assay. The biomolecules from the inhibition zone were identified and subjected to in silico analysis against P. infestans target proteins. Nucleotide sequences for 54 B. velezensis strains from different geographical locations were used for pan-genome analysis. The confrontational assay revealed the anti-oomycetes potential of VB7 against P. infestans. Molecular docking confirmed that the penicillamine disulfide had the maximum binding energy with eight effector proteins of P. infestans. Besides, scanning electron microscopic observations of P. infestans interaction with VB7 revealed structural changes in hypha and sporangia. Pan-genome analysis between 54 strains of B. velezensis confirmed that the core genome had 2226 genes, and it has an open pan-genome. The present study confirmed the anti-oomycete potential of B. velezensis VB7 against P. infestans and paved the way to explore the genetic potential of VB7.},
}
@article{pmid36575347,
  author   = {Srivastava, S. and Bombaywala, S. and Jakhesara, S. J. and Patil, N. V. and Joshi, C. G. and Purohit, H. J. and Dafale, N. A.},
  title    = {Potential of camel rumen derived {Bacillus} subtilis and {Bacillus} velezensis strains for application in plant biomass hydrolysis},
  journal  = {Molecular genetics and genomics : MGG},
  year     = {2022},
  pmid     = {36575347},
  issn     = {1617-4623},
  abstract = {Rumen inhabiting Bacillus species possesses a high genetic potential for plant biomass hydrolysis and conversion to value-added products. In view of the same, five camel rumen-derived Bacillus strains, namely B. subtilis CRN 1, B. velezensis CRN 2, B. subtilis CRN 7, B. subtilis CRN 11, and B. velezensis CRN 23 were initially assayed for diverse hydrolytic activities, followed by genome mining to unravel the potential applications. CRN 1 and CRN 7 showed the highest endoglucanase activity with 0.4 U/ml, while CRN 23 showed high β-xylosidase activity of 0.36 U/ml. The comprehensive genomic insights of strains resolve taxonomic identity, clusters of an orthologous gene, pan-genome dynamics, and metabolic features. Annotation of Carbohydrate active enzymes (CAZymes) reveals the presence of diverse glycoside hydrolases (GH) GH1, GH5, GH43, and GH30, which are solely responsible for the effective breakdown of complex bonds in plant polysaccharides. Further, protein modeling and ligand docking of annotated endoglucanases showed an affinity for cellotrioside, cellobioside, and β-glucoside. The finding indicates the flexibility of Bacillus-derived endoglucanase activity on diverse cellulosic substrates. The presence of the butyrate synthesis gene in the CRN 1 strain depicts its key role in the production of important short-chain fatty acids essential for healthy rumen development. Similarly, antimicrobial peptides such as bacilysin and non-ribosomal peptides (NRPS) synthesized by the Bacillus strains were also annotated in the genome.
The findings clearly define the role of Bacillus sp. inside the camel rumen and its potential application in various plant biomass utilizing industry and animal health research sectors.},
}
@article{pmid36567375,
  author   = {Filipić, B. and Malešević, M. and Vasiljević, Z. and Novović, K. and Kojić, M. and Jovčić, B.},
  title    = {Comparative genomics of trimethoprim-sulfamethoxazole-resistant {Achromobacter} xylosoxidans clinical isolates from {Serbia} reveals shortened variant of class 1 integron integrase gene},
  journal  = {Folia microbiologica},
  year     = {2022},
  pmid     = {36567375},
  issn     = {1874-9356},
  abstract = {Trimethoprim-sulfamethoxazole (SXT) is the preferable treatment option of the infections caused by Achromobacter spp. Our study aimed to analyze the SXT resistance of 98 Achromobacter spp. isolates from pediatric patients, among which 33 isolates were SXT-resistant. The presence of intI1 was screened by PCR and genome sequence analyses. The intI1 gene was detected in 10 of SXT-resistant isolates that had shorter intI1 PCR fragments named intI1S. Structural changes in intI1S were confirmed by genome sequencing and analyses which revealed 86 amino acids deletion in IntI1S protein compared to canonical IntI1 protein. All IntI1S isolates were of non-CF origin. Pan-genome analysis of intI1S bearing A. xylosoxidans isolates comprised 9052 genes, with the core genome consisting of 5455 protein-coding genes. Results in this study indicate that IntI1S isolates were derived from clinical settings and that cystic fibrosis (CF) patients were potential reservoirs for healthcare-associated infections that occurred in non-CF patients.},
}
@article{pmid36566389,
  author   = {Shirasawa, K. and Hosokawa, M. and Yasui, Y. and Toyoda, A. and Isobe, S.},
  title    = {Chromosome-scale genome assembly of a {Japanese} chili pepper landrace, {Capsicum} annuum {`Takanotsume'}},
  journal  = {DNA research : an international journal for rapid publication of reports on genes and genomes},
  year     = {2022},
  doi      = {10.1093/dnares/dsac052},
  pmid     = {36566389},
  issn     = {1756-1663},
  abstract = {Here, we report the genome sequence of a popular Japanese chili pepper landrace, Capsicum annuum 'Takanotsume'. We used long-read sequencing and optical mapping, together with the genetic mapping technique, to obtain the chromosome-scale genome assembly of 'Takanotsume'. The assembly consists of 12 pseudomolecules, which corresponds to the basic chromosome number of C. annuum, and is 3,058.5 Mb in size, spanning 97.0% of the estimated genome size. A total of 34,324 high-confidence genes were predicted in the genome, and 83.4% of the genome assembly was occupied by repetitive sequences. Comparative genomics of linked-read sequencing-derived de novo genome assemblies of two Capsicum chinense lines and whole-genome resequencing analysis of Capsicum species revealed not only nucleotide sequence variations but also genome structure variations (i.e., chromosomal rearrangements and transposon-insertion polymorphisms) between 'Takanotsume' and its relatives. Overall, the genome sequence data generated in this study will accelerate the pan-genomics and breeding of Capsicum, and facilitate the dissection of genetic mechanisms underlying the agronomically important traits of 'Takanotsume'.},
}
@article{pmid36558824,
  author   = {Xia, F. and Cheng, J. and Jiang, M. and Wang, Z. and Wen, Z. and Wang, M. and Ren, J. and Zhuge, X.},
  title    = {Genomics Analysis to Identify Multiple Genetic Determinants That Drive the Global Transmission of the Pandemic {ST95} Lineage of Extraintestinal Pathogenic {Escherichia} coli ({ExPEC})},
  journal  = {Pathogens (Basel, Switzerland)},
  year     = {2022},
  volume   = {11},
  number   = {12},
  doi      = {10.3390/pathogens11121489},
  pmid     = {36558824},
  issn     = {2076-0817},
  abstract = {Extraintestinal pathogenic Escherichia coli (ExPEC) is a pathogen that causes host extraintestinal diseases. The ST95 E. coli lineage is one of the dominant ExPEC lineages in humans and poultry. In this study, we took advantage of extensive E. coli genomes available through public open-access databases to construct a detailed understanding of the phylogeny and evolution of ST95. We used a high variability of accessory genomes to highlight the diversity and dynamic traits of ST95. Isolates from diverse hosts and geographic sources were randomly located on the phylogenetic tree, which suggested that there is no host specificity for ST95. The time-scaled phylogeny showed that ST95 is an ancient and long-lasting lineage. The virulence genes, resistance genes, and pathogenicity islands (PAIs) were characterized in ST95 pan-genomes to provide novel insights into the pathogenicity and multidrug resistance (MDR) genotypes. We found that a pool of large plasmids drives virulence and MDR. Based on the unique genes in the ST95 pan-genome, we designed a novel multiplex PCR reaction to rapidly detect ST95. Overall, our study addressed a gap in the current understanding of ST95 ExPEC genomes, with significant implications for recognizing the success and spread of ST95.},
}
@article{pmid36558765,
  author   = {Lu, Q. and Zhu, X. and Long, Q. and Yi, X. and Yang, A. and Long, X. and Cao, D.},
  title    = {Comparative Genomics Reveal the Utilization Ability of Variable Carbohydrates as Key Genetic Features of {Listeria} Pathogens in Their Pathogenic Lifestyles},
  journal  = {Pathogens (Basel, Switzerland)},
  year     = {2022},
  volume   = {11},
  number   = {12},
  doi      = {10.3390/pathogens11121430},
  pmid     = {36558765},
  issn     = {2076-0817},
  abstract = {BACKGROUND: L. monocytogenes and L. ivanovii, the only two pathogens of Listeria, can survive in various environments, having different pathogenic characteristics. However, the genetic basis of their excellent adaptability and differences in pathogenicity has still not been completely elucidated.
METHODS: We performed a comparative genomic analysis based on 275 L. monocytogenes, 10 L. ivanovii, and 22 non-pathogenic Listeria strains.
RESULTS: Core/pan-genome analysis revealed that 975 gene families were conserved in all the studied strains. Additionally, 204, 242, and 756 gene families existed uniquely in L. monocytogenes, L. ivanovii, and both, respectively. Functional annotation partially verified that these unique gene families were closely related to their adaptability and pathogenicity. Moreover, the protein-protein interaction (PPI) network analysis of these unique gene sets showed that plenty of carbohydrate transport systems and energy metabolism enzymes were clustered in the networks. Interestingly, ethanolamine-metabolic-process-related proteins were significantly enriched in the PPI network of the unique genes of the Listeria pathogens, which can be understood as a determining factor of their pathogenicity.
CONCLUSIONS: The utilization capacity of multiple carbon sources of Listeria pathogens, especially ethanolamine, is the key genetic basis for their ability to adapt to various environments and pathogenic lifestyles.},
}
@article{pmid36557654,
  author   = {Vázquez-Sánchez, D. A. and Grillo, S. and Carrera-Salinas, A. and González-Díaz, A. and Cuervo, G. and Grau, I. and Camoez, M. and Martí, S. and Berbel, D. and Tubau, F. and Ardanuy, C. and Pujol, M. and Càmara, J. and Domínguez, M. Á.},
  title    = {Molecular Epidemiology, Antimicrobial Susceptibility, and Clinical Features of Methicillin-Resistant {Staphylococcus} aureus Bloodstream Infections over 30 Years in {Barcelona}, {Spain} (1990--2019)},
  journal  = {Microorganisms},
  year     = {2022},
  volume   = {10},
  number   = {12},
  doi      = {10.3390/microorganisms10122401},
  pmid     = {36557654},
  issn     = {2076-2607},
  abstract = {Methicillin-resistant Staphylococcus aureus bloodstream infections (MRSA-BSI) are a significant cause of mortality. We analysed the evolution of the molecular and clinical epidemiology of MRSA-BSI (n = 784) in adult patients (Barcelona, 1990-2019). Isolates were tested for antimicrobial susceptibility and genotyped (PFGE), and a selection was sequenced (WGS) to characterise the pangenome and mechanisms underlying antimicrobial resistance. Increases in patient age (60 to 71 years), comorbidities (Charlson's index > 2, 10% to 94%), community-onset healthcare-associated acquisition (9% to 60%), and 30-day mortality (28% to 36%) were observed during the 1990-1995 and 2014-2019 periods. The proportion of catheter-related BSIs fell from 57% to 20%. Current MRSA-BSIs are caused by CC5-IV and an upward trend of CC8-IV and CC22-IV clones. CC5 and CC8 had the lowest core genome proportions. Antimicrobial resistance rates fell, and only ciprofloxacin, tobramycin, and erythromycin remained high (>50%) due to GyrA/GrlA changes, the presence of aminoglycoside-modifying enzymes (AAC(6')-Ie-APH(2″)-Ia and ANT(4')-Ia), and mph(C)/msr(A) or erm (C) genes. Two CC22-IV strains showed daptomycin resistance (MprF substitutions). MRSA-BSI has become healthcare-associated, affecting elderly patients with comorbidities and causing high mortality rates.
Clonal replacement with CC5-IV and CC8-IV clones resulted in lower antimicrobial resistance rates. The increased frequency of the successful CC22-IV, associated with daptomycin resistance, should be monitored.},
}
@article{pmid36553557,
  author   = {Wang, L. and Zhou, F. and Zhou, J. and Harvey, P. R. and Yu, H. and Zhang, G. and Zhang, X.},
  title    = {Genomic Analysis of {Pseudomonas} asiatica {JP233}: An Efficient Phosphate-Solubilizing Bacterium},
  journal  = {Genes},
  year     = {2022},
  volume   = {13},
  number   = {12},
  doi      = {10.3390/genes13122290},
  pmid     = {36553557},
  issn     = {2073-4425},
  abstract = {The bacterium Pseudomonas sp. strain JP233 has been reported to efficiently solubilize sparingly soluble inorganic phosphate, promote plant growth and significantly reduce phosphorus (P) leaching loss from soil. The production of 2-keto gluconic acid (2KGA) by strain JP233 was identified as the main active metabolite responsible for phosphate solubilization. However, the genetic basis of phosphate solubilization and plant-growth promotion remained unclear. As a result, the genome of JP233 was sequenced and analyzed in this study. The JP233 genome consists of a circular chromosome with a size of 5,617,746 bp and a GC content of 62.86%. No plasmids were detected in the genome. There were 5097 protein-coding sequences (CDSs) predicted in the genome. Phylogenetic analyses based on genomes of related Pseudomonas spp. identified strain JP233 as Pseudomonas asiatica. Comparative pangenomic analysis among 9 P. asiatica strains identified 4080 core gene clusters and 111 singleton genes present only in JP233. Genes associated with 2KGA production detected in strain JP233, included those encoding glucose dehydrogenase, pyrroloquinoline quinone and gluoconate dehydrogenase. Genes associated with mechanisms of plant-growth promotion and nutrient acquisition detected in JP233 included those involved in IAA biosynthesis, ethylene catabolism and siderophore production. Numerous genes associated with other properties beneficial to plant growth were also detected in JP233, included those involved in production of acetoin, 2,3-butanediol, trehalose, and resistance to heavy metals.
This study provides the genetic basis to elucidate the plant-growth promoting and bio-remediation properties of strain JP233 and its potential applications in agriculture and industry.},
}
@article{pmid36551744,
  author   = {Alturki, N. A. and Mashraqi, M. M. and Jalal, K. and Khan, K. and Basharat, Z. and Alzamami, A.},
  title    = {Therapeutic Target Identification and Inhibitor Screening against Riboflavin Synthase of Colorectal Cancer Associated {Fusobacterium} nucleatum},
  journal  = {Cancers},
  year     = {2022},
  volume   = {14},
  number   = {24},
  doi      = {10.3390/cancers14246260},
  pmid     = {36551744},
  issn     = {2072-6694},
  abstract = {Colorectal cancer (CRC) ranks third among all cancers in terms of prevalence. There is growing evidence that gut microbiota has a role in the development of colorectal cancer. Fusobacterium nucleatum is overrepresented in the gastrointestinal tract and tumor microenvironment of patients with CRC. This suggests the role of F. nucleatum as a potential risk factor in the development of CRC. Hence, we aimed to explore whole genomes of F. nucleatum strains related to CRC to predict potential therapeutic markers through a pan-genome integrated subtractive genomics approach. In the current study, we identified 538 proteins as essential for F. nucleatum survival, 209 non-homologous to a human host, and 12 as drug targets. Eventually, riboflavin synthase (RiS) was selected as a therapeutic target for further processing. Three different inhibitor libraries of lead-like natural products, i.e., cyanobactins (n = 237), streptomycins (n = 607), and marine bacterial secondary metabolites (n = 1226) were screened against it. After the structure-based study, three compounds, i.e., CMNPD3609 (-7.63) > Malyngamide V (-7.03) > ZINC06804365 (-7.01) were prioritized as potential inhibitors of F. nucleatum. Additionally, the stability and flexibility of these compounds bound to RiS were determined via a molecular dynamics simulation of 50 ns. Results revealed the stability of these compounds within the binding pocket, after 5 ns. ADMET profiling showed compounds as drug-like, non-permeable to the blood brain barrier, non-toxic, and HIA permeable.
Pan-genomics mediated drug target identification and the virtual screening of inhibitors is the preliminary step towards inhibition of this pathogenic oncobacterium and we suggest mouse model experiments to validate our findings.},
}
@article{pmid36550124,
  author   = {Vaughn, J. N. and Branham, S. E. and Abernathy, B. and Hulse-Kemp, A. M. and Rivers, A. R. and Levi, A. and Wechter, W. P.},
  title    = {Graph-based pangenomics maximizes genotyping density and reveals structural impacts on fungal resistance in melon},
  journal  = {Nature communications},
  year     = {2022},
  volume   = {13},
  number   = {1},
  pages    = {7897},
  pmid     = {36550124},
  issn     = {2041-1723},
  abstract = {The genomic sequences segregating in experimental populations are often highly divergent from the community reference and from one another. Such divergence is problematic under various short-read-based genotyping strategies. In addition, large structural differences are often invisible despite being strong candidates for causal variation. These issues are exacerbated in specialty crop breeding programs with fewer, lower-quality sequence resources. Here, we examine the benefits of complete genomic information, based on long-read assemblies, in a biparental mapping experiment segregating at numerous disease resistance loci in the non-model crop, melon (Cucumis melo). We find that a graph-based approach, which uses both parental genomes, results in 19% more variants callable across the population and raw allele calls with a 2 to 3-fold error-rate reduction, even relative to single reference approaches using a parent genome. We show that structural variation has played a substantial role in shaping two Fusarium wilt resistance loci with known causal genes. We also report on the genetics of powdery mildew resistance, where copy number variation and local recombination suppression are directly interpretable via parental genome alignments. Benefits observed, even in this low-resolution biparental experiment, will inevitably be amplified in more complex populations.},
}
@article{pmid36547858,
  author   = {Sreya, P. and Suresh, G. and Rai, A. and Ria, B. and Vighnesh, L. and Agre, V. C. and Jagadeeshwari, U. and Sasikala, C. and Ramana, C. V.},
  title    = {Revisiting the taxonomy of the genus {Rhodopirellula} with the proposal for reclassification of the genus to {Rhodopirellula} sensu stricto, {Aporhodopirellula} gen. nov., {Allorhodopirellula} gen. nov. and {Neorhodopirellula} gen. nov.},
  journal  = {Antonie van Leeuwenhoek},
  year     = {2022},
  pmid     = {36547858},
  issn     = {1572-9699},
  abstract = {The current genus Rhodopirellula consists of marine bacteria which belong to the family Pirellulaceae of the phylum Planctomycetota. Members of the genus Rhodopirellula are aerobic, mesophiles and chemoheterotrophs. The here conducted analysis built on 16S rRNA gene sequence and multi-locus sequence analysis based phylogenomic trees suggested that the genus is subdivided into four clades. Existing Rhodopirellula species were studied extensively based on phenotypic, genomic and chemotaxonomic parameters. The heterogeneity was further confirmed by overall genome-related indices (OGRI) including digital DNA-DNA hybridization (dDDH), average nucleotide identity (ANI), average amino acid identity (AAI), and percentage of conserved proteins (POCP). AAI and POCP values between the clades of the genus Rhodopirellula were 62.2-69.6% and 49.5-62.5%, respectively. Comparative genomic approaches like pan-genome analysis and conserved signature indels (CSIs) also support the division of the clades. The genomic incoherence of the members of the genus is further supported by variations in phenotypic characteristics. Thus, with the here applied integrated comparative genomic and polyphasic approaches, we propose the reclassification of the genus Rhodopirellula to three new genera: Aporhodopirellula gen. nov., Allorhodopirellula gen. nov., and Neorhodopirellula gen. nov.},
}
@article{pmid36547571,
  year     = {2022},
  author   = {Bao, J and Wang, Z and Chen, M and Chen, S and Chen, X and Xie, J and Tang, W and Zheng, H and Wang, Z},
  title    = {Pan-Genomics Reveals a New Variation Pattern of Secreted Proteins in {Pyricularia} oryzae.},
  journal  = {Journal of fungi (Basel, Switzerland)},
  volume   = {8},
  number   = {12},
  pages    = {},
  doi      = {10.3390/jof8121238},
  pmid     = {36547571},
  issn     = {2309-608X},
  abstract = {(1) Background: Pyricularia oryzae, the causal agent of rice blast disease, is one of the major rice pathogens. The complex population structure of P. oryzae facilitates the rapid virulence variations, which make the blast disease a serious challenge for global food security. There is a large body of existing genomics research on P. oryzae, however the population structure at the pan-genome level is not clear, and the mechanism of genetic divergence and virulence variations of different sub-populations is also unknown. (2) Methods: Based on the genome data published in the NCBI, we constructed a pan-genome database of P. oryzae, which consisted of 156 strains (117 isolated from rice and 39 isolated from other hosts). (3) Results: The pan-genome contained a total of 24,100 genes (12,005 novel genes absent in the reference genome 70-15), including 16,911 (~70%) core genes (population frequency ≥95%) and 1378 (~5%) strain-specific genes (population frequency ≤5%). Gene presence-absence variation (PAV) based clustering analysis of the population structure of P. oryzae revealed four subgroups (three from rice and one from other hosts). Interestingly, the cloned avirulence genes and conventional secreted proteins (SPs, with signal peptides) were enriched in the high-frequency regions and significantly associated with transposable elements (TEs), while the unconventional SPs (without signal peptides) were enriched in the low-frequency regions and not associated significantly with TEs.
This pan-genome will expand the breadth and depth of the rice blast fungus reference genome, and also serve as a new blueprint for scientists to further study the pathogenic mechanism and virulence variation of the rice blast fungus.},
}
@article{pmid36544084,
  year     = {2022},
  author   = {Morey-León, G and Andrade-Molina, D and Fernández-Cadena, JC and Berná, L},
  title    = {Comparative genomics of drug-resistant strains of {Mycobacterium} tuberculosis in {Ecuador}.},
  journal  = {BMC genomics},
  volume   = {23},
  number   = {1},
  pages    = {844},
  pmid     = {36544084},
  issn     = {1471-2164},
  abstract = {BACKGROUND: Tuberculosis is a serious infectious disease affecting millions of people. In spite of efforts to reduce the disease, increasing antibiotic resistance has contributed to persist in the top 10 causes of death worldwide. In fact, the increased cases of multi (MDR) and extreme drug resistance (XDR) worldwide remains the main challenge for tuberculosis control. Whole genome sequencing is a powerful tool for predicting drug resistance-related variants, studying lineages, tracking transmission, and defining outbreaks. This study presents the identification and characterization of resistant clinical isolates of Mycobacterium tuberculosis including a phylogenetic and molecular resistance profile study by sequencing the complete genome of 24 strains from different provinces of Ecuador.
RESULTS: Genomic sequencing was used to identify the variants causing resistance. A total of 15/21 isolates were identified as MDR, 4/21 as pre-XDR and 2/21 as XDR, with three isolates discarded due to low quality; the main sub-lineage was LAM (61.9%) and Haarlem (19%) but clades X, T and S were identified. Of the six pre-XDR and XDR strains, it is noteworthy that five come from females; four come from the LAM sub-lineage and two correspond to the X-class sub-lineage. A core genome of 3,750 genes, distributed in 295 subsystems, was determined. Among these, 64 proteins related to virulence and implicated in the pathogenicity of M. tuberculosis and 66 possible pharmacological targets stand out. Most variants result in nonsynonymous amino acid changes and the most frequent genotypes were identified as conferring resistance to rifampicin, isoniazid, ethambutol, para-aminosalicylic acid and streptomycin. However, an increase in the resistance to fluoroquinolones was detected.
CONCLUSION: This work shows for the first time the variability of circulating resistant strains between men and women in Ecuador, highlighting the usefulness of genomic sequencing for the identification of emerging resistance. In this regard, we found an increase in fluoroquinolone resistance. Further sampling effort is needed to determine the total variability and associations with the metadata obtained to generate better health policies.},
}
@article{pmid36539044,
  year     = {2022},
  author   = {Lima, A and Carolina Barbosa Caetano, A and Hurtado Castillo, R and Gonçalves Dos Santos, R and Lucas Neres Rodrigues, D and de Jesus Sousa, T and Kato, RB and Vinicius Canário Viana, M and Cybelle Pinto Gomide, A and Figueira Aburjaile, F and Tiwari, S and Jaiswal, A and Gala-García, A and Seyffert, N and Luiz de Paula Castro, T and Brenig, B and Matiuzzi da Costa, M and Maria Seles Dorneles, E and Le Loir, Y and Azevedo, V},
  title    = {Comparative genomic analysis of ovine and other host associated isolates of {Staphylococcus} aureus exhibit the important role of mobile genetic elements and virulence factors in host adaptation.},
  journal  = {Gene},
  volume   = {},
  number   = {},
  pages    = {147131},
  doi      = {10.1016/j.gene.2022.147131},
  pmid     = {36539044},
  issn     = {1879-0038},
  abstract = {Staphylococcus aureus is the main etiological agent of mastitis in small ruminants worldwide. This disease has a difficult cure and possible relapse, leading to significant economic losses in production, milk quality and livestock. This study performed comparative genomic analyses between 73 S. aureus genomes from different hosts (human, bovine, pig and others). This work isolated and sequenced 12 of these genomes from ovine. This study contributes to the knowledge of genomic specialization and the role of specific genes in establishing infection in ovine mastitis-associated S. aureus. The genomes of S. aureus isolated from sheep maintained a higher representation when grouped with clonal complexes 130 and 133. The genomes showed high genetic similarity, the species pan-genome consisting of 4200 genes (central = 2008, accessory = 1559 and unique = 634). Among these, 277 unique genes were related to the genomes isolated from sheep, with 39.6% as hypothetical proteins, 6.4% as phages, 6.4% as toxins, 2.9% as transporters, and 44.7% as related to other proteins.
Furthermore, at the pathogen level, they showed 80 genes associated with virulence factors and 19 with antibiotic resistance shared in almost all isolates. Although S. aureus isolated from ovine showed susceptibility to antimicrobials in vitro, ten genes were predicted to be associated with antibiotic inactivation and efflux pump, suggesting resistance to gentamicin and penicillin. This work may contribute to identifying genes acquired by horizontal transfer and their role in host adaptation, virulence, bacterial resistance, and characterization of strains affecting ovine.},
}
@article{pmid36537824,
  year     = {2022},
  author   = {Simoni, S and Leoni, F and Veschetti, L and Malerba, G and Carelli, M and Lleò, MM and Brenciani, A and Morroni, G and Giovanetti, E and Rocchegiani, E and Barchiesi, F and Vignaroli, C},
  title    = {The Emerging Nosocomial Pathogen {Klebsiella} michiganensis: Genetic Analysis of a {KPC-3} Producing Strain Isolated from {Venus} Clam.},
  journal  = {Microbiology spectrum},
  volume   = {},
  number   = {},
  pages    = {e0423522},
  doi      = {10.1128/spectrum.04235-22},
  pmid     = {36537824},
  issn     = {2165-0497},
  abstract = {The recovery and characterization of a multidrug-resistant, KPC-3-producing Klebsiella michiganensis that was obtained from Venus clam samples is reported in this study. A whole-genome sequencing (WGS) analysis using Illumina and Nanopore technologies of the K. michiganensis 23999A2 isolate revealed that the strain belonged to the new sequence type 382 (ST382) and carried seven plasmid replicon sequences, including four IncF type plasmids (FII, FIIY, FIIk, and FIB), one IncHI1 plasmid, and two Col plasmids. The FIB and FIIk plasmids showed high homology to each other and to multireplicon pKpQIL-like plasmids that are found in epidemic KPC-K. pneumoniae clones worldwide. The strain carried multiple β-lactamase genes on the IncF plasmids: blaOXA-9 and blaTEM-1A on FIB, blaKPC-3 inserted in a Tn4401a on FIIK, and blaSHV-12 on FIIY. The IncHI1-ST11 harbored no resistance gene. The curing of the strain caused the loss of all of the bla genes and a rearrangement of the IncF plasmids. Conjugal transfer of the blaOXA-9, blaTEM-1A and blaKPC-3 genes occurred at a frequency of 5 × 10[-7], using K. quasipneumoniae as a recipient, and all of the bla genes were transferred through a pKpQIL that originated from the recombination of the FIB and FIIk plasmids of the donor. A comparison with 31 K. michiganensis genomes that are available in the NCBI database showed that the closest phylogenetic relatives of K.
michiganensis 23999A2 are an environmental isolate from soil in South Korea and a clinical isolate from human sputum in Japan. Finally, a pan-genome analysis showed a large accessory genome of the strain as well as the great genomic plasticity of the K. michiganensis species. IMPORTANCE Klebsiella michiganensis is an emerging nosocomial pathogen, and, so far, few studies describe isolates of clinical origin in the environment. This study contributes to the understanding of how the dissemination of carbapenem-resistance outside the hospital setting may be related to the circulation of pKpQIL-like plasmids that are derived from epidemic Klebsiella pneumoniae strains. The recovery of a carbapenem-resistant isolate in clams is of great concern, as bivalves could represent vehicles of transmission of pathogens and resistance genes to humans via the food chain. The study demonstrates the plasticity of K. michiganensis genome, which is probably useful to multiple environment adaptation and to the evolution of the species.},
}
@article{pmid36536862,
  year     = {2022},
  author   = {Cai, Q and Huang, Y and Zhou, L and Hu, N and Liu, Y and Guo, F and Liu, Q and Huang, X and Zhang, Y and Zeng, L},
  title    = {A Complete Genome of {Nocardia} terpenica {NC\_YFY\_NT001} and Pan-Genomic Analysis Based on Different Sources of {Nocardia} spp. Isolates Reveal Possibly Host-Related Virulence Factors.},
  journal  = {Infection and drug resistance},
  volume   = {15},
  number   = {},
  pages    = {7259--7270},
  pmid     = {36536862},
  issn     = {1178-6973},
  abstract = {OBJECTIVE: We aimed to identify the possible virulence genes associated with Nocardia NC_YFY_NT001 isolated by ourselves and other Nocardia spp.
METHODS: The genome of Nocardia terpenica NC_YFY_NT001 was completed by using PacBio and Illumina platforms. A pan-genomic analysis was applied to selected complete Nocardia genomes.
RESULTS: Nocardia terpenica NC_YFY_NT001 can cause healthy mice death by tail intravenous injection. The genome of NT001 has one circular chromosome 8,850,000 bp and one circular plasmid 70,000 bp with ~68% GC content. The chromosome and plasmid encode 7914 and 80 proteins, respectively. Furthermore, a pan-genomic analysis showed a total of 45,825 gene clusters, then 304 core, 21,045 shell and 24,476 cloud gene clusters were classified using specific parameters. In addition, we found that catalases were more abundant in human isolates. Furthermore, we also found no significant differences in the MCE proteins between different strains from different sources. The pan-genomic analysis also showed that 67 genes could only be found in humoral isolates. ReX3 and DUF853 domain protein were found in all eight human isolates. The composition of unique genes in humoral isolate genomes indicated that the transcriptional regulators may be important when Nocardia invades the host, which allows them to survive in the new ecological system.
CONCLUSION: In this study, we confirmed that NT001 could cause infected animal death, and identified many possible virulence factors for our future studies. This study also provides new insight for our further study on Nocardia virulence mechanisms.},
}
@article{pmid36536253,
  year     = {2022},
  author   = {Sohn, JI and Choi, MH and Yi, D and Menon, VA and Kim, YJ and Lee, J and Park, JW and Kyung, S and Shin, SH and Na, B and Joung, JG and Ju, YS and Yeom, MS and Koh, Y and Yoon, SS and Baek, D and Kim, TM and Nam, JW},
  title    = {Ultrafast prediction of somatic structural variations by filtering out reads matched to pan-genome k-mer sets.},
  journal  = {Nature biomedical engineering},
  volume   = {},
  number   = {},
  pages    = {},
  pmid     = {36536253},
  issn     = {2157-846X},
  abstract = {Variant callers typically produce massive numbers of false positives for structural variations, such as cancer-relevant copy-number alterations and fusion genes resulting from genome rearrangements. Here we describe an ultrafast and accurate detector of somatic structural variations that reduces read-mapping costs by filtering out reads matched to pan-genome k-mer sets. The detector, which we named ETCHING (for efficient detection of chromosomal rearrangements and fusion genes), reduces the number of false positives by leveraging machine-learning classifiers trained with six breakend-related features (clipped-read count, split-reads count, supporting paired-end read count, average mapping quality, depth difference and total length of clipped bases). When benchmarked against six callers on reference cell-free DNA, validated biomarkers of structural variants, matched tumour and normal whole genomes, and tumour-only targeted sequencing datasets, ETCHING was 11-fold faster than the second-fastest structural-variant caller at comparable performance and memory use. The speed and accuracy of ETCHING may aid large-scale genome projects and facilitate practical implementations in precision medicine.},
}
@article{pmid36534203,
  year     = {2022},
  author   = {Jesus, HNR and Ramos, JN and Rocha, DJPG and Alves, DA and Silva, CS and Cruz, JVO and Vieira, VV and Souza, C and Santos, LS and Navas, J and Ramos, RTJ and Azevedo, V and Aguiar, ERGR and Mattos-Guaraldi, AL and Pacheco, LGC},
  title    = {The pan-genome of the emerging multidrug-resistant pathogen {Corynebacterium} striatum.},
  journal  = {Functional \& integrative genomics},
  volume   = {23},
  number   = {1},
  pages    = {5},
  pmid     = {36534203},
  issn     = {1438-7948},
  abstract = {Corynebacterium striatum, a common constituent of the human skin microbiome, is now considered an emerging multidrug-resistant pathogen of immunocompromised and chronically ill patients. However, little is known about the molecular mechanisms in the transition from colonization to the multidrug-resistant (MDR) invasive phenotype in clinical isolates. This study performed a comprehensive pan-genomic analysis of C. striatum, including isolates from "normal skin microbiome" and from MDR infections, to gain insights into genetic factors contributing to pathogenicity and multidrug resistance in this species. For this, three novel genome sequences were obtained from clinical isolates of C. striatum of patients from Brazil, and other 24 complete or draft C. striatum genomes were retrieved from GenBank, including the ATCC6940 isolate from the Human Microbiome Project. Analysis of C. striatum strains demonstrated the presence of an open pan-genome (α = 0.852803) containing 3816 gene families, including 15 antimicrobial resistance (AMR) genes and 32 putative virulence factors. The core and accessory genomes included 1297 and 1307 genes, respectively. The identified AMR genes are primarily associated with resistance to aminoglycosides and tetracyclines. Of these, 66.6% are present in genomic islands, and four AMR genes, including aac(6')-ib7, are located in a class 1-integron. In conclusion, our data indicated that C.
striatum possesses genomic characteristics favorable to the invasive phenotype, with high genomic plasticity, a robust genetic arsenal for iron acquisition, and important virulence determinants and AMR genes present in mobile genetic elements.},
}
@article{pmid36534120,
  year     = {2022},
  author   = {Gui, S and Martinez-Rivas, FJ and Wen, W and Meng, M and Yan, J and Usadel, B and Fernie, AR},
  title    = {Going broad and deep: sequencing driven insights into plant physiology, evolution and crop domestication.},
  journal  = {The Plant journal : for cell and molecular biology},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1111/tpj.16070},
  pmid     = {36534120},
  issn     = {1365-313X},
  abstract = {Deep-sequencing is a term that has become embedded in the plant genomic literature in recent years and with good reason. A torrent of (largely), high quality genomic and transcriptomic data has been collected and most of this has been publicly released. Indeed, almost 1000 plant genomes have been reported (www.plabipd.de) and the 2000 plant transcriptomes project has long been completed (One Thousand Plant Transcriptomes, 2019). The EarthBioGenome project will dwarf even these milestones (Lewin et al., 2022). That said, massive progress in understanding plant physiology, evolution and crop domestication have been made by sequencing broadly (across a species) as well as deeply (within a single individual). We will outline the current state of the art in genome and transcriptome sequencing before we briefly review the most visible of these broad approaches namely genome wide association- and transcriptome wide association- studies as well as the compilation of pan-genomes. This will include both the most commonly used methods reliant on single nucleotide polymorphisms and short indels as well as more recent examples which consider structural variants. We will subsequently present case-studies exemplifying how their application have brought insight into either plant physiology or evolution and crop domestication. Finally, we will provide conclusions and an outlook as to the perspective for the extension of such approaches both to different species, tissues and biological processes.},
}
@article{pmid36533928,
  year     = {2022},
  author   = {Wang, Z and Xu, S and Zheng, X and Zheng, X and Liu, M and Guo, G and Yu, Y and Han, X and Liu, Y and Wang, K and Zhang, W},
  title    = {Identification of Subunits for Novel Universal Vaccines against Three Predominant Serogroups and the Emerging {O145} among Avian Pathogenic {Escherichia} coli by {Pan-RV} Pipeline.},
  journal  = {Applied and environmental microbiology},
  volume   = {},
  number   = {},
  pages    = {e0106122},
  doi      = {10.1128/aem.01061-22},
  pmid     = {36533928},
  issn     = {1098-5336},
  abstract = {Avian pathogenic Escherichia coli, a causative agent of avian colibacillosis, has been causing serious economic losses in the poultry industry. The increase in multidrug-resistant isolates and the complexity of the serotypes of this pathogen, especially the recently reported emergence of a newly predominant serogroup of O145, make the control of this disease difficult. To address this challenge, a high-throughput screening approach, called Pan-RV (Reverse vaccinology based on pangenome analysis), is proposed to search for universal protective antigens against the three traditional serogroups and the newly emerged O145. Using this approach, a total of 61 proteins regarded as probable antigens against the four important serogroups were screened from the core genome of 127 Avian pathogenic Escherichia coli (APEC) genomes, and six were verified by Western blots using antisera. Overall, our research will provide a foundation for the development of an APEC subunit vaccine against avian colibacillosis. Given the exponential growth of whole-genome sequencing (WGS) data, our Pan-RV pipeline will make screening of bacterial vaccine candidates inexpensive, rapid, and efficient. IMPORTANCE With the emergence of drug resistance and the newly predominant serogroup O145, the control of Avian pathogenic Escherichia coli is facing a serious challenge; an efficient immunological method is urgently needed.
Here, for the first time, we propose a high-throughput screening approach to search for universal protective antigens against the three traditional serogroups and the newly emerged O145. Importantly, using this approach, a total of 61 proteins regarded as probable antigens against the four important serogroups were screened, and three were shown to be immunoreactive with all antisera (covering the four serogroups), thereby providing a foundation for the development of APEC subunit vaccines against avian colibacillosis. Further, our Pan-RV pipeline will provide immunological control strategies for pathogens with complex and variable genetic backgrounds such as Escherichia coli and will make screening of bacterial vaccine candidates more inexpensive, rapid, and efficient.},
}
@article{pmid36533266,
  year     = {2022},
  author   = {Usadel, B},
  title    = {Solanaceae pangenomes are coming of graphical age to bring heritability back.},
  journal  = {aBIOTECH},
  volume   = {3},
  number   = {4},
  pages    = {233--236},
  pmid     = {36533266},
  issn     = {2662-1738},
  abstract = {Two recent articles describe a pangenome of potato and a graph-based pangenome for tomato, respectively. The latter improves our understanding of the tomato genomics architecture even further and the use of this graph-based pangenome versus a single reference dramatically improves heritability in tomato.},
}
@article{pmid36532462,
  year     = {2022},
  author   = {Cohn, AR and Orsi, RH and Carroll, LM and Liao, J and Wiedmann, M and Cheng, RA},
  title    = {Salmonella enterica serovar {Cerro} displays a phylogenetic structure and genomic features consistent with virulence attenuation and adaptation to cattle.},
  journal  = {Frontiers in microbiology},
  volume   = {13},
  number   = {},
  pages    = {1005215},
  pmid     = {36532462},
  issn     = {1664-302X},
  abstract = {Salmonella enterica subsp. enterica (S.) serovar Cerro is rarely isolated from human clinical cases of salmonellosis but represents the most common serovar isolated from cattle without clinical signs of illness in the United States. In this study, using a large, diverse set of 316 isolates, we utilized genomic methods to further elucidate the evolutionary history of S. Cerro and to identify genomic features associated with its apparent virulence attenuation in humans. Phylogenetic analyses showed that within this polyphyletic serovar, 98.4% of isolates (311/316) represent a monophyletic clade within section Typhi and the remaining 1.6% of isolates (5/316) form a monophyletic clade within subspecies enterica Clade A1. Of the section Typhi S. Cerro isolates, 93.2% of isolates (290/311) clustered into a large clonal clade comprised of predominantly sequence type (ST) 367 cattle and environmental isolates, while the remaining 6.8% of isolates (21/311), primarily from human clinical sources, clustered outside of this clonal clade. A tip-dated phylogeny of S. Cerro ST367 identified two major clades (I and II), one of which overwhelmingly consisted of cattle isolates that share a most recent common ancestor that existed circa 1975. Gene presence/absence and rarefaction curve analyses suggested that the pangenome of section Typhi S. Cerro is open, potentially reflecting the gain/loss of prophage; human isolates contained the most open pangenome, while cattle isolates had the least open pangenome.
Hypothetically disrupted coding sequences (HDCs) displayed clade-specific losses of intact speC and sopA virulence genes within the large clonal S. Cerro clade, while loss of intact vgrG, araH, and vapC occurred in all section Typhi S. Cerro isolates. Further phenotypic analysis suggested that the presence of a premature stop codon in speC does not abolish ornithine decarboxylase activity in S. Cerro, likely due to the activity of the second ornithine decarboxylase encoded by speF, which remained intact in all isolates. Overall, our study identifies specific genomic features associated with S. Cerro's infrequent isolation from humans and its apparent adaptation to cattle, which has broader implications for informing our understanding of the evolutionary events facilitating host adaptation in Salmonella.},
}
@article{pmid36529716,
  year     = {2022},
  author   = {Cagirici, HB and Andorf, CM and Sen, TZ},
  title    = {Co-expression pan-network reveals genes involved in complex traits within maize pan-genome.},
  journal  = {BMC plant biology},
  volume   = {22},
  number   = {1},
  pages    = {595},
  pmid     = {36529716},
  issn     = {1471-2229},
  abstract = {BACKGROUND: With the advances in the high throughput next generation sequencing technologies, genome-wide association studies (GWAS) have identified a large set of variants associated with complex phenotypic traits at a very fine scale. Despite the progress in GWAS, identification of genotype-phenotype relationship remains challenging in maize due to its nature with dozens of variants controlling the same trait. As the causal variations results in the change in expression, gene expression analyses carry a pivotal role in unraveling the transcriptional regulatory mechanisms behind the phenotypes.
RESULTS: To address these challenges, we incorporated the gene expression and GWAS-driven traits to extend the knowledge of genotype-phenotype relationships and transcriptional regulatory mechanisms behind the phenotypes. We constructed a large collection of gene co-expression networks and identified more than 2 million co-expressing gene pairs in the GWAS-driven pan-network which contains all the gene-pairs in individual genomes of the nested association mapping (NAM) population. We defined four sub-categories for the pan-network: (1) core-network contains the highest represented ~ 1% of the gene-pairs, (2) near-core network contains the next highest represented 1-5% of the gene-pairs, (3) private-network contains ~ 50% of the gene pairs that are unique to individual genomes, and (4) the dispensable-network contains the remaining 50-95% of the gene-pairs in the maize pan-genome. Strikingly, the private-network contained almost all the genes in the pan-network but lacked half of the interactions. We performed gene ontology (GO) enrichment analysis for the pan-, core-, and private- networks and compared the contributions of variants overlapping with genes and promoters to the GWAS-driven pan-network.
CONCLUSIONS: Gene co-expression networks revealed meaningful information about groups of co-regulated genes that play a central role in regulatory processes. Pan-network approach enabled us to visualize the global view of the gene regulatory network for the studied system that could not be well inferred by the core-network alone.},
}
@article{pmid36526963,
  year     = {2022},
  author   = {Abraha, HB and Lee, JW and Kim, G and Ferdiansyah, MK and Ramesha, RM and Kim, KP},
  title    = {Genomic diversity and comprehensive taxonomical classification of 61 {Bacillus} subtilis group member infecting bacteriophages, and the identification of ortholog taxonomic signature genes.},
  journal  = {BMC genomics},
  volume   = {23},
  number   = {1},
  pages    = {835},
  pmid     = {36526963},
  issn     = {1471-2164},
  abstract = {BACKGROUND: Despite the applications of Bacillus subtilis group species in various sectors, limited information is available regarding their phages. Here, 61 B. subtilis group species-infecting phages (BSPs) were studied for their taxonomic classification considering the genome-size, genomic diversity, and the host, followed by the identification of orthologs taxonomic signature genes.
RESULTS: BSPs have widely ranging genome sizes that can be bunched into groups to demonstrate correlations to family and subfamily classifications. Comparative analysis re-confirmed the existing, BSPs-containing 14 genera and 21 species and displayed inter-genera similarities within existing subfamilies. Importantly, it also revealed the need for the creation of new taxonomic classifications, including 28 species, nine genera, and two subfamilies (New subfamily1 and New subfamily2) to accommodate inter-genera relatedness. Following pangenome analysis, no ortholog shared by all BSPs was identified, while orthologs, namely, the tail fibers/spike proteins and poly-gamma-glutamate hydrolase, that are shared by more than two-thirds of the BSPs were identified. More importantly, major capsid protein (MCP) type I, MCP type II, MCP type III and peptidoglycan binding proteins that are distinctive orthologs for Herelleviridae, Salasmaviridae, New subfamily1, and New subfamily2, respectively, were identified and analyzed which could serve as signatures to distinguish BSP members of the respective taxon.
CONCLUSIONS: In this study, we show the genomic diversity and propose a comprehensive classification of 61 BSPs, including the proposition for the creation of two new subfamilies, followed by the identification of orthologs taxonomic signature genes, potentially contributing to phage taxonomy.},
}
@article{pmid36523157,
  year     = {2022},
  author   = {Shi, J and Tian, Z and Lai, J and Huang, X},
  title    = {Plant pan-genomics and its applications.},
  journal  = {Molecular plant},
  volume   = {},
  number   = {},
  pages    = {},
  doi      = {10.1016/j.molp.2022.12.009},
  pmid     = {36523157},
  issn     = {1752-9867},
  abstract = {Plant genomes are highly diverse that a substantial proportion of genomic sequences are not shared among individuals. The variable DNA sequences, along with the conserved core sequences, compose the more sophisticated pan-genome that represents the collection of all non-redundant DNA in a species. With the rapid progress of genome sequencing technologies, pan-genome researches have now been surging in plants. Here we review the recent advances in plant pan-genomics including major driving forces of structural variations that constitute the variable sequences, the methodological innovations to represent pan-genome as well as the major successes in constructing plant pan-genomes. We also summarize the recent efforts towards the decoding of final dark matters in the Telomere-to-Telomere (T-2-T) or gapless plant genomes. These new genome resources, which have remarkable advantages over the large number of previously assembled less-than-perfect genomes, are expecting to become new references in both genetic studies and plant breeding applications.},
}
@article {pmid36516689, year = {2022}, author = {Hussain, J and Cohen, M and O'Malley, CJ and Mantri, N and Li, Y and Mueller, JF and Greaves, R and Wang, X}, title = {Detections of organophosphate and pyrethroid insecticide metabolites in urine and sweat obtained from women during infrared sauna and exercise: A pilot crossover study.}, journal = {International journal of hygiene and environmental health}, volume = {248}, pages = {114091}, doi = {10.1016/j.ijheh.2022.114091}, pmid = {36516689}, issn = {1618-131X}, abstract = {Synthetic pesticides such as organophosphates and pyrethroids are commonly used worldwide yet the metabolic and long-term human health effects of these environmental exposures are unclear. Urinary detections of metabolites involving both classes of insecticides have been documented in various global populations. However, reports documenting similar detections in human sweat are sparse. In this study, the concentrations of four insecticide metabolites were measured using liquid chromatography coupled with tandem mass spectrometry in repeated sweat and urine collections (n = 85) from 10 women undergoing three interventions (control, infrared sauna and indoor bicycling) within a single-blinded randomised crossover trial. The Friedman test with post-hoc two-way analysis of variance, the related-samples Wilcoxon signed rank test and the Spearman's rank-order correlation test were used to analyse the results. Organophosphate metabolites were detected in 84.6% (22/26) and pyrethroids in 26.9% (7/26) of the collected sweat samples (pooled per individual, per intervention). Urinary concentrations of three of the four metabolites marginally increased after infrared sauna bathing: 3,5,6-trichloro-2-pyridinol (z = 2.395, p = 0.017); 3-phenoxybenzoic acid (z = 2.599, p = 0.009); and trans-3-(2,2-dichlorovinyl)-2,2-dimethylcyclopropane-1-carboxylic acid (z = 2.090, p = 0.037). 
Urinary 3-phenoxybenzoic acid also increased after exercise (z = 2.073, p = 0.038) and demonstrated the most temporal variability (days to weeks) of any of the urinary metabolites. Definitive sweat/urine correlations were not demonstrated. These results indicate metabolites from organophosphate and pyrethroid pesticides can be detected in human sweat and this raises intriguing questions about perspiration and its role in the metabolism and excretion of synthetic pesticides.}, }
@article {pmid36515536, year = {2022}, author = {Rumball, NA and Alm, EW and McLellan, SL}, title = {Genetic Determinants of {Escherichia coli} Survival in Beach Sand.}, journal = {Applied and environmental microbiology}, pages = {e0142322}, doi = {10.1128/aem.01423-22}, pmid = {36515536}, issn = {1098-5336}, abstract = {Escherichia coli contain a high level of genetic diversity and are generally associated with the guts of warm-blooded animals but have also been isolated from secondary habitats outside hosts. We used E. coli isolates from previous in situ microcosm experiments conducted under actual beach conditions and performed population-level genomic analysis to identify accessory genes associated with survival within the beach sand environment. E. coli strains capable of surviving had been selected for by seeding isolates originating from sand, sewage, and gull waste (n = 528; 176 from each source) into sand, which was sealed in microcosm chambers and buried for 45 days in the backshore beach of Lake Michigan. In the current work, survival-associated genes were identified by comparing the pangenome of viable E. coli populations at the end of the microcosm experiment with the original isolate collection and identifying loci enriched in the output samples. We found that environmental survival was associated with a wide variety of genetic factors, with the majority corresponding to metabolism enzymes and transport proteins. Of the 414 unique functions identified, most were present across E. coli phylogroups, except B2 which is often associated with human pathogens. Gene modules that were enriched in surviving populations included a betaine biosynthesis pathway, which produces an osmoprotectant, and the GABA (gamma-aminobutyrate) biosynthesis pathway, which aids in pH homeostasis and nutrient use versatility. 
Overall, these results demonstrate that the genetic flexibility within this species allows for survival in the environment for extended periods. IMPORTANCE Escherichia coli is commonly used as an indicator of recent fecal pollution in recreational water despite its known ability to survive in secondary environments, such as beach sand. These long-term survivors from sand reservoirs can be introduced into the water column through wave action or runoff during precipitation events, thereby impacting the perception of local water quality. Current beach monitoring methods cannot differentiate long-term environmental survivors from E. coli derived from recent fecal input, resulting in inaccurate monitoring results and unnecessary beach closures. This work identified the genetic factors that are associated with long-term survivors, providing insight into the mechanistic basis for E. coli accumulation in beach sand. A greater understanding of the intrinsic ability of E. coli to survive long-term and conditions that promote such survival will provide evidence of the limitations of beach water quality assessments using this indicator.}, }
@article {pmid36511689, year = {2022}, author = {Dillard, LR and Glass, EM and Lewis, AL and Thomas-White, K and Papin, JA}, title = {Metabolic Network Models of the {Gardnerella} Pangenome Identify Key Interactions with the Vaginal Environment.}, journal = {mSystems}, pages = {e0068922}, doi = {10.1128/msystems.00689-22}, pmid = {36511689}, issn = {2379-5077}, abstract = {Gardnerella is the primary pathogenic bacterial genus present in the polymicrobial condition known as bacterial vaginosis (BV). Despite BV's high prevalence and associated chronic and acute women's health impacts, the Gardnerella pangenome is largely uncharacterized at both the genetic and functional metabolic levels. Here, we used genome-scale metabolic models to characterize in silico the Gardnerella pangenome metabolic content. We also assessed the metabolic functional capacity in a BV-positive cervicovaginal fluid context. The metabolic capacity varied widely across the pangenome, with 38.15% of all reactions being core to the genus, compared to 49.60% of reactions identified as being unique to a smaller subset of species. We identified 57 essential genes across the pangenome via in silico gene essentiality screens within two simulated vaginal metabolic environments. Four genes, gpsA, fas, suhB, and psd, were identified as core essential genes critical for the metabolic function of all analyzed bacterial species of the Gardnerella genus. Further understanding these core essential metabolic functions could inform novel therapeutic strategies to treat BV. Machine learning applied to simulated metabolic network flux distributions showed limited clustering based on the sample isolation source, which further supports the presence of extensive core metabolic functionality across this genus. These data represent the first metabolic modeling of the Gardnerella pangenome and illustrate strain-specific interactions with the vaginal metabolic environment across the pangenome. 
IMPORTANCE Bacterial vaginosis (BV) is the most common vaginal infection among reproductive-age women. Despite its prevalence and associated chronic and acute women's health impacts, the diverse bacteria involved in BV infection remain poorly characterized. Gardnerella is the genus of bacteria most commonly and most abundantly represented during BV. In this paper, we use metabolic models, which are a computational representation of the possible functional metabolism of an organism, to investigate metabolic conservation, gene essentiality, and pathway utilization across 110 Gardnerella strains. These models allow us to investigate in silico how strains may differ with respect to their metabolic interactions with the vaginal-host environment.}, }
@article {pmid36503997, year = {2022}, author = {Chan, C and Salomé, P}, title = {What makes a good reference? {First} steps toward a {Chlamydomonas} pangenome.}, journal = {The Plant cell}, doi = {10.1093/plcell/koac361}, pmid = {36503997}, issn = {1532-298X}, }
@article {pmid36494615, year = {2022}, author = {Johansson, P and Säde, E and Hultman, J and Auvinen, P and Björkroth, J}, title = {Pangenome and genomic taxonomy analyses of {Leuconostoc gelidum} and {Leuconostoc gasicomitatum}.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {818}, pmid = {36494615}, issn = {1471-2164}, abstract = {BACKGROUND: Leuconostoc gelidum and Leuconostoc gasicomitatum have dual roles in foods. They may spoil cold-stored packaged foods but can also be beneficial in kimchi fermentation. The impact in food science as well as the limited number of publicly available genomes prompted us to create pangenomes and perform genomic taxonomy analyses starting from de novo sequencing of the genomes of 37 L. gelidum/L. gasicomitatum strains from our culture collection. Our aim was also to evaluate the recently proposed change in taxonomy as well as to study the genomes of strains with different lifestyles in foods.
METHODS: We selected as diverse a set of strains as possible in terms of sources, previous genotyping results and geographical distribution, and included also 10 publicly available genomes in our analyses. We studied genomic taxonomy using pairwise average nucleotide identity (ANI) and calculation of digital DNA-DNA hybridisation (dDDH) scores. Phylogeny analyses were done using the core gene set of 1141 single-copy genes and a set of housekeeping genes commonly used for lactic acid bacteria. In addition, the pangenome and core genome sizes as well as some properties, such as acquired antimicrobial resistance (AMR), important due to the growth in foods, were analysed.
RESULTS: Genome relatedness indices and phylogenetic analyses supported the recently suggested classification that restores the taxonomic position of L. gelidum subsp. gasicomitatum back to the species level as L. gasicomitatum. Genome properties, such as size and coding potential, revealed limited intraspecies variation and showed no attribution to the source of isolation. The distribution of the unique genes between species and subspecies was not associated with the previously documented lifestyle in foods. None of the strains carried any acquired AMR genes or genes associated with any known form of virulence.
CONCLUSION: Genome-wide examination of strains confirms that the proposition to restore the taxonomic position of L. gasicomitatum is justified. It further confirms that the distribution and lifestyle of L. gelidum and L. gasicomitatum in foods have not been driven by the evolution of functional and phylogenetic diversification detectable at the genome level.}, }
@article {pmid36494611, year = {2022}, author = {Guardia, AE and Wagner, A and Busalmen, JP and Di Capua, C and Cortéz, N and Beligni, MV}, title = {The draft genome of {Andean} {Rhodopseudomonas} sp. strain {AZUL} predicts genome plasticity and adaptation to chemical homeostasis.}, journal = {BMC microbiology}, volume = {22}, number = {1}, pages = {297}, pmid = {36494611}, issn = {1471-2180}, abstract = {The genus Rhodopseudomonas comprises purple non-sulfur bacteria with extremely versatile metabolisms. Characterization of several strains revealed that each is a distinct ecotype highly adapted to its specific micro-habitat. Here we present the sequencing, genomic comparison and functional annotation of AZUL, a Rhodopseudomonas strain isolated from a high altitude Andean lagoon dominated by extreme conditions and fluctuating levels of chemicals. Average nucleotide identity (ANI) analysis of 39 strains of this genus showed that the genome of AZUL is 96.2% identical to that of strain AAP120, which suggests that they belong to the same species. ANI values also show clear separation at the species level with the rest of the strains, being more closely related to R. palustris. Pangenomic analyses revealed that the genus Rhodopseudomonas has an open pangenome and that its core genome represents roughly 5 to 12% of the total gene repertoire of the genus. Functional annotation showed that AZUL has genes that participate in conferring genome plasticity and that, in addition to sharing the basal metabolic complexity of the genus, it is also specialized in metal and multidrug resistance and in responding to nutrient limitation. Our results also indicate that AZUL might have evolved to use some of the mechanisms involved in resistance as redox reactions for bioenergetic purposes. Most of those features are shared with strain AAP120, and mainly involve the presence of additional orthologs responsible for the mentioned processes. 
Altogether, our results suggest that AZUL, one of the few bacteria from its habitat with a sequenced genome, is highly adapted to the extreme and changing conditions that constitute its niche.}, }
@article {pmid36479628, year = {2022}, author = {Adsit, FG and Randall, TA and Locklear, J and Kurtz, DM}, title = {The emergence of the tetrathionate reductase operon in the {Escherichia coli}/{Shigella} pan-genome.}, journal = {MicrobiologyOpen}, volume = {11}, number = {6}, pages = {e1333}, doi = {10.1002/mbo3.1333}, pmid = {36479628}, issn = {2045-8827}, abstract = {Escherichia coli pathogenic variants (pathovars) are generally characterized by defined virulence traits and are susceptible to the evolution of hybridized identities due to the considerable plasticity of the E. coli genome. We have isolated a strain from a purified diet intended for research animals that further demonstrates the ability of E. coli to acquire novel genetic elements leading potentially to emergent new pathovars. Utilizing next generation sequencing to obtain a whole genome profile, we report an atypical strain of E. coli, EcoFA807-17, possessing a tetrathionate reductase (ttr) operon, which enables the utilization of tetrathionate as an electron acceptor, thus facilitating respiration in anaerobic environments such as the mammalian gut. The ttr operon is a potent virulence factor for several enteric pathogens, most prominently Salmonella enterica. However, the presence of chromosomally integrated tetrathionate reductase genes does not appear to have been previously reported in wild-type E. coli or Shigella. Accordingly, it is possible that the appearance of this virulence factor may signal the evolution of new mechanisms of pathogenicity in E. coli and Shigella and may potentially alter the effectiveness of existing assays using tetrathionate reductase as a unique marker for the detection of Salmonella enterica.}, }
@article {pmid36479579, year = {2022}, author = {Droc, G and Martin, G and Guignon, V and Summo, M and Sempéré, G and Durant, E and Soriano, A and Baurens, FC and Cenci, A and Breton, C and Shah, T and Aury, JM and Ge, XJ and Harrison, PH and Yahiaoui, N and D'Hont, A and Rouard, M}, title = {The banana genome hub: a community database for genomics in the {Musaceae}.}, journal = {Horticulture research}, volume = {9}, pages = {uhac221}, doi = {10.1093/hr/uhac221}, pmid = {36479579}, issn = {2662-6810}, abstract = {The Banana Genome Hub provides centralized access for genome assemblies, annotations, and the extensive related omics resources available for bananas and banana relatives. A series of tools and unique interfaces are implemented to harness the potential of genomics in bananas, leveraging the power of comparative analysis, while recognizing the differences between datasets. Besides effective genomic tools like BLAST and the JBrowse genome browser, additional interfaces enable advanced gene search and gene family analyses including multiple alignments and phylogenies. A synteny viewer enables the comparison of genome structures between chromosome-scale assemblies. Interfaces for differential expression analyses, metabolic pathways and GO enrichment were also added. A catalogue of variants spanning the banana diversity is made available for exploration, filtering, and export to a wide variety of software. Furthermore, we implemented new ways to graphically explore gene presence-absence in pangenomes as well as genome ancestry mosaics for cultivated bananas. Besides, to guide the community in future sequencing efforts, we provide recommendations for nomenclature of locus tags and a curated list of public genomic resources (assemblies, resequencing, high density genotyping) and upcoming resources-planned, ongoing or not yet public. 
The Banana Genome Hub aims at supporting the banana scientific community for basic, translational, and applied research and can be accessed at https://banana-genome-hub.southgreen.fr.}, }
@article {pmid36478861, year = {2022}, author = {Abou Abdallah, R and Million, M and Delerce, J and Anani, H and Diop, A and Caputo, A and Zgheib, R and Rousset, E and Sidi Boumedine, K and Raoult, D and Fournier, PE}, title = {Pangenomic analysis of {Coxiella burnetii} unveils new traits in genome architecture.}, journal = {Frontiers in microbiology}, volume = {13}, pages = {1022356}, doi = {10.3389/fmicb.2022.1022356}, pmid = {36478861}, issn = {1664-302X}, abstract = {Coxiella burnetii is the etiological agent of Q fever, a worldwide zoonosis able to cause large outbreaks. The disease is polymorphic. Symptomatic primary infection is named acute Q fever and is associated with hepatitis, pneumonia, fever, and auto-immune complications while persistent focalized infections, mainly endocarditis, and vascular infections, occur in a minority of patients but are potentially lethal. In order to evaluate the genomic features, genetic diversity, evolution, as well as genetic determinants of antibiotic resistance, pathogenicity, and ability to cause outbreaks of Q fever, we performed a pangenomic analysis and genomic comparison of 75 C. burnetii strains including 63 newly sequenced genomes. Our analysis demonstrated that C. burnetii has an open pangenome, unique genes being found in many strains. In addition, pathogenicity islands were detected in all genomes. In consequence C. burnetii has a high genomic plasticity, higher than that of other intracellular bacteria. The core- and pan-genomes are made of 1,211 and 4,501 genes, respectively (ratio 0.27). The core gene-based phylogenetic analysis matched that obtained from multi-spacer typing and the distribution of plasmid types. Genomic characteristics were associated to clinical and epidemiological features. Some genotypes were associated to specific clinical forms and countries. MST1 genotype strains were associated to acute Q fever. A significant association was also found between clinical forms and plasmids. 
Strains harboring the QpRS plasmid were never found in acute Q fever and were only associated to persistent focalized infections. The QpDV and QpH1 plasmids were associated to acute Q fever. In addition, the Guyanese strain CB175, the most virulent strain to date, exhibited a unique MST genotype, a distinct COG profile and an important variation in gene number that may explain its unique pathogenesis. Therefore, strain-specific factors play an important role in determining the epidemiological and clinical manifestations of Q fever alongside with host-specific factors (valvular and vascular defects notably).}, }
@article {pmid36476389, year = {2022}, author = {Djeghout, B and Bloomfield, SJ and Rudder, S and Elumogo, N and Mather, AE and Wain, J and Janecko, N}, title = {Comparative genomics of {Campylobacter jejuni} from clinical campylobacteriosis stool specimens.}, journal = {Gut pathogens}, volume = {14}, number = {1}, pages = {45}, pmid = {36476389}, issn = {1757-4749}, support = {BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {BACKGROUND: Campylobacter jejuni is a pervasive pathogen of major public health concern with a complex ecology requiring accurate and informative approaches to define pathogen diversity during outbreak investigations. Source attribution analysis may be confounded if the genetic diversity of a C. jejuni population is not adequately captured in a single specimen. The aim of this study was to determine the genomic diversity of C. jejuni within individual stool specimens from four campylobacteriosis patients. Direct plating and pre-culture filtration of one stool specimen per patient was used to culture multiple isolates per stool specimen. Whole genome sequencing and pangenome level analysis were used to investigate genomic diversity of C. jejuni within a patient.
RESULTS: A total 92 C. jejuni isolates were recovered from four patients presenting with gastroenteritis. The number of isolates ranged from 13 to 30 per patient stool. Three patients yielded a single C. jejuni multilocus sequence type: ST-21 (n = 26, patient 4), ST-61 (n = 30, patient 1) and ST-2066 (n = 23, patient 2). Patient 3 was infected with two different sequence types [ST-51 (n = 12) and ST-354 (n = 1)]. Isolates belonging to the same sequence type from the same patient specimen shared 12-43 core non-recombinant SNPs and 0-20 frameshifts with each other, and the pangenomes of each sequence type consisted of 1406-1491 core genes and 231-264 accessory genes. However, neither the mutation nor the accessory genes were connected to a specific functional gene category.
CONCLUSIONS: Our findings show that the C. jejuni population recovered from an individual patient's stool are genetically diverse even within the same ST and may have shared common ancestors before specimens were obtained. The population is unlikely to have evolved from a single isolate at the time point of initial patient infection, leading us to conclude that patients were likely infected with a heterogeneous C. jejuni population. The diversity of the C. jejuni population found within individual stool specimens can inform future methodological approaches to attribution and outbreak investigations.}, }
@article {pmid36476074, year = {2022}, author = {Ullah, A and Ullah Khan, S and Haq, MU and Ahmad, S and Irfan, M and Asif, M and Muhseen, ZT and Alkeraidees, MS and Allemailem, KS and Alrumaihi, F and Almatroudi, A}, title = {Computational study to investigate {Proteus mirabilis} proteomes for multi-epitope vaccine construct design.}, journal = {Journal of biomolecular structure \& dynamics}, pages = {1--12}, doi = {10.1080/07391102.2022.2153920}, pmid = {36476074}, issn = {1538-0254}, abstract = {Proteus mirabilis is a gram-negative bacterium particularly known for its unique swarming ability. The swarming gives the bacteria ability to enhance adherence to the catheter surface and epithelium cells of the urethra to cause catheter associated urinary tract infections. P. mirabilis has evolved resistant to antibiotics. Additionally, there is an approved vaccine against P. mirabilis, thus demanding for identification of new vaccine targets. This gram-negative bacterium consists of 19,502 core proteins, out of which 19,063 are redundant proteins and remaining 439 are non-redundant proteins. The non-redundant proteins have 21 proteins present on the cell surface out of which 11 proteins are virulent. Antigenicity analysis predicted only 2 proteins as antigenic (fimbrial biogenesis outer membrane usher protein and ligand-gated channel protein). Four and seven B-cells epitopes were predicted from the former and later proteins, respectively. The predicted B-cells epitopes were used for T-cells epitopes prediction. The predicted epitopes were linked to each other through GPGPG linkers and joined with cholera toxin beta subunit adjuvant. A multi-epitopes vaccine construct consisting of 226 residues was docked with MHC-I, MHC-II and TLR-4. The best docked complex in each case has binding energy of -714.6, -744.6 and -829.5 kcal/mol, respectively. 
Moreover, the docking results were validated through molecular dynamics simulation and binding free energies estimation. The net energy of -137.2 kcal/mol was calculated for vaccine-MHC-I complex, -133.39 kcal/mol for vaccine-MHC-II and -158.68 kcal/mol for vaccine-TLR-4 complex. The designed vaccine construct could provoke immune responses against targeted pathogen and may be used in experimental testing. Communicated by Ramaswamy H. Sarma.}, }
@article {pmid36474047, year = {2022}, author = {Wang, M and Li, J and Qi, Z and Long, Y and Pei, L and Huang, X and Grover, CE and Du, X and Xia, C and Wang, P and Liu, Z and You, J and Tian, X and Ma, Y and Wang, R and Chen, X and He, X and Fang, DD and Sun, Y and Tu, L and Jin, S and Zhu, L and Wendel, JF and Zhang, X}, title = {Genomic innovation and regulatory rewiring during evolution of the cotton genus {Gossypium}.}, journal = {Nature genetics}, pmid = {36474047}, issn = {1546-1718}, abstract = {Phenotypic diversity and evolutionary innovation ultimately trace to variation in genomic sequence and rewiring of regulatory networks. Here, we constructed a pan-genome of the Gossypium genus using ten representative diploid genomes. We document the genomic evolutionary history and the impact of lineage-specific transposon amplification on differential genome composition. The pan-3D genome reveals evolutionary connections between transposon-driven genome size variation and both higher-order chromatin structure reorganization and the rewiring of chromatin interactome. We linked changes in chromatin structures to phenotypic differences in cotton fiber and identified regulatory variations that decode the genetic basis of fiber length, the latter enabled by sequencing 1,005 transcriptomes during fiber development. We showcase how pan-genomic, pan-3D genomic and genetic regulatory data serve as a resource for delineating the evolutionary basis of spinnable cotton fiber. Our work provides insights into the evolution of genome organization and regulation and will inform cotton improvement by enabling regulome-based approaches.}, }
@article{pmid36469788,
  author   = {Yebra, G and Harling-Lee, JD and Lycett, S and Aarestrup, FM and Larsen, G and Cavaco, LM and Seo, KS and Abraham, S and Norris, JM and Schmidt, T and Ehlers, MM and Sordelli, DO and Buzzola, FR and Gebreyes, WA and Gonçalves, JL and Dos Santos, MV and Zakaria, Z and Rall, VLM and Keane, OM and Niedziela, DA and Paterson, GK and Holmes, MA and Freeman, TC and Fitzgerald, JR},
  title    = {Multiclonal human origin and global expansion of an endemic bacterial pathogen of livestock.},
  journal  = {Proceedings of the National Academy of Sciences of the United States of America},
  year     = {2022},
  volume   = {119},
  number   = {50},
  pages    = {e2211217119},
  doi      = {10.1073/pnas.2211217119},
  pmid     = {36469788},
  issn     = {1091-6490},
  mesh     = {Female ; Humans ; Cattle ; Animals ; *Staphylococcus aureus/genetics ; Livestock/genetics ; *Staphylococcal Infections/epidemiology/veterinary/genetics ; Genome ; Host Specificity ; },
  abstract = {Most new pathogens of humans and animals arise via switching events from distinct host species. However, our understanding of the evolutionary and ecological drivers of successful host adaptation, expansion, and dissemination are limited. Staphylococcus aureus is a major bacterial pathogen of humans and a leading cause of mastitis in dairy cows worldwide. Here we trace the evolutionary history of bovine S. aureus using a global dataset of 10,254 S. aureus genomes including 1,896 bovine isolates from 32 countries in 6 continents. We identified 7 major contemporary endemic clones of S. aureus causing bovine mastitis around the world and traced them back to 4 independent host-jump events from humans that occurred up to 2,500 y ago. Individual clones emerged and underwent clonal expansion from the mid-19th to late 20th century coinciding with the commercialization and industrialization of dairy farming, and older lineages have become globally distributed via established cattle trade links. 
Importantly, we identified lineage-dependent differences in the frequency of host transmission events between humans and cows in both directions revealing high risk clones threatening veterinary and human health. Finally, pangenome network analysis revealed that some bovine S. aureus lineages contained distinct sets of bovine-associated genes, consistent with multiple trajectories to host adaptation via gene acquisition. Taken together, we have dissected the evolutionary history of a major endemic pathogen of livestock providing a comprehensive temporal, geographic, and gene-level perspective of its remarkable success.},
}
@article {pmid36469554, year = {2022}, author = {Zhao, C and Goldman, M and Smith, BJ and Pollard, KS}, title = {Genotyping Microbial Communities with {MIDAS2}: From Metagenomic Reads to Allele Tables.}, journal = {Current protocols}, volume = {2}, number = {12}, pages = {e604}, doi = {10.1002/cpz1.604}, pmid = {36469554}, issn = {2691-1299}, mesh = {*Metagenome/genetics ; Genotype ; Alleles ; *Microbiota/genetics ; Nucleotides ; }, abstract = {The Metagenomic Intra-Species Diversity Analysis System 2 (MIDAS2) is a scalable pipeline that identifies single nucleotide variants and gene copy number variants in metagenomes using comprehensive reference databases built from public microbial genome collections (metagenotyping). MIDAS2 is the first metagenotyping tool with functionality to control metagenomic read mapping filters and to customize the reference database to the microbial community, features that improve the precision and recall of detected variants. In this article we present four basic protocols for the most common use cases of MIDAS2, along with supporting protocols for installation and use. In addition, we provide in-depth guidance on adjusting command line parameters, editing the reference database, optimizing hardware utilization, and understanding the metagenotyping results. All the steps of metagenotyping, from raw sequencing reads to population genetic analysis, are demonstrated with example data in two downloadable sequencing libraries of single-end metagenomic reads representing a mixture of multiple bacterial species. This set of protocols empowers users to accurately genotype hundreds of species in thousands of samples, providing rich genetic data for studying the evolution and strain-level ecology of microbial communities. © 2022 The Authors. Current Protocols published by Wiley Periodicals LLC. 
Basic Protocol 1: Species prescreening Basic Protocol 2: Download MIDAS reference database Basic Protocol 3: Population single nucleotide variant calling Basic Protocol 4: Pan-genome copy number variant calling Support Protocol 1: Installing MIDAS2 Support Protocol 2: Command line inputs Support Protocol 3: Metagenotyping with a custom collection of genomes Support Protocol 4: Metagenotyping with advanced parameters.}, }
@article {pmid36469480, year = {2022}, author = {Pais, AKL and Santos, LVSD and Albuquerque, GMR and Farias, ARG and Silva Junior, WJ and Balbino, VQ and Silva, AMF and Gama, MASD and Souza, EB}, title = {Comparative genomics and phylogenomics of the {Ralstonia solanacearum} {Moko} ecotype and its symptomatological variants.}, journal = {Genetics and molecular biology}, volume = {45}, number = {4}, pages = {e20220038}, doi = {10.1590/1678-4685-GMB-2022-0038}, pmid = {36469480}, issn = {1415-4757}, abstract = {Banana tree bacterial wilt is caused by the Ralstonia solanacearum Moko ecotype. These strains vary in their symptom progression in banana, and are classified as typical Moko variants (phylotype IIA and IIB strains from across Central and South America), Bugtok variant (Philippines), and Sergipe facies (the states of Sergipe and Alagoas, Brazil). This study used comparative genomic and phylogenomic approaches to identify a correlation between the symptom progression of the Moko ecotypes based on the analysis of 23 available genomes. Average nucleotide identity and in silico DNA-DNA hybridization revealed a high correlation (>96% and >78%, respectively) between the genomes of Moko variants. Pan-genome analysis identified 21.3% of inheritable regions between representatives of the typical Moko and Sergipe facies variants, which could be traced to an abundance of exclusive homolog clusters. Moko ecotype genomes shared 1,951 orthologous genes, but representatives with typical symptoms did not display unique orthologues. Moreover, Bugtok disease and Sergipe facies genomes did not share any unique genes, suggesting convergent evolution to a shared symptom progression. Overall, genomic and phylogenomic analyses were insufficient to differentiate the Moko variants based on symptom progression.}, }
@article{pmid36467270,
  author   = {Lee, JH and Venkatesh, J and Jo, J and Jang, S and Kim, GW and Kim, JM and Han, K and Ro, N and Lee, HY and Kwon, JK and Kim, YM and Lee, TH and Choi, D and Van Deynze, A and Hill, T and Kfir, N and Freiman, A and Davila Olivas, NH and Elkind, Y and Paran, I and Kang, BC},
  title    = {High-quality chromosome-scale genomes facilitate effective identification of large structural variations in hot and sweet peppers.},
  journal  = {Horticulture research},
  year     = {2022},
  volume   = {9},
  pages    = {uhac210},
  pmid     = {36467270},
  issn     = {2662-6810},
  abstract = {Pepper (Capsicum annuum) is an important vegetable crop that has been subjected to intensive breeding, resulting in limited genetic diversity, especially for sweet peppers. Previous studies have reported pepper draft genome assemblies using short read sequencing, but their capture of the extent of large structural variants (SVs), such as presence-absence variants (PAVs), inversions, and copy-number variants (CNVs) in the complex pepper genome falls short. In this study, we sequenced the genomes of representative sweet and hot pepper accessions by long-read and/or linked-read methods and advanced scaffolding technologies. First, we developed a high-quality reference genome for the sweet pepper cultivar 'Dempsey' and then used the reference genome to identify SVs in 11 other pepper accessions and constructed a graph-based pan-genome for pepper. We annotated an average of 42 972 gene families in each pepper accession, defining a set of 19 662 core and 23 115 non-core gene families. The new pepper pan-genome includes informative variants, 222 159 PAVs, 12 322 CNVs, and 16 032 inversions. Pan-genome analysis revealed PAVs associated with important agricultural traits, including potyvirus resistance, fruit color, pungency, and pepper fruit orientation.
Comparatively, a large number of genes are affected by PAVs, which is positively correlated with the high frequency of transposable elements (TEs), indicating TEs play a key role in shaping the genomic landscape of peppers. The datasets presented herein provide a powerful new genomic resource for genetic analysis and genome-assisted breeding for pepper improvement.},
}
@article{pmid36466678,
  author   = {Núñez-Montero, K and Rojas-Villalta, D and Barrientos, L},
  title    = {{Antarctic} {Sphingomonas} sp. {So64.6b} showed evolutive divergence within its genus, including new biosynthetic gene clusters.},
  journal  = {Frontiers in microbiology},
  year     = {2022},
  volume   = {13},
  pages    = {1007225},
  pmid     = {36466678},
  issn     = {1664-302X},
  abstract = {INTRODUCTION: The antibiotic crisis is a major human health problem. Bioprospecting screenings suggest that proteobacteria and other extremophile microorganisms have biosynthetic potential for the production novel antimicrobial compounds. An Antarctic Sphingomonas strain (So64.6b) previously showed interesting antibiotic activity and elicitation response, then a relationship between environmental adaptations and its biosynthetic potential was hypothesized. We aimed to determine the genomic characteristics in So64.6b strain related to evolutive traits for the adaptation to the Antarctic environment that could lead to its diversity of potentially novel antibiotic metabolites.
METHODS: The complete genome sequence of the Antarctic strain was obtained and mined for Biosynthetic Gene Clusters (BGCs) and other unique genes related to adaptation to extreme environments. Comparative genome analysis based on multi-locus phylogenomics, BGC phylogeny, and pangenomics were conducted within the closest genus, aiming to determine the taxonomic affiliation and differential characteristics of the Antarctic strain.
RESULTS AND DISCUSSION: The Antarctic strain So64.6b showed a closest identity with Sphingomonas alpina, however containing a significant genomic difference of ortholog cluster related to degradation multiple pollutants. Strain So64.6b had a total of six BGC, which were predicted with low to no similarity with other reported clusters; three were associated with potential novel antibiotic compounds using ARTS tool. Phylogenetic and synteny analysis of a common BGC showed great diversity between Sphingomonas genus but grouping in clades according to similar isolation environments, suggesting an evolution of BGCs that could be linked to the specific ecosystems. Comparative genomic analysis also showed that Sphingomonas species isolated from extreme environments had the greatest number of predicted BGCs and a higher percentage of genetic content devoted to BGCs than the isolates from mesophilic environments. In addition, some extreme-exclusive clusters were found related to oxidative and thermal stress adaptations, while pangenome analysis showed unique resistance genes on the Antarctic strain included in genetic islands. Altogether, our results showed the unique genetic content on Antarctic strain Sphingomonas sp. So64.6, -a probable new species of this genetically divergent genus-, which could have potentially novel antibiotic compounds acquired to cope with Antarctic poly-extreme conditions.},
}
@article{pmid36466658,
  author   = {Jesus, HNR and Rocha, DJPG and Ramos, RTJ and Silva, A and Brenig, B and Góes-Neto, A and Costa, MM and Soares, SC and Azevedo, V and Aguiar, ERGR and Martínez-Martínez, L and Ocampo, A and Alibi, S and Dorta, A and Pacheco, LGC and Navas, J},
  title    = {Pan-genomic analysis of {Corynebacterium} amycolatum gives insights into molecular mechanisms underpinning the transition to a pathogenic phenotype.},
  journal  = {Frontiers in microbiology},
  year     = {2022},
  volume   = {13},
  pages    = {1011578},
  pmid     = {36466658},
  issn     = {1664-302X},
  abstract = {Corynebacterium amycolatum is a nonlipophilic coryneform which is increasingly being recognized as a relevant human and animal pathogen showing multidrug resistance to commonly used antibiotics. However, little is known about the molecular mechanisms involved in transition from colonization to the MDR invasive phenotype in clinical isolates. In this study, we performed a comprehensive pan-genomic analysis of C. amycolatum, including 26 isolates from different countries. We obtained the novel genome sequences of 8 of them, which are multidrug resistant clinical isolates from Spain and Tunisia. They were analyzed together with other 18 complete or draft C. amycolatum genomes retrieved from GenBank. The species C. amycolatum presented an open pan-genome (α = 0.854905), with 3,280 gene families, being 1,690 (51.52%) in the core genome, 1,121 related to accessory genes (34.17%), and 469 related to unique genes (14.29%). Although some classic corynebacterial virulence factors are absent in the species C. amycolatum, we did identify genes associated with immune evasion, toxin, and antiphagocytosis among the predicted putative virulence factors. Additionally, we found genomic evidence for extensive acquisition of antimicrobial resistance genes through genomic islands.},
}
@article{pmid36466249,
  author   = {Park, J and Jung, H and Mannaa, M and Lee, SY and Lee, HH and Kim, N and Han, G and Park, DS and Lee, SW and Lee, SW and Seo, YS},
  title    = {Genome-guided comparative in planta transcriptome analyses for identifying cross-species common virulence factors in bacterial phytopathogens.},
  journal  = {Frontiers in plant science},
  year     = {2022},
  volume   = {13},
  pages    = {1030720},
  pmid     = {36466249},
  issn     = {1664-462X},
  abstract = {Plant bacterial disease is a complex outcome achieved through a combination of virulence factors that are activated during infection. However, the common virulence factors across diverse plant pathogens are largely uncharacterized. Here, we established a pan-genome shared across the following plant pathogens: Burkholderia glumae, Ralstonia solanacearum, and Xanthomonas oryzae pv. oryzae. By overlaying in planta transcriptomes onto the pan-genome, we investigated the expression profiles of common genes during infection. We found over 70% of identical patterns for genes commonly expressed by the pathogens in different plant hosts or infection sites. Co-expression patterns revealed the activation of a signal transduction cascade to recognize and respond to external changes within hosts. Using mutagenesis, we uncovered a relationship between bacterial virulence and functions highly conserved and shared in the studied genomes of the bacterial phytopathogens, including flagellar biosynthesis protein, C4-dicarboxylate ABC transporter, 2-methylisocitrate lyase, and protocatechuate 3,4-dioxygenase (PCD). In particular, the disruption of PCD gene led to attenuated virulence in all pathogens and significantly affected phytotoxin production in B. glumae. This PCD gene was ubiquitously distributed in most plant pathogens with high homology.
In conclusion, our results provide cross-species in planta models for identifying common virulence factors, which can be useful for the protection of crops against diverse pathogens.},
}
@article{pmid36466237,
  author   = {Tirnaz, S and Zandberg, J and Thomas, WJW and Marsh, J and Edwards, D and Batley, J},
  title    = {Application of crop wild relatives in modern breeding: An overview of resources, experimental and computational methodologies.},
  journal  = {Frontiers in plant science},
  year     = {2022},
  volume   = {13},
  pages    = {1008904},
  pmid     = {36466237},
  issn     = {1664-462X},
  abstract = {Global agricultural industries are under pressure to meet the future food demand; however, the existing crop genetic diversity might not be sufficient to meet this expectation. Advances in genome sequencing technologies and availability of reference genomes for over 300 plant species reveals the hidden genetic diversity in crop wild relatives (CWRs), which could have significant impacts in crop improvement. There are many ex-situ and in-situ resources around the world holding rare and valuable wild species, of which many carry agronomically important traits and it is crucial for users to be aware of their availability. Here we aim to explore the available ex-/in- situ resources such as genebanks, botanical gardens, national parks, conservation hotspots and inventories holding CWR accessions. In addition we highlight the advances in availability and use of CWR genomic resources, such as their contribution in pangenome construction and introducing novel genes into crops. We also discuss the potential and challenges of modern breeding experimental approaches (e.g. de novo domestication, genome editing and speed breeding) used in CWRs and the use of computational (e.g. machine learning) approaches that could speed up utilization of CWR species in breeding programs towards crop adaptability and yield improvement.},
}
@article{pmid36466225,
  author   = {Ma, J and Wei, H and Yu, X and Lv, Y and Zhang, Y and Qian, Q and Shang, L and Guo, L},
  title    = {Compared analysis with a high-quality genome of weedy rice reveals the evolutionary game of de-domestication.},
  journal  = {Frontiers in plant science},
  year     = {2022},
  volume   = {13},
  pages    = {1065449},
  pmid     = {36466225},
  issn     = {1664-462X},
  abstract = {The weedy rice (Oryza sativa f. spontanea) harbors large numbers of excellent traits and genetic diversities, which serves as a valuable germplasm resource and has been considered as a typical material for research about de-domestication. However, there are relatively few reference genomes on weedy rice that severely limit exploiting these genetic resources and revealing more details about de-domestication events. In this study, a high-quality genome (~376.4 Mb) of weedy rice A02 was assembled based on Nanopore ultra-long platform with a coverage depth of about 79.3× and 35,423 genes were predicted. Compared to Nipponbare genome, 5,574 structural variations (SVs) were found in A02. Based on super pan-genome graph, population SVs of 238 weedy rice and cultivated rice accessions were identified using public resequencing data. Furthermore, the de-domestication sites of weedy rice and domestication sites of wild rice were analyzed and compared based on SVs and single-nucleotide polymorphisms (SNPs). Interestingly, an average of 2,198 genes about de-domestication could only be found by FST analysis based on SVs (SV-FST) while not by FST analysis based on SNPs (SNP-FST) in divergent region. Additionally, there was a low overlap between domestication and de-domestication intervals, which demonstrated that two different mechanisms existed in these events. Our finding could facilitate pinpointing of the evolutionary events that had shaped the genomic architecture of wild, cultivated, and weedy rice, and provide a good foundation for cloning of the superior alleles for breeding.},
}
@article{pmid36461252,
  author   = {Xiang, X and Diao, E and Shang, Y and Song, M and He, Y},
  title    = {Rapid quantitative detection of {Vibrio} parahaemolyticus via high-fidelity target-based microfluidic identification.},
  journal  = {Food research international (Ottawa, Ont.)},
  year     = {2022},
  volume   = {162},
  number   = {Pt A},
  pages    = {112032},
  doi      = {10.1016/j.foodres.2022.112032},
  pmid     = {36461252},
  issn     = {1873-7145},
  mesh     = {*Vibrio parahaemolyticus/genetics ; Microfluidics ; DNA Primers ; Excipients ; Food ; },
  abstract = {With the rapid development of logistics, a growing number of pathogenic microorganisms has the means to spread worldwide using food as a carrier; thus, there is an urgent need to develop effective detection strategies to ensure food safety. By combining novel markers identified by pan-genome analysis and a digital recombinase-aided amplification (RAA) detection method based on a microfluidic chip, a strategy of high-fidelity target-based microfluidic identification (HFTMI) has been developed. Herein, a proof-of-concept study of HFTMI for rapid pathogen detection of V. parahaemolyticus was investigated. Specific primers designed for the gene group_41170 identified in the pan-genome analysis showed high sensitivity and a broad spectrum for the detection of V. parahaemolyticus. Different power systems were investigated to increase the partition rate on specifically designed chamber-based digital chips. The performance of HFTMI was greatly improved compared with qPCR. Collectively, this novel HFTMI system provides more reliable guidance for food safety testing.},
}
@article{pmid36461065,
  author   = {Marone, MP and Singh, HC and Pozniak, CJ and Mascher, M},
  title    = {A technical guide to {TRITEX}, a computational pipeline for chromosome-scale sequence assembly of plant genomes.},
  journal  = {Plant methods},
  year     = {2022},
  volume   = {18},
  number   = {1},
  pages    = {128},
  pmid     = {36461065},
  issn     = {1746-4811},
  abstract = {BACKGROUND: As complete and accurate genome sequences are becoming easier to obtain, more researchers wish to get one or more of them to support their research endeavors. Reliable and well-documented sequence assembly workflows find use in reference or pangenome projects.
RESULTS: We describe modifications to the TRITEX genome assembly workflow motivated by the rise of fast and easy long-read contig assembly of inbred plant genomes and the routine deployment of the toolchains in pangenome projects. New features include the use as surrogates of or complements to dense genetic maps and the introduction of user-editable tables to make the curation of contig placements easier and more intuitive.
CONCLUSION: Even maximally contiguous sequence assemblies of the telomere-to-telomere sort, and to a yet greater extent, the fragmented kind require validation, correction, and comparison to reference standards. As pangenomics is burgeoning, these tasks are bound to become more widespread and TRITEX is one tool to get them done. This technical guide is supported by a step-by-step computational tutorial accessible under https://tritexassembly.bitbucket.io/ . The TRITEX source code is hosted under this URL: https://bitbucket.org/tritexassembly .},
}
@article{pmid36454681,
  author   = {Prondzinsky, P and Toyoda, S and McGlynn, SE},
  title    = {The methanogen core and pangenome: conservation and variability across biology's growth temperature extremes.},
  journal  = {DNA research : an international journal for rapid publication of reports on genes and genomes},
  year     = {2022},
  doi      = {10.1093/dnares/dsac048},
  pmid     = {36454681},
  issn     = {1756-1663},
  abstract = {Temperature is a key variable in biological processes. However, a complete understanding of biological temperature adaptation is lacking, in part because of the unique constraints among different evolutionary lineages and physiological groups. Here we compared the genomes of cultivated psychrotolerant and thermotolerant methanogens, which are physiologically related and span growth temperatures from -2.5 °C to 122 °C. Despite being phylogenetically distributed amongst three phyla in the archaea, the methanogenic genome core comprises about one third of a given methanogen's genome, and the genome fraction shared by any two organisms decreases with increasing phylogenetic distance between them. Increased growth temperature is associated with reduced genome size, and thermotolerant organisms have larger core genome fractions, suggesting that genome reduction is governed by temperature rather than phylogeny. Thermotolerant methanogens are enriched in metal and other transporters, and psychrotolerant methanogens are enriched in proteins related to structure and motility. Observed amino acid compositional differences between temperature groups include proteome charge, polarity, and unfolding entropy. Our results suggest that in the methanogens, shared physiology maintains a large, conserved core even across large phylogenetic distances and biology's temperature extremes.},
}
@article{pmid36454044,
  author   = {Pham, HM and Le, DT and Le, LT and Chu, PTM and Tran, LH and Pham, TT and Nguyen, HM and Luu, TT and Hoang, H and Chu, HH},
  title    = {A highly quality genome sequence of {Penicillium} oxalicum species isolated from the root of {Ixora} chinensis in {Vietnam}.},
  journal  = {G3 (Bethesda, Md.)},
  year     = {2022},
  doi      = {10.1093/g3journal/jkac300},
  pmid     = {36454044},
  issn     = {2160-1836},
  abstract = {Penicillium oxalicum has been reported as a multienzyme producing fungus and is widely used in industry due to great potential for cellulase release. Until now, there are only ten available genome assemblies of P. oxalicum species deposited in the Genbank database. In this study, the genome of the I1R1 strain isolated from the root of Ixora chinensis was completely sequenced by Pacbio Sequel sequencing technology, assembled into eight chromosomes with the genome size of 30.8 Mb, as well as a mitogenome of 26 Kb. The structural and functional analyses of the I1R1 genome revealed gene model annotations encoding an enzyme set involved in significant metabolic processes, along with cytochrome P450s and secondary metabolite biosynthesis. The comparative analysis of the P. oxalicum species based on orthology and gene family duplications indicated their large and closed pan genome of 9,500 orthologous groups. This is valuable data for future phylogenetic and population genomics studies.},
}
@article{pmid36453992,
  author   = {Rabanal, FA and Gräff, M and Lanz, C and Fritschi, K and Llaca, V and Lang, M and Carbonell-Bejerano, P and Henderson, I and Weigel, D},
  title    = {Pushing the limits of {HiFi} assemblies reveals centromere diversity between two {Arabidopsis} thaliana genomes.},
  journal  = {Nucleic acids research},
  year     = {2022},
  doi      = {10.1093/nar/gkac1115},
  pmid     = {36453992},
  issn     = {1362-4962},
  abstract = {Although long-read sequencing can often enable chromosome-level reconstruction of genomes, it is still unclear how one can routinely obtain gapless assemblies. In the model plant Arabidopsis thaliana, other than the reference accession Col-0, all other accessions de novo assembled with long-reads until now have used PacBio continuous long reads (CLR). Although these assemblies sometimes achieved chromosome-arm level contigs, they inevitably broke near the centromeres, excluding megabases of DNA from analysis in pan-genome projects. Since PacBio high-fidelity (HiFi) reads circumvent the high error rate of CLR technologies, albeit at the expense of read length, we compared a CLR assembly of accession Eyach15-2 to HiFi assemblies of the same sample. The use of five different assemblers starting from subsampled data allowed us to evaluate the impact of coverage and read length. We found that centromeres and rDNA clusters are responsible for 71% of contig breaks in the CLR scaffolds, while relatively short stretches of GA/TC repeats are at the core of >85% of the unfilled gaps in our best HiFi assemblies. Since the HiFi technology consistently enabled us to reconstruct gapless centromeres and 5S rDNA clusters, we demonstrate the value of the approach by comparing these previously inaccessible regions of the genome between the Eyach15-2 accession and the reference accession Col-0.},
}
@article{pmid36453910,
  author   = {Belloso Daza, MV and Almeida-Santos, AC and Novais, C and Read, A and Alves, V and Cocconcelli, PS and Freitas, AR and Peixe, L},
  title    = {Distinction between {Enterococcus} faecium and {Enterococcus} lactis by a {gluP} {PCR}-Based Assay for Accurate Identification and Diagnostics.},
  journal  = {Microbiology spectrum},
  year     = {2022},
  pages    = {e0326822},
  doi      = {10.1128/spectrum.03268-22},
  pmid     = {36453910},
  issn     = {2165-0497},
  abstract = {It was recently proposed that Enterococcus faecium colonizing the human gut (previous clade B) actually corresponds to Enterococcus lactis. Our goals were to develop a PCR assay to rapidly differentiate these species and to discuss the main phenotypic and genotypic differences from a clinical perspective. The pan-genome of 512 genomes of E. faecium and E. lactis strains was analyzed to assess diversity in genes between the two species. Sequences were aligned to find the best candidate gene for designing species-specific primers, and their accuracy was tested with a collection of 382 enterococci. E. lactis isolates from clinical origins were further characterized by whole-genome sequencing (Illumina). Pan-genome analysis resulted in 12 gene variants, with gene gluP (rhomboid protease) being selected as the candidate for species differentiation. The nucleotide sequence of gluP diverged by 90 to 92% between sets, which allowed species identification through PCR with 100% specificity and no cross-reactivity. E. lactis strains were greatly pan-susceptible and not host specific. Hospital E. lactis isolates were susceptible to clinically relevant antibiotics, lacked infection-associated virulence markers, and were associated with patients presenting risk factors for enhanced bacterial translocation. Here, we propose a PCR-based assay using gluP for easy routine differentiation between E. faecium and E. lactis that could be implemented in different public health contexts.
We further suggest that E. lactis, a dominant human gut species, can cross the gut barrier in severely ill, immunodeficient, and surgical patients. Knowing that bacterial translocation may be a sepsis promoter, the relevance of infections caused by E. lactis strains, even if they are pan-susceptible, should be explored. IMPORTANCE Enterococcus faecium is a WHO priority pathogen that causes severe and hard-to-treat human infections. It was recently proposed that E. faecium colonizing the human gut (previous clade B) actually corresponds to Enterococcus lactis; therefore, some of the human infections occurring globally are being misidentified. In this work, we developed a PCR-based rapid identification method for the differentiation of E. faecium and E. lactis and discussed the main phenotypic and genotypic differences of these species from a clinical perspective. We identified the gluP gene as the best candidate, based on the phylogenomic analysis of 512 published pan-genomes, and validated the PCR assay with a comprehensive collection of 382 enterococci obtained from different sources. Further detailed analysis of clinical E. lactis strains showed that they are highly susceptible to antibiotics and lack the typical virulence markers of E. faecium but are able to cause severe human infections in immunosuppressed patients, possibly in part due to gut barrier translocation.},
}
@article{pmid36451103,
  author   = {Sarkar, S and Kamke, A and Ward, K and Hartung, E and Ran, Q and Feehan, B and Galliart, M and Jumpponen, A and Johnson, L and Lee, STM},
  title    = {{Pseudomonas} cultivated from {Andropogon} gerardii rhizosphere show functional potential for promoting plant host growth and drought resilience.},
  journal  = {BMC genomics},
  year     = {2022},
  volume   = {23},
  number   = {1},
  pages    = {784},
  pmid     = {36451103},
  issn     = {1471-2164},
  mesh     = {*Andropogon ; Rhizosphere ; Droughts ; Pseudomonas ; Phylogeny ; *Poa ; Nitrogen ; Nitrate Reductases ; },
  abstract = {BACKGROUND: Climate change will result in more frequent droughts that can impact soil-inhabiting microbiomes (rhizobiomes) in the agriculturally vital North American perennial grasslands. Rhizobiomes have contributed to enhancing drought resilience and stress resistance properties in plant hosts. In the predicted events of more future droughts, how the changing rhizobiome under environmental stress can impact the plant host resilience needs to be deciphered. There is also an urgent need to identify and recover candidate microorganisms along with their functions, involved in enhancing plant resilience, enabling the successful development of synthetic communities.
RESULTS: In this study, we used the combination of cultivation and high-resolution genomic sequencing of bacterial communities recovered from the rhizosphere of a tallgrass prairie foundation grass, Andropogon gerardii. We cultivated the plant host-associated microbes under artificial drought-induced conditions and identified the microbe(s) that might play a significant role in the rhizobiome of Andropogon gerardii under drought conditions. Phylogenetic analysis of the non-redundant metagenome-assembled genomes (MAGs) identified a bacterial genome of interest - MAG-Pseudomonas. Further metabolic pathway and pangenome analyses recovered genes and pathways related to stress responses including ACC deaminase; nitrogen transformation including assimilatory nitrate reductase in MAG-Pseudomonas, which might be associated with enhanced drought tolerance and growth for Andropogon gerardii.
CONCLUSIONS: Our data indicated that the metagenome-assembled MAG-Pseudomonas has the functional potential to contribute to the plant host's growth during stressful conditions. Our study also suggested the nitrogen transformation potential of MAG-Pseudomonas that could impact Andropogon gerardii growth in a positive way. The cultivation of MAG-Pseudomonas sets the foundation to construct a successful synthetic community for Andropogon gerardii. To conclude, stress resilience mediated through genes ACC deaminase, nitrogen transformation potential through assimilatory nitrate reductase in MAG-Pseudomonas could place this microorganism as an important candidate of the rhizobiome aiding the plant host resilience under environmental stress. This study, therefore, provided insights into the MAG-Pseudomonas and its potential to optimize plant productivity under ever-changing climatic patterns, especially in frequent drought conditions.},
}
@article{pmid36449159,
  author   = {Groza, C and Bourque, G and Goubert, C},
  title    = {A Pangenome Approach to Detect and Genotype {TE} Insertion Polymorphisms.},
  journal  = {Methods in molecular biology (Clifton, N.J.)},
  year     = {2023},
  volume   = {2607},
  pages    = {85--94},
  pmid     = {36449159},
  issn     = {1940-6029},
  mesh     = {Humans ; *DNA Transposable Elements/genetics ; Genotype ; *Polymorphism, Genetic ; Haplotypes ; Genome, Human ; },
  abstract = {Pangenome graphs are flexible data structures that contain the genetic variation that exists in a population of genomes and describe the sequences of the many possible ensuing haplotypes. Here, we use such a pangenome graph to represent and genotype transposable element (TE) polymorphisms. By combining the transposable element annotation (Alus, L1s, and SVAs) of the human genome reference with novel transposable element insertions observed in two high-quality assemblies (HG002 and HG00733), we show how to create a transposable element pangenome that consists of ~1.2 million reference and 2939 non-reference transposable elements. We then demonstrate this approach by aligning short-read sequencing data and genotyping transposable element deletions and insertions with reasonable specificity and sensitivity (0.85 F1-score).},
}
@article{pmid36448683,
  author   = {Garrison, E and Guarracino, A},
  title    = {Unbiased pangenome graphs.},
  journal  = {Bioinformatics (Oxford, England)},
  year     = {2022},
  doi      = {10.1093/bioinformatics/btac743},
  pmid     = {36448683},
  issn     = {1367-4811},
  abstract = {MOTIVATION: Pangenome variation graphs model the mutual alignment of collections of DNA sequences. A set of pairwise alignments implies a variation graph, but there are no scalable methods to generate such a graph from these alignments. Existing related approaches depend on a single reference, a specific ordering of genomes, or a de Bruijn model based on a fixed k-mer length. A scalable, self-contained method to build pangenome graphs without such limitations would be a key step in pangenome construction and manipulation pipelines.
RESULTS: We design the seqwish algorithm, which builds a variation graph from a set of sequences and alignments between them. We first transform the alignment set into an implicit interval tree. To build up the variation graph, we query this tree-based representation of the alignments to reduce transitive matches into single DNA segments in a sequence graph. By recording the mapping from input sequence to output graph, we can trace the original paths through this graph, yielding a pangenome variation graph. We present an implementation that operates in external memory, using disk-backed data structures and lock-free parallel methods to drive the core graph induction step. We demonstrate that our method scales to very large graph induction problems by applying it to build pangenome graphs for several species.
AVAILABILITY: seqwish is published as free software under the MIT open source license. Source code and documentation are available at https://github.com/ekg/seqwish. seqwish can be installed via Bioconda https://bioconda.github.io/recipes/seqwish/README.html or GNU Guix https://github.com/ekg/guix-genomics/blob/master/seqwish.scm.},
}
@article{pmid36447475,
  author   = {Moniruzzaman, M and Erazo-Garcia, MP and Aylward, FO},
  title    = {Endogenous giant viruses contribute to intraspecies genomic variability in the model green alga {Chlamydomonas} reinhardtii.},
  journal  = {Virus evolution},
  year     = {2022},
  volume   = {8},
  number   = {2},
  pages    = {veac102},
  pmid     = {36447475},
  issn     = {2057-1577},
  abstract = {Chlamydomonas reinhardtii is a unicellular eukaryotic alga that has been studied as a model organism for decades. Despite an extensive history as a model system, phylogenetic and genetic characteristics of viruses infecting this alga have remained elusive. We analyzed high-throughput genome sequence data of C. reinhardtii field isolates, and in six we discovered sequences belonging to endogenous giant viruses that reach up to several 100 kb in length. In addition, we have also discovered the entire genome of a closely related giant virus that is endogenized within the genome of Chlamydomonas incerta, the closest sequenced relative of C. reinhardtii. Endogenous giant viruses add hundreds of new gene families to the host strains, highlighting their contribution to the pangenome dynamics and interstrain genomic variability of C. reinhardtii. Our findings suggest that the endogenization of giant viruses may have important implications for structuring the population dynamics and ecology of protists in the environment.},
}
% PMID 36445094: Yu et al., Microbiology spectrum, 2022 (article number only; no volume/issue in the record).
@article{pmid36445094,
  author   = {Yu, Y. and Cheng, W. and Chen, X. and Guo, Q. and Cao, H.},
  title    = {Cyanobacterial Blooms Are Not a Result of Positive Selection by Freshwater Eutrophication},
  journal  = {Microbiology spectrum},
  year     = {2022},
  pages    = {e0319422},
  doi      = {10.1128/spectrum.03194-22},
  pmid     = {36445094},
  issn     = {2165-0497},
  abstract = {Long-standing cyanobacterial harmful algal blooms (CyanoHABs) are known to result from synergistic interaction between elevated nutrients and superior ecophysiology of cyanobacteria. However, it remains to be determined whether CyanoHABs are a result of positive selection by eutrophic waters. To address this, we conducted molecular evolutionary analyses on the genomes of 9 bloom-forming cyanobacteria, combined with pangenomics and metatranscriptomics. The results showed no positive selection by water eutrophication. Instead, all homologous genes in the species are under strong purifying selection based on the ratio of divergence at nonsynonymous and synonymous sites (dN/dS) and phylogeny. The dN/dS < 0.85 (median = 0.3) for all homologous genes are similar between the genes in the pathways driving CyanoHABs and housekeeping functions. Phylogenetic support for non-positive selection comes from the mixed clustering of strains: strains of the same species from diverse geographic origins form the same clusters, while strains from the same origins form different clusters. Further support lies in the codon adaptation index (CAI) and single nucleotide polymorphism (SNP). The CAI ranged from 0.42 to 0.9 (mean = 0.75), which indicates high-level codon usage bias; the pathways for CyanoHABs and housekeeping functions showed a similar CAI. Interestingly, CAI was negatively correlated with gene expression in 3 metatranscriptomes. The numbers of SNPs were concentrated around 5 to 50. As the SNP number increases, the gene expression level decreases.
These negative correlations agree with the population-level dN/dS and phylogeny in supporting purifying selection in bloom-forming cyanobacteria. In summary, superior ecophysiology appears to be acquired prior to water eutrophication. IMPORTANCE CyanoHABs are global environmental hazards, and their mechanisms of action are being intensively investigated. On an ecological scale, CyanoHABs are consequences of synergistic interactions between biological functions and elevated nutrients in eutrophic waters. On an evolutionary scale, one important question is how bloom-forming cyanobacteria acquire these superior biological functions. There are several possibilities, including adaptive evolution and horizontal gene transfer. Here, we explored the possibility of positive selection. We reasoned that there are two possible periods for cyanobacteria to acquire these functions: before the onset of water eutrophication or during water eutrophication. Either way, there should be molecular signatures in protein sequences for positive selection. Interestingly, we found no positive selection by water eutrophication, but strong purifying selection instead on nearly all the genes, suggesting these superior functions aiding CyanoHABs are acquired prior to water eutrophication.},
}
% PMID 36445082: Cheng et al., mBio, 2022 (article number only; no volume/issue in the record).
@article{pmid36445082,
  author   = {Cheng, S. and Fleres, G. and Chen, L. and Liu, G. and Hao, B. and Newbrough, A. and Driscoll, E. and Shields, R. K. and Squires, K. M. and Chu, T. Y. and Kreiswirth, B. N. and Nguyen, M. H. and Clancy, C. J.},
  title    = {Within-Host Genotypic and Phenotypic Diversity of Contemporaneous Carbapenem-Resistant {Klebsiella} pneumoniae from Blood Cultures of Patients with Bacteremia},
  journal  = {mBio},
  year     = {2022},
  pages    = {e0290622},
  doi      = {10.1128/mbio.02906-22},
  pmid     = {36445082},
  issn     = {2150-7511},
  abstract = {It is unknown whether bacterial bloodstream infections (BSIs) are commonly caused by single organisms or mixed microbial populations. We hypothesized that contemporaneous carbapenem-resistant Klebsiella pneumoniae (CRKP) strains from blood cultures of individual patients are genetically and phenotypically distinct. We determined short-read whole-genome sequences of 10 sequence type 258 (ST258) CRKP strains from blood cultures in each of 6 patients (Illumina HiSeq). Strains clustered by patient by core genome and pan-genome phylogeny. In 5 patients, there was within-host strain diversity by gene mutations, presence/absence of antibiotic resistance or virulence genes, and/or plasmid content. Accessory gene phylogeny revealed strain diversity in all 6 patients. Strains from 3 patients underwent long-read sequencing for genome completion (Oxford Nanopore) and phenotypic testing. Genetically distinct strains within individuals exhibited significant differences in carbapenem and other antibiotic responses, capsular polysaccharide (CPS) production, mucoviscosity, and/or serum killing. In 2 patients, strains differed significantly in virulence during mouse BSIs. Genetic or phenotypic diversity was not observed among strains recovered from blood culture bottles seeded with index strains from the 3 patients and incubated in vitro at 37°C.
In conclusion, we identified genotypic and phenotypic variant ST258 CRKP strains from blood cultures of individual patients with BSIs, which were not detected by the clinical laboratory or in seeded blood cultures. The data suggest a new paradigm of CRKP population diversity during BSIs, at least in some patients. If validated for BSIs caused by other bacteria, within-host microbial diversity may have implications for medical, microbiology, and infection prevention practices and for understanding antibiotic resistance and pathogenesis. IMPORTANCE The long-standing paradigm for pathogenesis of bacteremia is that, in most cases, a single organism passes through a bottleneck and establishes itself in the bloodstream (single-organism hypothesis). In keeping with this paradigm, standard practice in processing positive microbiologic cultures is to test single bacterial strains from morphologically distinct colonies. This study is the first genome-wide analysis of within-host diversity of Klebsiella pneumoniae strains recovered from individual patients with bloodstream infections (BSIs). Our finding that positive blood cultures comprised genetically and phenotypically heterogeneous carbapenem-resistant K. pneumoniae strains challenges the single-organism hypothesis and suggests that at least some BSIs are caused by mixed bacterial populations that are unrecognized by the clinical laboratory. The data support a model of pathogenesis in which pressures in vivo select for strain variants with particular antibiotic resistance or virulence attributes and raise questions about laboratory protocols and treatment decisions directed against single strains.},
}
% PMID 36445077: Conde et al., Microbiology spectrum, 2022 (article number only; no volume/issue in the record).
@article{pmid36445077,
  author   = {Conde, C. and Th{\'e}z{\'e}, J. and Cochard, T. and Rossignol, M. N. and Fourichon, C. and Delafosse, A. and Joly, A. and Guatteo, R. and Schibler, L. and Bannantine, J. P. and Biet, F.},
  title    = {Genetic Features of {Mycobacterium} avium subsp. paratuberculosis Strains Circulating in the West of {France} Deciphered by Whole-Genome Sequencing},
  journal  = {Microbiology spectrum},
  year     = {2022},
  pages    = {e0339222},
  doi      = {10.1128/spectrum.03392-22},
  pmid     = {36445077},
  issn     = {2165-0497},
  abstract = {Paratuberculosis is a chronic infection of the intestine, mainly the ileum, caused by Mycobacterium avium subsp. paratuberculosis in cattle and other ruminants. This enzootic disease is present worldwide and has a negative impact on the dairy cattle industry. For this subspecies, the current genotyping tools do not provide the needed resolution to investigate the genetic diversity of closely related strains. These limitations can be overcome by the application of whole-genome sequencing (WGS), particularly for clonal populations such as M. avium subsp. paratuberculosis. The purpose of the present study was to undertake a WGS analysis with a panel of 200 animal field M. avium subsp. paratuberculosis strains selected based on a previous large-scale longitudinal study of Prim'Holstein and Normande dairy breeds naturally infected with M. avium subsp. paratuberculosis in the West of France. The pangenome analysis revealed that M. avium subsp. paratuberculosis has a closed pangenome. The phylogeny, based on alignment of 2,786 nonhomoplasic single nucleotide polymorphisms (SNPs), showed that the strain population is structured into three clades independently of the cattle breed or geographic distribution. The increased resolution of phylogeny obtained by WGS confirmed the homoplasic nature of the markers variable-number tandem repeat (VNTR) and short sequence repeat (SSR) used for M. avium subsp. paratuberculosis genotyping.
These phylogenetic data also revealed independent introductions of the different genotypes in two main waves since at least 2003. WGS applied to this sampling demonstrated the presence of mixed infections in herds and at the individual animal level. Collectively, the phylogeny results inferred with French isolates compared to M. avium subsp. paratuberculosis isolates from around the world suggest introductions of M. avium subsp. paratuberculosis genotypes through the animal trade. Relationships between genetic traits and epidemiological data can now be investigated to better understand transmission dynamics of the disease. IMPORTANCE Mycobacterium avium subsp. paratuberculosis causes Johne's disease in ruminants, which is present worldwide and has significant negative impacts on the dairy cattle industry and animal welfare. Prevention and control of M. avium subsp. paratuberculosis infection are hampered by knowledge gaps in strain virulence, genotype distribution, and transmission dynamics. This work has revealed new insights into M. avium subsp. paratuberculosis strains currently circulating in western France and how they are related to strains circulating globally. We applied whole-genome sequencing (WGS) to obtain comprehensive information on genome evolution and discrimination of closely related strains. This approach revealed the history of M. avium subsp. paratuberculosis infection in France, refined the pangenomic characteristics of M. avium subsp. paratuberculosis, and demonstrated the existence of mixed infection in animals. Finally, this study identified predominant genotypes, which allow a better understanding of disease transmission dynamics. This information will facilitate tracking of this pathogen on farms and across agricultural regions, thus informing transmission pathways and disease control points.},
}
% PMID 36437921: Singh et al., Frontiers in genetics 13, 2022 (no issue number in the record).
@article{pmid36437921,
  author   = {Singh, V. and Pandey, S. and Bhardwaj, A.},
  title    = {From the reference human genome to human pangenome: Premise, promise and challenge},
  journal  = {Frontiers in genetics},
  year     = {2022},
  volume   = {13},
  pages    = {1042550},
  pmid     = {36437921},
  issn     = {1664-8021},
  abstract = {The Reference Human Genome remains the single most important resource for mapping genetic variations and assessing their impact. However, it is monophasic, incomplete and not representative of the variation that exists in the population. Given the extent of ethno-geographic diversity and the consequent diversity in clinical manifestations of these variations, population specific references were developed overtime. The dramatically plummeting cost of sequencing whole genomes and the advent of third generation long range sequencers allowing accurate, error free, telomere-to-telomere assemblies of human genomes present us with a unique and unprecedented opportunity to develop a more composite standard reference consisting of a collection of multiple genomes that capture the maximal variation existing in the population, with the deepest annotation possible, enabling a realistic, reliable and actionable estimation of clinical significance of specific variations. The Human Pangenome Project thus is a logical next step promising a more accurate and global representation of genomic variations. The pangenome effort must be reciprocally complemented with precise variant discovery tools and exhaustive annotation to ensure unambiguous clinical assessment of the variant in ethno-geographical context. Here we discuss a broad roadmap, the challenges and way forward in developing a universal pangenome reference including data visualization techniques and integration of prior knowledge base in the new graph based architecture and tools to submit, compare, query, annotate and retrieve relevant information from the pangenomes.
The biggest challenge, however, will be the ethical, legal and social implications and the training of human resource to the new reference paradigm.},
}
% PMID 36436132: Zoaiter et al., Archives of microbiology 205(1), 2022.
@article{pmid36436132,
  author   = {Zoaiter, M. and Magdy Wasfy, R. and Caputo, A. and Fenollar, F. and Zeaiter, Z. and Fournier, P. E. and Houhamdi, L.},
  title    = {{Streptococcus} bouchesdurhonensis sp. nov. isolated from a bronchoalveolar lavage of a patient with pneumonia},
  journal  = {Archives of microbiology},
  year     = {2022},
  volume   = {205},
  number   = {1},
  pages    = {3},
  pmid     = {36436132},
  issn     = {1432-072X},
  mesh     = {Humans ; Aged ; RNA, Ribosomal, 16S/genetics ; Phylogeny ; *Genome, Bacterial ; DNA, Bacterial/genetics ; Streptococcus/genetics ; Bronchoalveolar Lavage ; *Pneumonia/genetics ; },
  abstract = {Strain Marseille-Q6994 was isolated from a 72-year-old patient with pneumonia from Bouches-du-Rhône department, in France. Cells were Gram positive, non-motile, catalase and oxidase-negative cocci. The major fatty acids were hexadecanoic (47.4%) and tetradecanoic acids (28.3%). 16S rRNA gene sequence comparison suggested that strain Marseille-Q6994 was affiliated to the Streptococcus genus. GroEL phylogenetic analysis separated strain Marseille-Q6994 in a distinct branch from the closely related Streptococcus-type strains with standing in nomenclature. Whole genome sequencing-based methods (OrthoAverage Nucleotide Identity, digital DNA-DNA hybridization and pangenome analysis) supported the classification of the strain into a novel species. Therefore, based on the phenotypic, genomic, and phylogenetic analyses, we propose the name Streptococcus bouchesdurhonensis sp. nov for which strain Marseille-Q6994[T] (CSUR Marseille-Q6994 = DSMZ 113892) is the type strain.},
}
% PMID 36432770: Jha et al., Plants (Basel) 11(22), 2022 (no page/article number in the record).
@article{pmid36432770,
  author   = {Jha, U. C. and Nayyar, H. and von Wettberg, E. J. B. and Naik, Y. D. and Thudi, M. and Siddique, K. H. M.},
  title    = {Legume Pangenome: Status and Scope for Crop Improvement},
  journal  = {Plants (Basel, Switzerland)},
  year     = {2022},
  volume   = {11},
  number   = {22},
  doi      = {10.3390/plants11223041},
  pmid     = {36432770},
  issn     = {2223-7747},
  abstract = {In the last decade, legume genomics research has seen a paradigm shift due to advances in genome sequencing technologies, assembly algorithms, and computational genomics that enabled the construction of high-quality reference genome assemblies of major legume crops. These advances have certainly facilitated the identification of novel genetic variants underlying the traits of agronomic importance in many legume crops. Furthermore, these robust sequencing technologies have allowed us to study structural variations across the whole genome in multiple individuals and at the species level using 'pangenome analysis.' This review updates the progress of constructing pangenome assemblies for various legume crops and discusses the prospects for these pangenomes and how to harness the information to improve various traits of economic importance through molecular breeding to increase genetic gain in legumes and tackle the increasing global food crisis.},
}
% PMID 36429532: Almuhayawi et al., International journal of environmental research and public health 19(22), 2022 (no page/article number in the record).
@article{pmid36429532,
  author   = {Almuhayawi, M. S. and Al Jaouni, S. K. and Selim, S. and Alkhalifah, D. H. M. and Marc, R. A. and Aslam, S. and Poczai, P.},
  title    = {Integrated Pangenome Analysis and Pharmacophore Modeling Revealed Potential Novel Inhibitors against {Enterobacter} xiangfangensis},
  journal  = {International journal of environmental research and public health},
  year     = {2022},
  volume   = {19},
  number   = {22},
  doi      = {10.3390/ijerph192214812},
  pmid     = {36429532},
  issn     = {1660-4601},
  mesh     = {*Bacterial Proteins/genetics/metabolism ; *Enterobacter/genetics/metabolism ; Genome, Bacterial ; Uridine Diphosphate ; },
  abstract = {Enterobacter xiangfangensis is a novel, multidrug-resistant pathogen belonging to the Enterobacter genus and has the ability to acquire resistance to multiple antibiotic classes. However, there is currently no registered E. xiangfangensis drug on the market that has been shown to be effective. Hence, there is an urgent need to identify novel therapeutic targets and effective treatments for E. xiangfangensis. In the current study, a bacterial pan genome analysis and subtractive proteomics approach was employed to the core proteomes of six strains of E. xiangfangensis using several bioinformatic tools, software, and servers. However, 2611 nonredundant proteins were predicted from the 21,720 core proteins of core proteome. Out of 2611 nonredundant proteins, 372 were obtained from Geptop2.0 as essential proteins. After the subtractive proteomics and subcellular localization analysis, only 133 proteins were found in cytoplasm. All cytoplasmic proteins were examined using BLASTp against the virulence factor database, which classifies 20 therapeutic targets as virulent. Out of these 20, 3 cytoplasmic proteins: ferric iron uptake transcriptional regulator (FUR), UDP-2,3diacylglucosamine diphosphatase (UDP), and lipid-A-disaccharide synthase (lpxB) were chosen as potential drug targets.
These drug targets are important for bacterial survival, virulence, and growth and could be used as therapeutic targets. More than 2500 plant chemicals were used to molecularly dock these proteins. Furthermore, the lowest-binding energetic docked compounds were found. The top five hit compounds, Adenine, Mollugin, Xanthohumol C, Sakuranetin, and Toosendanin demonstrated optimum binding against all three target proteins. Furthermore, molecular dynamics simulations and MM/GBSA analyses validated the stability of ligand-protein complexes and revealed that these compounds could serve as potential E. xiangfangensis replication inhibitors. Consequently, this study marks a significant step forward in the creation of new and powerful drugs against E. xiangfangensis. Future studies should validate these targets experimentally to prove their function in E. xiangfangensis survival and virulence.},
}
% PMID 36427110: Gonzalez-Castillo et al., Current microbiology 80(1), 2022.
@article{pmid36427110,
  author   = {Gonz{\'a}lez-Castillo, A. and Carballo, J. L. and Bautista-Guerrero, E.},
  title    = {Genomics, Phylogeny, and in Silico Phenotyping of {Nitrosopumilus} Genus},
  journal  = {Current microbiology},
  year     = {2022},
  volume   = {80},
  number   = {1},
  pages    = {3},
  pmid     = {36427110},
  issn     = {1432-0991},
  mesh     = {Animals ; Phylogeny ; *Genomics ; Archaea ; *Porifera ; Multilocus Sequence Typing ; },
  abstract = {The present study reports the first genome of Nitrosopumilus extracted from the marine sponge Thoosa mismalolli. The genomic study of Nitrosopumilus genus using seven genomes type strains (N. maritimus, N. piranensis, N. zosterae, N. ureiphilus, N. adriaticus, N. oxyclinae and N. cobalaminigenes), four genomes Candidatus species (Ca. N. koreensis, Ca. N. sp. AR2, Ca. N. salaria BD31, and SZUA-335), and six reference genomes (SI075, SI0036, SI0060, SI0034, SI0048, and bin36o) isolated from marine sponge, a tropical marine fish tank, dimly lit deep coastal waters, the lower euphotic zone of coastal waters, near-surface sediment, and MAG N. sp NMAG03 isolated from Thoosa mismalolli was performed. These genomes were characterized by means of a polyphasic approach comprising multilocus sequence analysis (MLSA) of 139 single-copy genes (SCG), core-pangenome, ANI, and in silico phenotypic characterization. We found that the genomes of the Nitrosopumilus genus formed three separate clusters (A, B, and C) based in 139 SCG sequence similarity. The genomes showed values between 75.2 and 99.5% for ANI, the core genome consisted of 168 gene families and the pangenome of 6,011 gene families. Based on the genomic analyses performed, the cluster A may contain a potential new species (NMAG03), and the cluster C could be represented by three new species of the genus. Finally, based on the results shown in this polyphasic approach, we support the use of the integrated approach for genomic analysis of poorly studied genera.},
}
% PMID 36425027: Gtari, Frontiers in microbiology 13, 2022 (no issue number in the record).
@article{pmid36425027,
  author   = {Gtari, M.},
  title    = {Taxogenomic status of phylogenetically distant {Frankia} clusters warrants their elevation to the rank of genus: A description of {Protofrankia} gen. nov., {Parafrankia} gen. nov., and {Pseudofrankia} gen. nov. as three novel genera within the family {Frankiaceae}},
  journal  = {Frontiers in microbiology},
  year     = {2022},
  volume   = {13},
  pages    = {1041425},
  pmid     = {36425027},
  issn     = {1664-302X},
  abstract = {The genus Frankia is at present the sole genus in the family Frankiaceae and encompasses filamentous, sporangia-forming actinomycetes principally isolated from root nodules of taxonomically disparate dicotyledonous hosts named actinorhizal plants. Multiple independent phylogenetic analyses agree with the division of the genus Frankia into four well-supported clusters. Within these clusters, Frankia strains are well defined based on host infectivity range, mode of infection, morphology, and their behaviour in culture. In this study, phylogenomics, overall genome related indices (OGRI), together with available data sets for phenotypic and host-plant ranges available for the type strains of Frankia species, were considered. The robustness and the deep radiation observed in Frankia at the subgeneric level, fulfilling the primary principle of phylogenetic systematics, were strengthened by establishing genome criteria for new genus demarcation boundaries. Therefore, the taxonomic elevation of the Frankia clusters to the rank of the genus is proposed. The genus Frankia should be revised to encompass cluster 1 species only and three novel genera, Protofrankia gen. nov., Parafrankia gen. nov., and Pseudofrankia gen. nov., are proposed to accommodate clusters 2, 3, and 4 species, respectively. New combinations for validly named species are also provided.},
}
% PMID 36423113: Swetha et al., Viruses 14(11), 2022 (no page/article number in the record).
@article{pmid36423113,
  author   = {Swetha, R. G. and Basu, S. and Ramaiah, S. and Anbarasu, A.},
  title    = {Multi-Epitope Vaccine for Monkeypox Using Pan-Genome and Reverse Vaccinology Approaches},
  journal  = {Viruses},
  year     = {2022},
  volume   = {14},
  number   = {11},
  pmid     = {36423113},
  issn     = {1999-4915},
  mesh     = {Child ; Humans ; Vaccinology ; *Monkeypox ; Molecular Docking Simulation ; Epitopes, B-Lymphocyte ; *Vaccines ; },
  abstract = {Outbreaks of monkeypox virus infections have imposed major health concerns worldwide, with high morbidity threats to children and immunocompromised adults. Although repurposed drugs and vaccines are being used to curb the disease, the evolving traits of the virus, exhibiting considerable genetic dynamicity, challenge the limits of a targeted treatment. A pan-genome-based reverse vaccinology approach can provide fast and efficient solutions to resolve persistent inconveniences in experimental vaccine design during an outbreak-exigency. The approach encompassed screening of available monkeypox whole genomes (n = 910) to identify viral targets. From 102 screened viral targets, viral proteins L5L, A28, and L5 were finalized based on their location, solubility, and antigenicity. The potential T-cell and B-cell epitopes were extracted from the proteins using immunoinformatics tools and algorithms. Multiple vaccine constructs were designed by combining the epitopes. Based on immunological properties, chemical stability, and structural quality, a novel multi-epitopic vaccine construct, V4, was finalized. Flexible-docking and coarse-dynamics simulation portrayed that the V4 had high binding affinity towards human HLA-proteins (binding energy < -15.0 kcal/mol) with low conformational fluctuations (<1 Å). Thus, the vaccine construct (V4) may act as an efficient vaccine to induce immunity against monkeypox, which encourages experimental validation and similar approaches against emerging viral infections.},
}
% PMID 36421834: Jalil et al., Genes 13(11), 2022 (no page/article number in the record).
@article{pmid36421834,
  author   = {Jalil, M. and Quddos, F. and Anwer, F. and Nasir, S. and Rahman, A. and Alharbi, M. and Alshammari, A. and Alshammari, H. K. and Ali, A.},
  title    = {Comparative Pan-Genomic Analysis Revealed an Improved Multi-Locus Sequence Typing Scheme for {Staphylococcus} aureus},
  journal  = {Genes},
  year     = {2022},
  volume   = {13},
  number   = {11},
  pmid     = {36421834},
  issn     = {2073-4425},
  mesh     = {Humans ; Multilocus Sequence Typing/methods ; Staphylococcus aureus/genetics ; *Methicillin-Resistant Staphylococcus aureus/genetics ; Phylogeny ; *Staphylococcal Infections/epidemiology ; Genomics ; },
  abstract = {The growing prevalence of antibiotic-resistant Staphylococcus aureus strains mandates selective susceptibility testing and epidemiological investigations. It also draws attention to an efficient typing strategy. Whole genome sequencing helps in genetic comparison, strain differentiation, and typing; however, it is not that cost-effective. In comparison, Multi-Locus Sequence Typing (MLST) is an efficient typing method employed for bacterial strain typing and characterizations. In this paper, a comprehensive pangenome and phylogenetic analysis of 502/1279 S. aureus genomes is carried out to understand the species divergence. Additionally, the current Multi-Locus Sequence Typing (MLST) scheme was evaluated, and genes were excluded or substituted by alternative genes based on reported shortcomings, genomic data, and statistical scores calculated. The data generated were helpful in devising a new Multi-Locus Sequence Typing (MLST) scheme for the efficient typing of S. aureus strains. The revised scheme is now a blend of previously used genes and new candidate genes. The genes yQil, aroE, and gmk are replaced with better gene candidates, opuCC, aspS, and rpiB, based on their genome localization, representation, and statistical scores.
Therefore, the proposed Multi-Locus Sequence Typing (MLST) method offers a greater resolution with 58 sequence types (STs) in comparison to the prior scheme's 42 STs.},
}
% PMID 36420896: Frankish et al., Nucleic acids research, 2022 (no volume/issue/pages in the record; locate via DOI).
@article{pmid36420896,
  author   = {Frankish, A. and Carbonell-Sala, S. and Diekhans, M. and Jungreis, I. and Loveland, J. E. and Mudge, J. M. and Sisu, C. and Wright, J. C. and Arnan, C. and Barnes, I. and Banerjee, A. and Bennett, R. and Berry, A. and Bignell, A. and Boix, C. and Calvet, F. and Cerd{\'a}n-V{\'e}lez, D. and Cunningham, F. and Davidson, C. and Donaldson, S. and Dursun, C. and Fatima, R. and Giorgetti, S. and Giron, C. G. and Gonzalez, J. M. and Hardy, M. and Harrison, P. W. and Hourlier, T. and Hollis, Z. and Hunt, T. and James, B. and Jiang, Y. and Johnson, R. and Kay, M. and Lagarde, J. and Martin, F. J. and G{\'o}mez, L. M. and Nair, S. and Ni, P. and Pozo, F. and Ramalingam, V. and Ruffier, M. and Schmitt, B. M. and Schreiber, J. M. and Steed, E. and Suner, M. M. and Sumathipala, D. and Sycheva, I. and Uszczynska-Ratajczak, B. and Wass, E. and Yang, Y. T. and Yates, A. and Zafrulla, Z. and Choudhary, J. S. and Gerstein, M. and Guigo, R. and Hubbard, T. J. P. and Kellis, M. and Kundaje, A. and Paten, B. and Tress, M. L. and Flicek, P.},
  title    = {{GENCODE}: reference annotation for the human and mouse genomes in 2023},
  journal  = {Nucleic acids research},
  year     = {2022},
  doi      = {10.1093/nar/gkac1071},
  pmid     = {36420896},
  issn     = {1362-4962},
  support  = {U41HG007234/NH/NIH HHS/United States ; WT222155/Z/20/Z/WT_/Wellcome Trust/United Kingdom ; },
  abstract = {GENCODE produces high quality gene and transcript annotation for the human and mouse genomes. All GENCODE annotation is supported by experimental data and serves as a reference for genome biology and clinical genomics. The GENCODE consortium generates targeted experimental data, develops bioinformatic tools and carries out analyses that, along with externally produced data and methods, support the identification and annotation of transcript structures and the determination of their function. Here, we present an update on the annotation of human and mouse genes, including developments in the tools, data, analyses and major collaborations which underpin this progress.
For example, we report the creation of a set of non-canonical ORFs identified in GENCODE transcripts, the LRGASP collaboration to assess the use of long transcriptomic data to build transcript models, the progress in collaborations with RefSeq and UniProt to increase convergence in the annotation of human and mouse protein-coding genes, the propagation of GENCODE across the human pan-genome and the development of new tools to support annotation of regulatory features by GENCODE. Our annotation is accessible via Ensembl, the UCSC Genome Browser and https://www.gencodegenes.org.},
}
% PMID 36420160: Tripodi, Computational and structural biotechnology journal 20, 2022 (no issue number in the record).
@article{pmid36420160,
  author   = {Tripodi, P.},
  title    = {Next generation sequencing technologies to explore the diversity of germplasm resources: Achievements and trends in tomato},
  journal  = {Computational and structural biotechnology journal},
  year     = {2022},
  volume   = {20},
  pages    = {6250--6258},
  pmid     = {36420160},
  issn     = {2001-0370},
  abstract = {Tomato is one of the major vegetable crops grown worldwide and a model species for genetic and biological research. Progress in genomic technologies made possible the development of forefront methods for high-scale sequencing, providing comprehensive insight into the genetic architecture of germplasm resources. This review revisits next-generation sequencing strategies and applications to investigate the diversity of tomato, describing the common platforms used for SNP genotyping of large collections, de novo sequencing, and whole genome resequencing. Significant findings in evolutionary history are outlined, thus discussing how genomics has provided new hints about the processes behind domestication. Finally, achievement and perspectives on pan-genome construction and graphical pan-genome development toward precise mining of the natural variation to be exploited for breeding purposes are presented.},
}
@article{pmid36419435,
  author   = {Wang, Q and Zhang, L and Zhang, Y and Chen, H and Song, J and Lyu, M and Chen, R and Zhang, L},
  title    = {Comparative genomic analyses reveal genetic characteristics and pathogenic factors of {Bacillus} pumilus {HM-7}.},
  journal  = {Frontiers in microbiology},
  year     = {2022},
  volume   = {13},
  pages    = {1008648},
  pmid     = {36419435},
  issn     = {1664-302X},
  abstract = {Bacillus pumilus plays an important role in industrial application and biocontrol activities, as well as causing humans and plants disease, leading to economic losses and biosafety concerns. However, until now, the pathogenesis and underlying mechanisms of B. pumilus strains remain unclear. In our previous study, one representative isolate of B. pumilus named HM-7 has been recovered and proved to be the causal agent of fruit rot on muskmelon (Cucumis melo). Herein, we present a complete and annotated genome sequence of HM-7 that contains 4,111 coding genes in a single 3,951,520 bp chromosome with 41.04\% GC content. A total of 3,481 genes were functionally annotated with the GO, COG, and KEGG databases. Pan-core genome analysis of HM-7 and 20 representative B. pumilus strains, as well as six closely related Bacillus species, discovered 740 core genes and 15,205 genes in the pan-genome of 21 B. pumilus strains, in which 485 specific-genes were identified in HM-7 genome. The average nucleotide identity (ANI), and whole-genome-based phylogenetic analysis revealed that HM-7 was most closely related to the C4, GR8, MTCC-B6033, TUAT1 and SH-B11 strains, but evolutionarily distinct from other strains in B. pumilus. Collinearity analysis of the six similar B. pumilus strains showed high levels of synteny but also several divergent regions for each strains. In the HM-7 genome, we identified 484 genes in the carbohydrate-active enzymes (CAZyme) class, 650 genes encoding virulence factors, and 1,115 genes associated with pathogen-host interactions. Moreover, three HM-7-specific regions were determined, which contained 424 protein-coding genes. Further investigation of these genes showed that 19 pathogenesis-related genes were mainly associated with flagella formation and secretion of toxic products, which might be involved in the virulence of strain HM-7. Our results provided detailed genomic and taxonomic information for the HM-7 strain, and discovered its potential pathogenic mechanism, which lay a foundation for developing effective prevention and control strategies against this pathogen in the future.},
}
@article{pmid36419432,
  author   = {Kumar, P and Rani, S and Dahiya, P and Kumar, A and Dang, AS and Suneja, P},
  title    = {Whole genome analysis for plant growth promotion profiling of {Pantoea} agglomerans {CPHN2}, a non-rhizobial nodule endophyte.},
  journal  = {Frontiers in microbiology},
  year     = {2022},
  volume   = {13},
  pages    = {998821},
  pmid     = {36419432},
  issn     = {1664-302X},
  abstract = {Reduced agricultural production as well as issues like nutrient-depleted soils, eutrophication, and groundwater contamination have drawn attention to the use of endophyte-based bioformulations to restore soil fertility. Pantoea agglomerans CPHN2, a non-rhizobial nodule endophyte isolated from Cicer arietinum, exhibited a variety of plant growth-promoting traits. In this study, we used NextSeq500 technology to analyze whole-genome sequence information of this plant growth-promoting endophytic bacteria. The genome of P. agglomerans CPHN2 has a length of 4,839,532 bp and a G + C content of 55.2\%. The whole genome comprises three different genomic fractions, comprising one circular chromosome and two circular plasmids. A comparative analysis between P. agglomerans CPHN2 and 10 genetically similar strains was performed using a bacterial pan-genome pipeline. All the predicted and annotated gene sequences for plant growth promotions (PGPs), such as phosphate solubilization, siderophore synthesis, nitrogen metabolism, and indole-3-acetic acid (IAA) of P. agglomerans CPHN2, were identified. The whole-genome analysis of P. agglomerans CPHN2 provides an insight into the mechanisms underlying PGP by endophytes and its potential applications as a biofertilizer.},
}
@article{pmid36417612,
  author   = {Brito, LP and Santos, DS and Freitas, NSA and Medeiros, RS and Souza, PRE and Soares, MTCV and Porto, ALF},
  title    = {In silico evaluation of genomic characteristics of {Streptococcus} infantarius subsp. infantarius for application in fermentations.},
  journal  = {Anais da Academia Brasileira de Ciencias},
  year     = {2022},
  volume   = {94},
  number   = {suppl 3},
  pages    = {e20211447},
  doi      = {10.1590/0001-3765202220211447},
  pmid     = {36417612},
  issn     = {1678-2690},
  mesh     = {Fermentation ; *Streptococcus/genetics ; *Genomics ; Sequence Analysis, DNA ; },
  abstract = {This study aims to evaluate the in silico genomic characteristics of Streptococcus infantarius subsp. infantarius, isolated from Coalho cheese from Paraíba, Brazil, with a view to application in lactic fermentations. rRNA sequences from the 16S ribosomal region were used as input to GenBank, in the search for patterns that could reveal a non-pathogenic behavior of S. infantarius subsp. infantarius, comparing mobile genetic elements, antibiotic resistance genes, pan-genome analysis and multi-genome alignment among related species. S. infantarius subsp. infantarius CJ18 was the only complete genome reported by BLAST/NCBI with high similarity and after comparative genetics with complete genomes of Streptococcus agalactiae (SAG153, NJ1606) and Streptococcus thermophilus (ST106, CS18, IDCC2201, APC151) revealed that CJ18 showed a low number of transposases and integrases, infection by phage bacteria of the Streptococcus genus, absence of antibiotic resistance genes and presence of bacteriocin, folate and riboflavin producing genes. The genome alignment revealed that the collinear blocks of S. thermophilus ST106 and S. agalactiae SAG153 have inverted blocks when compared to the CJ18 genome due to gene positioning, insertions and deletions. Therefore, the strains of S. infantarius subsp. infantarius isolated from Coalho cheese from Paraíba showed genomic similarity with CJ18 and the mobility of genes analyzed in silico showed absence of pathogenicity throughout the genome of CJ18, indicating the potential of these strains for the dairy industry.},
}
@article{pmid36416120,
  author   = {Yang, L and Yang, Y and Huang, L and Cui, X and Liu, Y},
  title    = {From single- to multi-omics: future research trends in medicinal plants.},
  journal  = {Briefings in bioinformatics},
  year     = {2022},
  doi      = {10.1093/bib/bbac485},
  pmid     = {36416120},
  issn     = {1477-4054},
  abstract = {Medicinal plants are the main source of natural metabolites with specialised pharmacological activities and have been widely examined by plant researchers. Numerous omics studies of medicinal plants have been performed to identify molecular markers of species and functional genes controlling key biological traits, as well as to understand biosynthetic pathways of bioactive metabolites and the regulatory mechanisms of environmental responses. Omics technologies have been widely applied to medicinal plants, including as taxonomics, transcriptomics, metabolomics, proteomics, genomics, pangenomics, epigenomics and mutagenomics. However, because of the complex biological regulation network, single omics usually fail to explain the specific biological phenomena. In recent years, reports of integrated multi-omics studies of medicinal plants have increased. Until now, there have few assessments of recent developments and upcoming trends in omics studies of medicinal plants. We highlight recent developments in omics research of medicinal plants, summarise the typical bioinformatics resources available for analysing omics datasets, and discuss related future directions and challenges. This information facilitates further studies of medicinal plants, refinement of current approaches and leads to new ideas.},
}
@article{pmid36415217,
  author   = {Golchha, NC and Nighojkar, A and Nighojkar, S},
  title    = {Redefining genomic view of {Clostridioides} difficile through pangenome analysis and identification of drug targets from its core genome.},
  journal  = {Drug target insights},
  year     = {2022},
  volume   = {16},
  pages    = {17--24},
  pmid     = {36415217},
  issn     = {1177-3928},
  abstract = {INTRODUCTION: Clostridioides difficile infection (CDI) is a leading cause of gastrointestinal infections and in the present day is a major concern for global health care system. The unavailability of specific antibiotics for CDI treatment and its emerging cases worldwide further broaden the challenge to control CDI.
METHODS: The availability of a large number of genome sequences for C. difficile and many bioinformatics tools for genome analysis provides the opportunity for in silico pangenomic analysis. In the present study, 97 strains of C. difficile were used for pangenomic studies and characterized for their phylogenomic and functional analysis.
RESULTS: Pangenome analysis reveals open pangenome of C. difficile and high genetic diversity. Sequence and interactome analysis of 1,481 core genes was done and eight potent drug targets are identified. Three drug targets, namely, aminodeoxychorismate synthase (PabB), D-alanyl-D-alanine carboxypeptidase (DD-CPase) and undecaprenyl diphospho-muramoyl pentapeptide beta-N-acetylglucosaminyl transferase (MurG transferase), have been reported as drug targets for other human pathogens, and five targets, namely, bifunctional diguanylate cyclase/phosphodiesterase (cyclic-diGMP), sporulation transcription factor (Spo0A), histidinol-phosphate transaminase (HisC), 3-deoxy-7-phosphoheptulonate synthase (DAHP synthase) and c-di-GMP phosphodiesterase (PdcA), are novel.
CONCLUSION: The suggested potent targets could act as broad-spectrum drug targets for C. difficile. However, further validation needs to be done before using them for lead compound discovery.},
}
@article{pmid36412754,
  author   = {Sánchez-Suárez, J and Díaz, L and Coy-Barrera, E and Villamil, L},
  title    = {Specialized Metabolism of {Gordonia} Genus: An Integrated Survey on Chemodiversity Combined with a Comparative Genomics-Based Analysis.},
  journal  = {Biotech (Basel (Switzerland))},
  year     = {2022},
  volume   = {11},
  number   = {4},
  doi      = {10.3390/biotech11040053},
  pmid     = {36412754},
  issn     = {2673-6284},
  abstract = {Members of the phylum Actinomycetota (formerly Actinobacteria) have historically been the most prolific providers of small bioactive molecules. Although the genus Streptomyces is the best-known member for this issue, other genera, such as Gordonia, have shown interesting potential in their specialized metabolism. Thus, we combined herein the result of a comprehensive literature survey on metabolites derived from Gordonia strains with a comparative genomic analysis to examine the potential of the specialized metabolism of the genus Gordonia. Thirty Gordonia-derived compounds of different classes were gathered (i.e., alkaloids, amides, phenylpropanoids, and terpenoids), exhibiting antimicrobial and cytotoxic activities, and several were also isolated from Streptomyces (e.g., actinomycin, nocardamin, diolmycin A1). With the genome data, we estimated an open pan-genome of 57,901 genes, most of them being part of the cloud genome. Regarding the BGCs content, 531 clusters were found, including Terpenes, RiPP-like, and NRPS clusters as the most frequent clusters. Our findings demonstrated that Gordonia is a poorly studied genus in terms of its specialized metabolism production and potential applications. Nevertheless, given their BGCs content, Gordonia spp. are a valuable biological resource that could expand the chemical spectrum of the phylum Actinomycetota, involving novel BGCs for inspiring innovative outlines for synthetic biology and further use in biotechnological initiatives. Therefore, further studies and more efforts should be made to explore different environments and evaluate other bioactivities.},
}
@article{pmid36409181,
  author   = {Mun, T and Vaddadi, NSK and Langmead, B},
  title    = {Pangenomic Genotyping with the Marker Array.},
  journal  = {Algorithms in bioinformatics : ... International Workshop, WABI ..., proceedings. WABI (Workshop)},
  year     = {2022},
  volume   = {242},
  pmid     = {36409181},
  support  = {R01 HG011392/HG/NHGRI NIH HHS/United States ; R35 GM139602/GM/NIGMS NIH HHS/United States ; },
  abstract = {We present a new method and software tool called rowbowt that applies a pangenome index to the problem of inferring genotypes from short-read sequencing data. The method uses a novel indexing structure called the marker array. Using the marker array, we can genotype variants with respect from large panels like the 1000 Genomes Project while avoiding the reference bias that results when aligning to a single linear reference. rowbowt can infer accurate genotypes in less time and memory compared to existing graph-based methods.},
}
@article{pmid36408900,
  author   = {Fullam, A and Letunic, I and Schmidt, TSB and Ducarmon, QR and Karcher, N and Khedkar, S and Kuhn, M and Larralde, M and Maistrenko, OM and Malfertheiner, L and Milanese, A and Rodrigues, JFM and Sanchis-López, C and Schudoma, C and Szklarczyk, D and Sunagawa, S and Zeller, G and Huerta-Cepas, J and von Mering, C and Bork, P and Mende, DR},
  title    = {{proGenomes3}: approaching one million accurately and consistently annotated high-quality prokaryotic genomes.},
  journal  = {Nucleic acids research},
  year     = {2022},
  doi      = {10.1093/nar/gkac1078},
  pmid     = {36408900},
  issn     = {1362-4962},
  support  = {205321_184955/SNSF_/Swiss National Science Foundation/Switzerland ; },
  abstract = {The interpretation of genomic, transcriptomic and other microbial 'omics data is highly dependent on the availability of well-annotated genomes. As the number of publicly available microbial genomes continues to increase exponentially, the need for quality control and consistent annotation is becoming critical. We present proGenomes3, a database of 907 388 high-quality genomes containing 4 billion genes that passed stringent criteria and have been consistently annotated using multiple functional and taxonomic databases including mobile genetic elements and biosynthetic gene clusters. proGenomes3 encompasses 41 171 species-level clusters, defined based on universal single copy marker genes, for which pan-genomes and contextual habitat annotations are provided. The database is available at http://progenomes.embl.de/.},
}
@article{pmid36408592,
  author   = {Vij, S and Thakur, R and Rishi, P},
  title    = {Reverse engineering approach: a step towards a new era of vaccinology with special reference to {Salmonella}.},
  journal  = {Expert review of vaccines},
  year     = {2022},
  volume   = {21},
  number   = {12},
  pages    = {1763--1785},
  doi      = {10.1080/14760584.2022.2148661},
  pmid     = {36408592},
  issn     = {1744-8395},
  mesh     = {Humans ; Vaccinology ; *Typhoid Fever/prevention & control ; Salmonella/genetics ; *Typhoid-Paratyphoid Vaccines ; Anti-Bacterial Agents ; Epitopes ; },
  abstract = {INTRODUCTION: Salmonella is responsible for causing enteric fever, septicemia, and gastroenteritis in humans. Due to high disease burden and emergence of multi- and extensively drug-resistant Salmonella strains, it is becoming difficult to treat the infection with existing battery of antibiotics as we are not able to discover newer antibiotics at the same pace at which the pathogens are acquiring resistance. Though vaccines against Salmonella are available commercially, they have limited efficacy. Advancements in genome sequencing technologies and immunoinformatics approaches have solved the problem significantly by giving rise to a new era of vaccine designing, i.e. 'Reverse engineering.' Reverse engineering/vaccinology has expedited the vaccine identification process. Using this approach, multiple potential proteins/epitopes can be identified and constructed as a single entity to tackle enteric fever.
AREAS COVERED: This review provides details of reverse engineering approach and discusses various protein and epitope-based vaccine candidates identified using this approach against typhoidal Salmonella.
EXPERT OPINION: Reverse engineering approach holds great promise for developing strategies to tackle the pathogen(s) by overcoming the limitations posed by existing vaccines. Progressive advancements in the arena of reverse vaccinology, structural biology, and systems biology combined with an improved understanding of host-pathogen interactions are essential components to design new-generation vaccines.},
}
@article{pmid36405966,
  author   = {Guo, Y and Zeng, C and Ma, C and Cai, H and Jiang, X and Zhai, S and Xu, X and Lin, M},
  title    = {Comparative genomics analysis of the multidrug-resistant {Aeromonas} hydrophila {MX16A} providing insights into antibiotic resistance genes.},
  journal  = {Frontiers in cellular and infection microbiology},
  year     = {2022},
  volume   = {12},
  pages    = {1042350},
  pmid     = {36405966},
  issn     = {2235-2988},
  mesh     = {*Aeromonas hydrophila/genetics ; *Anti-Bacterial Agents/pharmacology ; Drug Resistance, Microbial ; beta-Lactams ; Genomics ; },
  abstract = {In this paper, the whole genome of the multidrug-resistant Aeromonas hydrophila MX16A was comprehensively analyzed and compared after sequencing by PacBio RS II. To shed light on the drug resistance mechanism of A. hydrophila MX16A, a Kirby-Bauer disk diffusion method was used to assess the phenotypic drug susceptibility. Importantly, resistance against β-lactam, sulfonamides, rifamycins, macrolides, tetracyclines and chloramphenicols was largely consistent with the prediction analysis results of drug resistance genes in the CARD database. The varied types of resistance genes identified from A. hydrophila MX16A revealed multiple resistance mechanisms, including enzyme inactivation, gene mutation and active effusion. The publicly available complete genomes of 35 Aeromonas hydrophila strains on NCBI, including MX16A, were downloaded for genomic comparison and analysis. The analysis of 33 genomes with ANI greater than 95\% showed that the pan-genome consisted of 9556 genes, and the core genes converged to 3485 genes. In summary, the obtained results showed that A. hydrophila exhibited a great genomic diversity as well as diverse metabolic function and it is believed that frequent exchanges between strains lead to the horizontal transfer of drug resistance genes.},
}
@article{pmid36404338,
  author   = {Orata, FD and Hussain, NAS and Liang, KYH and Hu, D and Boucher, YF},
  title    = {Genomes of {Vibrio} metoecus co-isolated with {Vibrio} cholerae extend our understanding of differences between these closely related species.},
  journal  = {Gut pathogens},
  year     = {2022},
  volume   = {14},
  number   = {1},
  pages    = {42},
  pmid     = {36404338},
  issn     = {1757-4749},
  abstract = {BACKGROUND: Vibrio cholerae, the causative agent of cholera, is a well-studied species, whereas Vibrio metoecus is a recently described close relative that is also associated with human infections. The availability of V. metoecus genomes provides further insight into its genetic differences from V. cholerae. Additionally, both species have been co-isolated from a cholera-free brackish coastal pond and have been suggested to interact with each other by horizontal gene transfer (HGT).
RESULTS: The genomes of 17 strains from each species were sequenced. All strains share a large core genome (2675 gene families) and very few genes are unique to each species (< 3\% of the pan-genome of both species). This led to the identification of potential molecular markers-for nitrite reduction, as well as peptidase and rhodanese activities-to further distinguish V. metoecus from V. cholerae. Interspecies HGT events were inferred in 21\% of the core genes and 45\% of the accessory genes. A directional bias in gene transfer events was found in the core genome, where V. metoecus was a recipient of three times (75\%) more genes from V. cholerae than it was a donor (25\%).
CONCLUSION: V. metoecus was misclassified as an atypical variant of V. cholerae due to their resemblance in a majority of biochemical characteristics. More distinguishing phenotypic assays can be developed based on the discovery of potential gene markers to avoid any future misclassifications. Furthermore, differences in relative abundance or seasonality were observed between the species and could contribute to the bias in directionality of HGT.},
}
@article{pmid36395320,
  author   = {Lofgren, LA and Ross, BS and Cramer, RA and Stajich, JE},
  title    = {The pan-genome of {Aspergillus} fumigatus provides a high-resolution view of its population structure revealing high levels of lineage-specific diversity driven by recombination.},
  journal  = {PLoS biology},
  year     = {2022},
  volume   = {20},
  number   = {11},
  pages    = {e3001890},
  pmid     = {36395320},
  issn     = {1545-7885},
  support  = {R01 AI130128/AI/NIAID NIH HHS/United States ; S10 OD016290/OD/NIH HHS/United States ; },
  mesh     = {*Antifungal Agents ; *Aspergillus fumigatus/genetics ; Drug Resistance, Fungal ; Genomics ; Recombination, Genetic/genetics ; },
  abstract = {Aspergillus fumigatus is a deadly agent of human fungal disease where virulence heterogeneity is thought to be at least partially structured by genetic variation between strains. While population genomic analyses based on reference genome alignments offer valuable insights into how gene variants are distributed across populations, these approaches fail to capture intraspecific variation in genes absent from the reference genome. Pan-genomic analyses based on de novo assemblies offer a promising alternative to reference-based genomics with the potential to address the full genetic repertoire of a species. Here, we evaluate 260 genome sequences of A. fumigatus including 62 newly sequenced strains, using a combination of population genomics, phylogenomics, and pan-genomics. Our results offer a high-resolution assessment of population structure and recombination frequency, phylogenetically structured gene presence-absence variation, evidence for metabolic specificity, and the distribution of putative antifungal resistance genes. Although A. fumigatus disperses primarily via asexual conidia, we identified extraordinarily high levels of recombination with the lowest linkage disequilibrium decay value reported for any fungal species to date. We provide evidence for 3 primary populations of A. fumigatus, with recombination occurring only rarely between populations and often within them. These 3 populations are structured by both gene variation and distinct patterns of gene presence-absence with unique suites of accessory genes present exclusively in each clade. Accessory genes displayed functional enrichment for nitrogen and carbohydrate metabolism suggesting that populations may be stratified by environmental niche specialization. Similarly, the distribution of antifungal resistance genes and resistance alleles were often structured by phylogeny. Altogether, the pan-genome of A. fumigatus represents one of the largest fungal pan-genomes reported to date including many genes unrepresented in the Af293 reference genome. These results highlight the inadequacy of relying on a single-reference genome-based approach for evaluating intraspecific variation and the power of combined genomic approaches to elucidate population structure, genetic diversity, and putative ecological drivers of clinically relevant fungi.},
}
@article{pmid36386637,
  author   = {Jiang, ZM and Deng, Y and Han, XF and Su, J and Wang, H and Yu, LY and Zhang, YQ},
  title    = {{Geminicoccus} flavidas sp. nov. and {Geminicoccus} harenae sp. nov., two {IAA}-producing novel rare bacterial species inhabiting desert biological soil crusts.},
  journal  = {Frontiers in microbiology},
  year     = {2022},
  volume   = {13},
  pages    = {1034816},
  pmid     = {36386637},
  issn     = {1664-302X},
  abstract = {Two Gram-staining negative strains (CPCC 101082[T] and CPCC 101083[T]) were isolated from biological sandy soil crusts samples collected from Badain Jaran desert, China. Both isolates were heterotrophic phototroph, could produce indole-3-acetic acid. The 16S rRNA gene sequences of these two strains were closely related to the members of the family Geminicoccaceae, showing high similarities with Geminicoccus roseus DSM 18922[T] (96.9\%) and Arboricoccus pini B29T1[T] (90.1\%), respectively. In phylogenetic tree based on 16S rRNA gene sequences, strain CPCC 101082[T] and CPCC 101083[T] formed a robust distinct clade with Geminicoccus roseus DSM 18922[T] within the family Geminicoccaceae, which indicated that these two isolates could be classified into the genus Geminicoccus. The growth of strain CPCC 101082[T] occurred at 15-42°C and pH 4.0-10.0 (optima at 28-37°C and pH 6.0-8.0). The growth of strain CPCC 101083[T] occurred at 4-45°C and pH 4.0-10.0 (optima at 25-30°C and pH 6.0-8.0). The major cellular fatty acids of CPCC 101082[T] and CPCC 101083[T] contained C18:1 ω7c/C18:1 ω6c, cyclo-C19:0 ω8c, and C16:0. Q-10 was detected as the sole respiratory quinone. Diphosphatidylglycerol, phosphatidylglycerol, phosphatidylcholine, phosphatidylethanolamine, an unidentified phospholipid and an unidentified aminolipid were tested in the polar lipids profile. The genomes of the two isolates were characterized as about 5.9 Mbp in size with the G + C content of nearly 68\%. The IAA-producing encoding genes were predicated in both genomes. The values of average nucleotide identity were 80.6, 81.2 and 92.4\% based on a pairwise comparison of the genomes of strains CPCC 101082[T] and CPCC 101083[T] and Geminicoccus roseus DSM 18922[T], respectively. On the basis of the genotypic, chemotaxonomic and phenotypic characteristics, the strains CPCC 101082[T] (=NBRC 113513[T] = KCTC 62853[T]) and CPCC 101083[T] (=NBRC 113514[T] = KCTC 62854[T]) are proposed to represent two novel species of the genus Geminicoccus with the names Geminicoccus flavidas sp. nov. and Geminicoccus harenae sp. nov.},
}
@article{pmid36377929,
  author   = {Daware, A and Malik, A and Srivastava, R and Das, D and Ellur, RK and Singh, AK and Tyagi, AK and Parida, SK},
  title    = {Rice {Pangenome} {Array} ({RPGA}): an efficient genotyping solution for pangenome-based accelerated crop improvement in rice.},
  journal  = {The Plant journal : for cell and molecular biology},
  year     = {2022},
  doi      = {10.1111/tpj.16028},
  pmid     = {36377929},
  issn     = {1365-313X},
  abstract = {The advent of the pangenome era has unraveled previously unknown genetic variation existing within diverse crop plants, including rice. This untapped genetic variation is believed to account for a major portion of phenotypic variation existing in crop plants. However, the use of conventional single reference-guided genotyping often fails to capture large portion of this genetic variation leading to a reference bias. This makes it difficult to identify and utilize novel population/cultivar-specific genes for crop improvement. Thus, we developed a rice pangenome genotyping array (RPGA) harboring probes assaying 80K single nucleotide polymorphisms (SNPs) and presence-absence variants (PAVs) spanning the entire 3K rice pangenome. This array provides a simple, user-friendly and cost-effective (60 to 80 USD per sample) solution for rapid pangenome-based genotyping in rice. The GWAS conducted using RPGA-SNP genotyping data of a rice diversity panel detected a total of 42 loci, including previously known as well as novel genomic loci regulating grain size/weight traits in rice. Eight of these identified trait-associated loci (dispensable loci) could not be detected with conventional single reference genome-based GWAS. A WD repeat-containing PROTEIN 12 gene underlying one of such dispensable locus on chromosome 7 (qLWR7) along with other non-dispensable loci were subsequently detected using high-resolution QTL mapping confirming authenticity of RPGA-led GWAS. This demonstrates the potential of RPGA-based genotyping to overcome reference bias. The application of RPGA-based genotyping for population structure analysis, hybridity testing, ultra-high-density genetic map construction and chromosome-level genome assembly, and marker-assisted selection was also demonstrated. A web application (http://www.rpgaweb.com) was further developed to provide easy to use platform for the imputation of RPGA-based genotyping data using 3K Rice Reference Panel and subsequent GWAS.},
}
@article{pmid36377253,
  author   = {Tello, D and Gonzalez-Garcia, LN and Gomez, J and Zuluaga-Monares, JC and Garcia, R and Angel, R and Mahecha, D and Duarte, E and Leon, MDR and Reyes, F and Escobar-Velásquez, C and Linares-Vásquez, M and Cardozo, N and Duitama, J},
  title    = {{NGSEP} 4: Efficient and accurate identification of orthogroups and whole-genome alignment.},
  journal  = {Molecular ecology resources},
  year     = {2022},
  doi      = {10.1111/1755-0998.13737},
  pmid     = {36377253},
  issn     = {1755-0998},
  abstract = {Whole-genome alignment allows researchers to understand the genomic structure and variation among genomes. Approaches based on direct pairwise comparisons of DNA sequences require large computational capacities. As a consequence, pipelines combining tools for orthologous gene identification and synteny have been developed. In this manuscript, we present the latest functionalities implemented in NGSEP 4, to identify orthogroups and perform whole genome alignments. NGSEP implements functionalities for identification of clusters of homologus genes, synteny analysis and whole genome alignment. Our results showed that the NGSEP algorithm for orthogroups identification has competitive accuracy and efficiency in comparison to commonly used tools. The implementation also includes a visualization of the whole genome alignment based on synteny of the orthogroups that were identified, and a reconstruction of the pangenome based on frequencies of the orthogroups among the genomes. NGSEP 4 also includes a new graphical user interface based on the JavaFX technology. We expect that these new developments will be very useful for several studies in evolutionary biology and population genomics.},
}
% PubMed record 36376589 (KBase MAG protocol). The platform name in the title is brace-protected
% so that sentence-casing styles keep its capitals. The record supplies no DOI, volume, issue or pages.
@article{pmid36376589,
  author   = {Chivian, D and Jungbluth, SP and Dehal, PS and Wood-Charlson, EM and Canon, RS and Allen, BH and Clark, MM and Gu, T and Land, ML and Price, GA and Riehl, WJ and Sneddon, MW and Sutormin, R and Zhang, Q and Cottingham, RW and Henry, CS and Arkin, AP},
  title    = {Metagenome-assembled genome extraction and analysis from microbiomes using {KBase}.},
  journal  = {Nature protocols},
  year     = {2022},
  pmid     = {36376589},
  issn     = {1750-2799},
  abstract = {Uncultivated Bacteria and Archaea account for the vast majority of species on Earth, but obtaining their genomes directly from the environment, using shotgun sequencing, has only become possible recently. To realize the hope of capturing Earth's microbial genetic complement and to facilitate the investigation of the functional roles of specific lineages in a given ecosystem, technologies that accelerate the recovery of high-quality genomes are necessary. We present a series of analysis steps and data products for the extraction of high-quality metagenome-assembled genomes (MAGs) from microbiomes using the U.S. Department of Energy Systems Biology Knowledgebase (KBase) platform (http://www.kbase.us/). Overall, these steps take about a day to obtain extracted genomes when starting from smaller environmental shotgun read libraries, or up to about a week from larger libraries. In KBase, the process is end-to-end, allowing a user to go from the initial sequencing reads all the way through to MAGs, which can then be analyzed with other KBase capabilities such as phylogenetic placement, functional assignment, metabolic modeling, pangenome functional profiling, RNA-Seq and others. While portions of such capabilities are available individually from other resources, the combination of the intuitive usability, data interoperability and integration of tools in a freely available computational resource makes KBase a powerful platform for obtaining MAGs from microbiomes. 
While this workflow offers tools for each of the key steps in the genome extraction process, it also provides a scaffold that can be easily extended with additional MAG recovery and analysis tools, via the KBase software development kit (SDK).},
}
% PubMed record 36375718 (Dietzia comparative genomics). The genus name in the title is
% brace-protected so that sentence-casing styles keep its capital. `pages` holds the only
% locator the record supplies; no volume or issue is given.
@article{pmid36375718,
  author   = {Gonçalves Dos Santos, R and Castillo, RH and Neres Rodrigues, DL and Lima, A and Ferreira Dos Anjos, W and Rifici, C and Attili, AR and Tiwari, S and Jaiswal, AK and Spier, SJ and Mazzullo, G and Morais-Rodrigues, F and Pinto Gomide, AC and Lima de Jesus, LC and Aburjaile, FF and Brenig, B and Cuteri, V and Luiz de Paula Castro, T and Seyffert, N and Santos, A and Góes-Neto, A and de Jesus Sousa, T and Azevedo, V},
  title    = {Comparative genomic analysis of the {Dietzia} genus: an insight into genomic diversity, and adaptation.},
  journal  = {Research in microbiology},
  year     = {2022},
  pages    = {103998},
  doi      = {10.1016/j.resmic.2022.103998},
  pmid     = {36375718},
  issn     = {1769-7123},
  abstract = {Dietzia strains are widely distributed in the environment, presenting an opportunistic role, and some species have undetermined taxonomic characteristics. Here, we propose the existence of errors in the classification of species in this genus using comparative genomics. We performed ANI, dDDH, pangenome and genomic plasticity analyses better to elucidate the phylogenomic relationships between Dietzia strains. For this, we used 55 genomes of Dietzia downloaded from public databases that were combined with a newly sequenced. Sequence analysis of a phylogenetic tree based on genome similarity comparisons and dDDH, ANI analyses supported grouping different Dietzia species into four distinct groups. The pangenome analysis corroborated the classification of these groups, supporting the idea that some species of Dietzia could be reassigned in a possible classification into three distinct species, each containing less variability than that found within the global pangenome of all strains. Additionally, analysis of genomic plasticity based on groups containing Dietzia strains found differences in the presence and absence of symbiotic Islands and pathogenic islands related to their isolation site. 
We propose that the comparison of pangenome subsets together with phylogenomic approaches can be used as an alternative for the classification and differentiation of new species of the genus Dietzia.},
}
% PubMed record 36375370 (Yersinia pestis therapeutic targets). The genus name in the title is
% brace-protected so that sentence-casing styles keep its capital. The record supplies no issue number.
@article{pmid36375370,
  author   = {Islam, J and Sarkar, H and Hoque, H and Hasan, MN and Jewel, GMNA},
  title    = {In-silico approach of identifying novel therapeutic targets against {Yersinia} pestis using pan and subtractive genomic analysis.},
  journal  = {Computational biology and chemistry},
  year     = {2022},
  volume   = {101},
  pages    = {107784},
  doi      = {10.1016/j.compbiolchem.2022.107784},
  pmid     = {36375370},
  issn     = {1476-928X},
  mesh     = {Humans ; *Yersinia pestis/genetics ; *Plague/drug therapy/genetics/microbiology ; Genomics ; Genome, Bacterial ; Virulence Factors ; },
  abstract = {The magnitude of human affliction brought about by bacterial infections has been on the rise since the mid-5th century. Yersinia pestis is one such notable, gram-negative bacterium that inflicted havoc around the globe three times throughout different millenniums by causing deadly plagues. Despite the unremitting efforts by scientists, different strains of Yersinia pestis are still affecting the populations in various parts of the world by growing resistant to existing antimicrobial agents owing to their overuse. The current scenario, therefore, calls for new therapeutics to further combat the disease. In this study, 3105 core, 387 pathogen-specific unique, 536 choke-point, 796 virulence factors, and 115 antimicrobial resistant proteins were found using a pan-genomic and subtractive genome analysis of nine Yersinia pestis strains that could be instrumental in the development of drugs against Yersinia pestis. Subsequently, 1461 and 1114 essential proteins were identified as non-homologous to human and gut microflora. 535 and 30 proteins were predicted as cytoplasmic and broad-spectrum targets respectively. Finally, four potential targets were selected for their high connectivity in protein-protein interaction network. These selected target proteins are associated with one of the major lipopolysaccharide biosynthesis pathways. 
Therefore, dismantling their activity might indicate a probable strategy for developing therapeutics to combat bacterial infection caused by Yersinia pestis. However, further experimental validation in the laboratory is needed to consolidate the research findings.},
}
% PubMed record 36367506 (Marivivens taxonomy). Genus names in the title are brace-protected so
% that sentence-casing styles keep their capitals. The record supplies no page or article number.
@article{pmid36367506,
  author   = {Qu, L and Li, Y and Wang, W and Shao, Z and Gao, Z and Lai, Q},
  title    = {{Aestuarium} zhoushanense is a later heterotypic synonym of {Marivivens} donghaensis, and transfer of {Paradonghicola} geojensis to the genus {Marivivens} as {Marivivens} geojensis comb. nov.},
  journal  = {International journal of systematic and evolutionary microbiology},
  year     = {2022},
  volume   = {72},
  number   = {11},
  doi      = {10.1099/ijsem.0.005564},
  pmid     = {36367506},
  issn     = {1466-5034},
  mesh     = {RNA, Ribosomal, 16S/genetics ; Phylogeny ; DNA, Bacterial/genetics ; Bacterial Typing Techniques ; Base Composition ; Sequence Analysis, DNA ; *Fatty Acids/chemistry ; Nucleic Acid Hybridization ; },
  abstract = {The 16S rRNA genes of Aestuarium zhoushanense G7[T] and Paradonghicola geojensis FJ12[T] shared 100 % sequence identity with Marivivens donghaensis AM-4[T]. Phylogeny of 16S rRNA gene sequences showed that the three type strains formed a monophyletic clade within the genus Marivivens. Whole genome sequence comparisons showed that three type strains shared 46.7-69.7 % digital DNA-DNA hybridization, 92.1-96.4 % average nucleotide identity and 96.2-98.1 % average amino acid identity. The high 16S rRNA gene similarity values show that three type strains should belong to the same genus. The pan-genome of the five strains contained 5754 genes including 1877 core genes. Based on the principle of priority, we propose that A. zhoushanense Yu et al. 2019 is a later heterotypic synonym of M. donghaensis Park et al. 2016, and P. geojensis should be reclassified as Marivivens geojensis comb. nov., respectively.},
}
% PubMed record 36366394 (Plesiomonas shigelloides vaccine design). The genus name in the title is
% brace-protected so that sentence-casing styles keep its capital. The record supplies no DOI
% and no page or article number.
@article{pmid36366394,
  author   = {Mushtaq, M and Khan, S and Hassan, M and Al-Harbi, AI and Hameed, AR and Khan, K and Ismail, S and Irfan, M and Ahmad, S},
  title    = {Computational Design of a Chimeric Vaccine against {Plesiomonas} shigelloides Using Pan-Genome and Reverse Vaccinology.},
  journal  = {Vaccines},
  year     = {2022},
  volume   = {10},
  number   = {11},
  pmid     = {36366394},
  issn     = {2076-393X},
  abstract = {The swift emergence of antibiotic resistance (AR) in bacterial pathogens to make themselves adaptable to changing environments has become an alarming health issue. To prevent AR infection, many ways can be accomplished such as by decreasing the misuse of antibiotics in human and animal medicine. Among these AR bacterial species, Plesiomonas shigelloides is one of the etiological agents of intestinal infection in humans. It is a gram-negative rod-shaped bacterium that is highly resistant to several classes of antibiotics, and no licensed vaccine against the aforementioned pathogen is available. Hence, substantial efforts are required to screen protective antigens from the pathogen whole genome that can be subjected easily to experimental evaluations. Here, we employed a reverse vaccinology (RV) approach to design a multi-antigenic epitopes based vaccine against P. shigelloides. The complete genomes of P. shigelloides were retrieved from the National Center for Biotechnological Information (NCBI) that on average consist of 5226 proteins. The complete proteomes were subjected to different subtractive proteomics filters, and in the results of that analysis, out of total proteins, 2399 were revealed as non-redundant and 2827 as redundant proteins. The non-redundant proteins were further checked for subcellular localization analysis, in which three were localized in the extracellular matrix, eight were outer membrane, and 13 were found in the periplasmic membrane. All surface localized proteins were found to be virulent. 
Out of a total of 24 virulent proteins, three proteins (flagellar hook protein (FlgE), hypothetical protein, and TonB-dependent hemoglobin/transferrin/lactoferrin family receptor protein) were considered as potential vaccine targets and subjected to epitopes prediction. The predicted epitopes were further examined for antigenicity, toxicity, and solubility. A total of 10 epitopes were selected (GFKESRAEF, VQVPTEAGQ, KINENGVVV, ENKALSQET, QGYASANDE, RLNPTDSRW, TLDYRLNPT, RVTKKQSDK, GEREGKNRP, RDKKTNQPL). The selected epitopes were linked with each other via specific GPGPG linkers in order to design a multi-epitopes vaccine construct, and linked with cholera toxin B subunit adjuvant to make the designed vaccine construct more efficient in terms of antigenicity. The 3D structure of the vaccine construct was modeled ab initio as no appropriate template was available. Furthermore, molecular docking was carried out to check the interaction affinity of the designed vaccine with major histocompatibility complex (MHC-)I (PDB ID: 1L1Y), MHC-II (1KG0), and toll-like receptor 4 ((TLR-4) (PDB: 4G8A). Molecular dynamic simulation was applied to evaluate the dynamic behavior of vaccine-receptor complexes. Lastly, the binding free energies of the vaccine with receptors were estimated by using MMPB/GBSA methods. All of the aforementioned analyses concluded that the designed vaccine molecule as a good candidate to be used in experimental studies to disclose its immune protective efficacy in animal models.},
}
% PubMed record 36363712 (Listeria monocytogenes WGS comparison). The genus name in the title is
% brace-protected so that sentence-casing styles keep its capital. The record supplies no DOI
% and no page or article number.
@article{pmid36363712,
  author   = {Murr, L and Huber, I and Pavlovic, M and Guertler, P and Messelhaeusser, U and Weiss, M and Ehrmann, M and Tuschak, C and Bauer, H and Wenning, M and Busch, U and Bretschneider, N},
  title    = {Whole-Genome Sequence Comparisons of {Listeria} monocytogenes Isolated from Meat and Fish Reveal High Inter- and Intra-Sample Diversity.},
  journal  = {Microorganisms},
  year     = {2022},
  volume   = {10},
  number   = {11},
  pmid     = {36363712},
  issn     = {2076-2607},
  abstract = {Interpretation of whole-genome sequencing (WGS) data for foodborne outbreak investigations is complex, as the genetic diversity within processing plants and transmission events need to be considered. In this study, we analyzed 92 food-associated Listeria monocytogenes isolates by WGS-based methods. We aimed to examine the genetic diversity within meat and fish production chains and to assess the applicability of suggested thresholds for clustering of potentially related isolates. Therefore, meat-associated isolates originating from the same samples or processing plants as well as fish-associated isolates were analyzed as distinct sets. In silico serogrouping, multilocus sequence typing (MLST), core genome MLST (cgMLST), and pangenome analysis were combined with screenings for prophages and genetic traits. Isolates of the same subtypes (cgMLST types (CTs) or MLST sequence types (STs)) were additionally compared by SNP calling. This revealed the occurrence of more than one CT within all three investigated plants and within two samples. Analysis of the fish set resulted in predominant assignment of isolates from pangasius catfish and salmon to ST2 and ST121, respectively, potentially indicating persistence within the respective production chains. The approach not only allowed the detection of distinct subtypes but also the determination of differences between closely related isolates, which need to be considered when interpreting WGS data for surveillance.},
}
% PubMed record 36362240 (Neisseria whole-genome taxonomy). The genus name in the title is
% brace-protected so that sentence-casing styles keep its capital. The record supplies no DOI
% and no page or article number.
@article{pmid36362240,
  author   = {Khoder, M and Osman, M and Kassem, II and Rafei, R and Shahin, A and Fournier, PE and Rolain, JM and Hamze, M},
  title    = {Whole Genome Analyses Accurately Identify {Neisseria} spp. and Limit Taxonomic Ambiguity.},
  journal  = {International journal of molecular sciences},
  year     = {2022},
  volume   = {23},
  number   = {21},
  pmid     = {36362240},
  issn     = {1422-0067},
  mesh     = {Male ; Humans ; Phylogeny ; *Neisseria/genetics ; Neisseria gonorrhoeae/genetics ; *Neisseria meningitidis/genetics ; Spectrometry, Mass, Matrix-Assisted Laser Desorption-Ionization ; DNA ; Genome, Bacterial ; },
  abstract = {Genome sequencing facilitates the study of bacterial taxonomy and allows the re-evaluation of the taxonomic relationships between species. Here, we aimed to analyze the draft genomes of four commensal Neisseria clinical isolates from the semen of infertile Lebanese men. To determine the phylogenetic relationships among these strains and other Neisseria spp. and to confirm their identity at the genomic level, we compared the genomes of these four isolates with the complete genome sequences of Neisseria gonorrhoeae and Neisseria meningitidis and the draft genomes of Neisseria flavescens, Neisseria perflava, Neisseria mucosa, and Neisseria macacae that are available in the NCBI Genbank database. Our findings revealed that the WGS analysis accurately identified and corroborated the matrix-assisted laser desorption ionization-time of flight (MALDI-TOF) species identities of the Neisseria isolates. The combination of three well-established genome-based taxonomic tools (in silico DNA-DNA Hybridization, Ortho Average Nucleotide identity, and pangenomic studies) proved to be relatively the best identification approach. Notably, we also discovered that some Neisseria strains that are deposited in databases contain many taxonomical errors. The latter is very important and must be addressed to prevent misdiagnosis and missing emerging etiologies. 
We also highlight the need for robust cut-offs to delineate the species using genomic tools.},
}
% PubMed record 36362207 (graph pangenomes for crop improvement). All field values are as
% exported; `pages` is empty because the record supplies no page or article number.
@article{pmid36362207,
  author   = {Hameed, A and Poznanski, P and Nadolska-Orczyk, A and Orczyk, W},
  title    = {Graph Pangenomes Track Genetic Variants for Crop Improvement.},
  journal  = {International journal of molecular sciences},
  year     = {2022},
  volume   = {23},
  number   = {21},
  pages    = {},
  pmid     = {36362207},
  issn     = {1422-0067},
  mesh     = {Humans ; *Genome-Wide Association Study ; *Quantitative Trait Loci ; Polymorphism, Single Nucleotide ; Plant Breeding ; Multifactorial Inheritance ; Crops, Agricultural/genetics ; },
  abstract = {Global climate change and the urgency to transform crops require an exhaustive genetic evaluation. The large polyploid genomes of food crops, such as cereals, make it difficult to identify candidate genes with confirmed hereditary. Although genome-wide association studies (GWAS) have been proficient in identifying genetic variants that are associated with complex traits, the resolution of acquired heritability faces several significant bottlenecks such as incomplete detection of structural variants (SV), genetic heterogeneity, and/or locus heterogeneity. Consequently, a biased estimate is generated with respect to agronomically complex traits. The graph pangenomes have resolved this missing heritability and provide significant details in terms of specific loci segregating among individuals and evolving to variations. The graph pangenome approach facilitates crop improvements through genome-linked fast breeding.},
}
% PubMed record 36358771 (VHL renal cancers review). The title ends with its own question mark,
% so no trailing period is stored after it. The acronym VHL is brace-protected so that
% sentence-casing styles keep its capitals. The record supplies no DOI and no page or article number.
@article{pmid36358771,
  author   = {Cinque, A and Minnei, R and Floris, M and Trevisani, F},
  title    = {The Clinical and Molecular Features in the {VHL} Renal Cancers; Close or Distant Relatives with Sporadic Clear Cell Renal Cell Carcinoma?},
  journal  = {Cancers},
  year     = {2022},
  volume   = {14},
  number   = {21},
  pmid     = {36358771},
  issn     = {2072-6694},
  abstract = {Von Hippel-Lindau (VHL) disease is an autosomal dominant inherited cancer syndrome caused by germline mutations in the VHL tumor suppressor gene, characterized by the susceptibility to a wide array of benign and malign neoplasms, including clear-cell renal cell carcinoma. Moreover, VHL somatic inactivation is a crucial molecular event also in sporadic ccRCCs tumorigenesis. While systemic biomarkers in the VHL syndrome do not currently play a role in clinical practice, a new promising class of predictive biomarkers, microRNAs, has been increasingly studied. Lots of pan-genomic studies have deeply investigated the possible biological role of microRNAs in the development and progression of sporadic ccRCC; however, few studies have investigated the miRNA profile in VHL patients. Our review summarize all the new insights related to clinical and molecular features in VHL renal cancers, with a particular focus on the overlap with sporadic ccRCC.},
}
% PubMed record 36358219 (Klebsiella pneumoniae resistance and virulence, Al-Kharj). The genus
% name, the place name and the acronym KSA in the title are brace-protected so that
% sentence-casing styles keep their capitals. The record supplies no DOI and no page or article number.
@article{pmid36358219,
  author   = {Moglad, E and Alanazi, N and Altayb, HN},
  title    = {Genomic Study of Chromosomally and Plasmid-Mediated Multidrug Resistance and Virulence Determinants in {Klebsiella} Pneumoniae Isolates Obtained from a Tertiary Hospital in {Al-Kharj}, {KSA}.},
  journal  = {Antibiotics (Basel, Switzerland)},
  year     = {2022},
  volume   = {11},
  number   = {11},
  pmid     = {36358219},
  issn     = {2079-6382},
  abstract = {Klebsiella pneumoniae is an emergent pathogen causing respiratory tract, bloodstream, and urinary tract infections in humans. This study defines the genomic sequence data, genotypic and phenotypic characterization of K. pneumoniae clinically isolated from Al-Kharj, KSA. Whole-genome analysis of four K. pneumoniae strains was performed, including de novo assembly, functional annotation, whole-genome-phylogenetic analysis, antibiotic-resistant gene identification, prophage regions, virulent factor, and pan-genome analysis. The results showed that K6 and K7 strains were MDR and ESBL producers, K16 was an ESBL producer, and K8 was sensitive to all tested drugs except ampicillin. K6 and K7 were identified with sequence type (ST) 23, while K16 and K8 were identified with STs 353 and 592, respectively. K6 and K7 were identified with the K1 (wzi1 genotype) capsule and O1 serotype, while K8 was identified with the K57 (wzi206 genotype) capsule and O3b. K6 isolates harbored 10 antimicrobial resistance genes (ARGs) associated with four different plasmids; the chloramphenicol acetyltransferase (catB3), blaOXA-1 and aac(6')-Ib-cr genes were detected in plasmid pB-8922_OXA-48. K6 and K7 also carried a similar gene cassette in plasmid pC1K6P0122-2; the gene cassettes were the trimethoprim-resistant gene (dfrA14), integron integrase (IntI1), insertion sequence (IS1), transposase protein, and replication initiation protein (RepE). 
Two hypervirulent plasmids were reported in isolates K6 and K7 that carried synthesis genes (iucA, iucB, iucC, iucD, and iutA) and iron siderophore genes (iroB, iroC, iroD, and iroN). The presence of these plasmids in high-risk clones suggests their dissemination in our region, which represents a serious health problem.},
}
% PubMed record 36353749 (melon pan-genome and multi-parental framework). The genus name in the
% title is brace-protected so that sentence-casing styles keep its capital. The record supplies
% no volume, issue or pages.
@article{pmid36353749,
  author   = {Oren, E and Dafna, A and Tzuri, G and Halperin, I and Isaacson, T and Elkabetz, M and Meir, A and Saar, U and Ohali, S and La, T and Romay, C and Tadmor, Y and Schaffer, AA and Buckler, ES and Cohen, R and Burger, J and Gur, A},
  title    = {Pan-genome and multi-parental framework for high-resolution trait dissection in melon ({Cucumis} melo).},
  journal  = {The Plant journal : for cell and molecular biology},
  year     = {2022},
  doi      = {10.1111/tpj.16021},
  pmid     = {36353749},
  issn     = {1365-313X},
  abstract = {Linking genotype with phenotype is a fundamental goal in biology and requires robust data for both. Recent advances in plant-genome sequencing have expedited comparisons among multiple-related individuals. The abundance of structural genomic within-species variation that has been discovered indicates that a single reference genome cannot represent the complete sequence diversity of a species, leading to the expansion of the pan-genome concept. For high-resolution forward genetics, this unprecedented access to genomic variation should be paralleled and integrated with phenotypic characterization of genetic diversity. We developed a multi-parental framework for trait dissection in melon (Cucumis melo), leveraging a novel pan-genome constructed for this highly variable cucurbit crop. A core subset of 25 diverse founders (MelonCore25), consisting of 24 accessions from the two widely cultivated subspecies of C. melo, encompassing 12 horticultural groups, and 1 feral accession was sequenced using a combination of short- and long-read technologies, and their genomes were assembled de novo. The construction of this melon pan-genome exposed substantial variation in genome size and structure, including detection of ~300 000 structural variants and ~9 million SNPs. 
A half-diallel derived set of 300 F2 populations, representing all possible MelonCore25 parental combinations, was constructed as a framework for trait dissection through integration with the pan-genome. We demonstrate the potential of this unified framework for genetic analysis of various melon traits, including rind color intensity and pattern, fruit sugar content, and resistance to fungal diseases. We anticipate that utilization of this integrated resource will enhance genetic dissection of important traits and accelerate melon breeding.},
}
@article {pmid36350178, year = {2022}, author = {Dong, X and Zhu, M and Li, Y and Huang, D and Wang, L and Yan, C and Zhang, L and Dong, F and Lu, J and Lin, X and Li, K and Bao, Q and Cong, C and Pan, W}, title = {Whole-Genome Sequencing-Based Species Classification, Multilocus Sequence Typing, and Antimicrobial Resistance Mechanism Analysis of the Enterobacter cloacae Complex in Southern China.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0216022}, doi = {10.1128/spectrum.02160-22}, pmid = {36350178}, issn = {2165-0497}, abstract = {Members of the Enterobacter cloacae complex (ECC) are important opportunistic nosocomial pathogens that are associated with a great variety of infections. Due to limited data on the genome-based classification of species and investigation of resistance mechanisms, in this work, we collected 172 clinical ECC isolates between 2019 and 2020 from three hospitals in Zhejiang, China and performed a retrospective whole-genome sequencing to analyze their population structure and drug resistance mechanisms. Of the 172 ECC isolates, 160 belonged to 9 classified species, and 12 belonged to unclassified species based on ANI analysis. Most isolates belonged to E. hormaechei (45.14%) followed by E. kobei (13.71%), which contained 126 STs, including 62 novel STs, as determined by multilocus sequence typing (MLST) analysis. Pan-genome analysis of the two ECC species showed that they have an "open" tendency, which indicated that their Pan-genome increased considerably with the addition of new genomes. A total of 80 resistance genes associated with 11 antimicrobial agent categories were identified in the genomes of all the isolates. The most prevailing resistance genes (12/29, 41.38%) were related to β-lactams followed by aminoglycosides. A total of 247 β-lactamase genes were identified, of which the blaACT genes were the most dominant (145/247, 58.70%), followed by the blaTEM genes (21/247, 8.50%). 
The inherent ACT type β-lactamase genes differed among different species. blaACT-2 and blaACT-3 were only present in E. asburiae, while blaACT-9, blaACT-12, and blaACT-6 exclusively appeared in E. kobei, E. ludwigii, and E. mori. Amon