@article {pmid33458736, year = {2021}, author = {Pais, AKL and Silva, JRD and Santos, LVSD and Albuquerque, GMR and Farias, ARG and Silva Junior, WJ and Balbino, VQ and Silva, AMF and Gama, MASD and Souza, EB}, title = {Genomic sequencing of different sequevars of Ralstonia solanacearum belonging to the Moko ecotype.}, journal = {Genetics and molecular biology}, volume = {44}, number = {1}, pages = {e20200172}, doi = {10.1590/1678-4685-GMB-2020-0172}, pmid = {33458736}, issn = {1415-4757}, abstract = {Banana vascular wilt or Moko is a disease caused by Ralstonia solanacearum. This study aimed to sequence, assemble, annotate, and compare the genomes of R. solanacearum Moko ecotypes of different sequevar strains from Brazil. Average nucleotide identity analyses demonstrated a high correlation (> 96%) between the genome sequences of strains CCRMRs277 (sequevar IIA-24), CCRMRs287 (IIB-4), CCRMRs304 (IIA-24), and CCRMRsB7 (IIB-25), which were grouped into phylotypes IIA and IIB. The number of coding sequences present in chromosomes and megaplasmids varied from 3,070 to 3,521 and 1,669 to 1,750, respectively. Pangenome analysis identified 3,378 clusters in the chromosomes, of which 2,604 were shared by all four analyzed genomes and 2,580 were single copies. In megaplasmids, 1,834 clusters were identified, of which 1,005 were shared by all four genomes and 992 were identified as single copies. Strains CCRMRsB7 and CCRMRs287 differed from the others by having unique clusters in both their chromosomes and megaplasmids, and CCRMRsB7 possessed the largest genome among all Moko ecotype strains sequenced to date. Therefore, the genomic information obtained in this study provides a theoretical basis for the identification, characterization, and phylogenetic analysis of R. 
solanacearum Moko ecotypes.}, } @article {pmid33452249, year = {2021}, author = {Rai, A and Hirakawa, H and Nakabayashi, R and Kikuchi, S and Hayashi, K and Rai, M and Tsugawa, H and Nakaya, T and Mori, T and Nagasaki, H and Fukushi, R and Kusuya, Y and Takahashi, H and Uchiyama, H and Toyoda, A and Hikosaka, S and Goto, E and Saito, K and Yamazaki, M}, title = {Chromosome-level genome assembly of Ophiorrhiza pumila reveals the evolution of camptothecin biosynthesis.}, journal = {Nature communications}, volume = {12}, number = {1}, pages = {405}, pmid = {33452249}, issn = {2041-1723}, support = {19K16391//MEXT | Japan Society for the Promotion of Science (JSPS)/ ; 16H06454//MEXT | Japan Society for the Promotion of Science (JSPS)/ ; 19H05652//MEXT | Japan Society for the Promotion of Science (JSPS)/ ; 16H06279//MEXT | Japan Society for the Promotion of Science (JSPS)/ ; 17ak0101046h0002//Japan Agency for Medical Research and Development (AMED)/ ; }, abstract = {Plant genomes remain highly fragmented and are often characterized by hundreds to thousands of assembly gaps. Here, we report chromosome-level reference and phased genome assembly of Ophiorrhiza pumila, a camptothecin-producing medicinal plant, through an ordered multi-scaffolding and experimental validation approach. With 21 assembly gaps and a contig N50 of 18.49 Mb, Ophiorrhiza genome is one of the most complete plant genomes assembled to date. We also report 273 nitrogen-containing metabolites, including diverse monoterpene indole alkaloids (MIAs). A comparative genomics approach identifies strictosidine biogenesis as the origin of MIA evolution. The emergence of strictosidine biosynthesis-catalyzing enzymes precede downstream enzymes' evolution post γ whole-genome triplication, which occurred approximately 110 Mya in O. pumila, and before the whole-genome duplication in Camptotheca acuminata identified here. 
Combining comparative genome analysis, multi-omics analysis, and metabolic gene-cluster analysis, we propose a working model for MIA evolution, and a pangenome for MIA biosynthesis, which will help in establishing a sustainable supply of camptothecin.}, } @article {pmid33446715, year = {2021}, author = {Bravakos, P and Mandalakis, M and Nomikou, P and Anastasiou, TI and Kristoffersen, JB and Stavroulaki, M and Kilias, S and Kotoulas, G and Magoulas, A and Polymenakou, PN}, title = {Genomic adaptation of Pseudomonas strains to acidity and antibiotics in hydrothermal vents at Kolumbo submarine volcano, Greece.}, journal = {Scientific reports}, volume = {11}, number = {1}, pages = {1336}, pmid = {33446715}, issn = {2045-2322}, support = {MIS5002470//Operational Programme "Competitiveness, Entrepreneurship and Innovation" (NSRF 2014-2020)/ ; MIS5002670//Operational Programme "Competitiveness, Entrepreneurship and Innovation" (NSRF 2014-2020)/ ; }, abstract = {Although the rise of antibiotic and multidrug resistant bacteria is one of the biggest current threats to human health, our understanding of the mechanisms involved in antibiotic resistance selection remains scarce. We performed whole genome sequencing of 21 Pseudomonas strains, previously isolated from an active submarine volcano of Greece, the Kolumbo volcano. Our goal was to identify the genetic basis of the enhanced co-tolerance to antibiotics and acidity of these Pseudomonas strains. Pangenome analysis identified 10,908 Gene Clusters (GCs). It revealed that the numbers of phage-related GCs and sigma factors, which both provide the mechanisms of adaptation to environmental stressors, were much higher in the high tolerant Pseudomonas strains compared to the rest ones. All identified GCs of these strains were associated with antimicrobial and multidrug resistance. 
The present study provides strong evidence that the CO2-rich seawater of the volcano associated with low pH might be a reservoir of microorganisms carrying multidrug efflux-mediated systems and pumps. We, therefore, suggest further studies of other extreme environments (or ecosystems) and their associated physicochemical parameters (or factors) in the rise of antibiotic resistance.}, } @article {pmid33438035, year = {2021}, author = {Heaton, MP and Smith, TPL and Bickhart, DM and Vander Ley, BL and Kuehn, LA and Oppenheimer, J and Shafer, WR and Schuetze, FT and Stroud, B and McClure, JC and Barfield, JP and Blackburn, HD and Kalbfleisch, TS and Davenport, KM and Kuhn, KL and Green, RE and Shapiro, B and Rosen, BD}, title = {A reference genome assembly of Simmental cattle, Bos taurus taurus.}, journal = {The Journal of heredity}, volume = {}, number = {}, pages = {}, doi = {10.1093/jhered/esab002}, pmid = {33438035}, issn = {1465-7333}, abstract = {Genomics research has relied principally on the establishment and curation of a reference genome for the species. However, it is increasingly recognized that a single reference genome cannot fully describe the extent of genetic variation within many widely-distributed species. Pangenome representations are based on high-quality genome assemblies of multiple individuals and intended to represent the broadest possible diversity within a species. A Bovine Pangenome Consortium (BPC) has recently been established to begin assembling genomes from more than 600 recognized breeds of cattle, together with other related species to provide information on ancestral alleles and haplotypes. Previously reported de novo genome assemblies for Angus, Brahman, Hereford, and Highland breeds of cattle are part of the initial BPC effort. The present report describes a complete single haplotype assembly at chromosome-scale for a fullblood Simmental cow from an F1 bison-cattle hybrid fetus by trio binning. 
Simmental cattle, also known as Fleckvieh due to their red and white spots, originated in central Europe in the 1830's as a triple-purpose breed selected for draught, meat, and dairy production. There are over 50 million Simmental cattle in the world, known today for their fast growth and beef yields. This assembly (ARS_Simm1.0) is similar in length to the other bovine assemblies at 2.86 Gb, with a scaffold N50 of 102 Mb (max scaffold 156.8 Mb) and meets or exceeds the continuity of the best B. taurus reference assemblies to date.}, } @article {pmid33431432, year = {2021}, author = {Hasan, NA and Norton, GJ and Virdi, R and Epperson, LE and Vang, CK and Hellbusch, B and Bai, X and Chan, ED and Strong, M and Honda, JR}, title = {Measurable genomic changes in Mycobacterium avium subsp. hominissuis after long-term adaptation in Acanthamoeba lenticulata and reduced persistence in macrophages.}, journal = {Journal of bacteriology}, volume = {}, number = {}, pages = {}, doi = {10.1128/JB.00257-20}, pmid = {33431432}, issn = {1098-5530}, abstract = {Free-living amoebae are ubiquitous in aquatic environments and act as environmental reservoirs for nontuberculous mycobacteria. Mycobacterium avium subsp. hominissuis recovered from Acanthamoeba has been demonstrated to be more virulent in both human and murine models. Here, we investigate the persistence of M. avium subsp. hominissuis after short-term (2 weeks) and long-term (42 weeks) co-culture in Acanthamoeba lenticulata. We hypothesize that A. lenticulata-adapted M. avium subsp. hominissuis demonstrate phenotypic and genomic changes facilitating intracellular persistence in naïve Acanthamoeba and human macrophages. M. avium subsp. hominissuis CFU in co-culture with A. lenticulata were recorded every 2 weeks up to 60 weeks. While A. lenticulata-associated M. avium subsp. hominissuis CFU did not significantly change across 60 weeks of co-culture, longer adaptation time in amoebae reduced colony size. 
Isolates recovered after 2 or 42 weeks of amoebae co-culture were referred as "early-adapted" and "late-adapted" M. avium subsp. hominissuis, respectively. Whole genome sequencing was performed on amoebae-adapted isolates with pan-genome comparisons to the original M. avium subsp. hominissuis isolate. Next, amoebae-adapted isolates were assessed for their persistence in A. lenticulata, A. castellanii, and human THP-1 macrophages. Multiplex cytokine/chemokine analyses were conducted on THP-1 culture supernatants. Compared to the original isolate, counts of late-adapted M. avium subsp. hominissuis were reduced in Acanthamoeba and contrary to expectations, lower counts were also observed in THP-1 macrophages with concomitant decrease in TNFa, IL-6, and MIP-1b suggesting that host adaptation may influence the inflammatory properties of M. avium. IMPORTANCE: Short-term interaction between Acanthamoeba and M. avium has been demonstrated to increase infectivity in human and murine models of infection, establishing the paradigm that amoebae "train" M. avium in the environment by selecting for phenotypes capable of enduring in human cells. We investigate this phenomenon further by determining the consequence of long-term amoebae adaptation on M. avium subsp. hominissuis persistence in host cells. We monitored genomic changes across long-term Acanthamoeba co-culture and report significant changes to the M. avium subsp. hominissuis genome in response to amoebae-adaptation and reduced colony size. Furthermore, we examined isolates co-cultured with A. lenticulata for 2 or 42 weeks and provide biological evidence that long-term co-culture in amoebae reduces M. 
avium persistence in human macrophages.}, } @article {pmid33430372, year = {2021}, author = {Firrao, G and Scortichini, M and Pagliari, L}, title = {Orthology-Based Estimate of the Contribution of Horizontal Gene Transfer from Distantly Related Bacteria to the Intraspecific Diversity and Differentiation of Xylella fastidiosa.}, journal = {Pathogens (Basel, Switzerland)}, volume = {10}, number = {1}, pages = {}, doi = {10.3390/pathogens10010046}, pmid = {33430372}, issn = {2076-0817}, abstract = {Xylella fastidiosa is a xylem-limited bacterium phylogenetically related to the xanthomonads, with an unusually large and diversified range of plant hosts. To ascertain the origin of its peculiarities, its pan-genome was scanned to identify the genes that are not coherent with its phylogenetic position within the order Xanthomonadales. The results of the analysis revealed that a large fraction of the genes of the Xylella pan-genome have no ortholog or close paralog in the order Xanthomonadales. For a significant part of the genes, the closest homologue was found in bacteria belonging to distantly related taxonomic groups, most frequently in the Betaproteobacteria. Other species, such as Xanthomonas vasicola and Xanthomonas albilineans which were investigated for comparison, did not show a similar genetic contribution from distant branches of the prokaryotic tree of life. This finding indicates that the process of acquisition of DNA from the environment is still a relevant component of Xylella fastidiosa evolution. 
Although the ability of Xylella fastidiosa strains to recombine among themselves is well known, the results of the pan-genome analyses stressed the additional relevance of environmental DNA in shaping their genomes, with potential consequences on their phytopathological features.}, } @article {pmid33429431, year = {2021}, author = {Du, H and Diao, C and Zhao, P and Zhou, L and Liu, JF}, title = {Integrated hybrid de novo assembly technologies to obtain high-quality pig genome using short and long reads.}, journal = {Briefings in bioinformatics}, volume = {}, number = {}, pages = {}, doi = {10.1093/bib/bbaa399}, pmid = {33429431}, issn = {1477-4054}, abstract = {With the rapid progress of sequencing technologies, various types of sequencing reads and assembly algorithms have been designed to construct genome assemblies. Although recent studies have attempted to evaluate the appropriate type of sequencing reads and algorithms for assembling high-quality genomes, it is still a challenge to set the correct combination for constructing animal genomes. Here, we present a comparative performance assessment of 14 assembly combinations-9 software programs with different short and long reads of Duroc pig. Based on the results of the optimization process for genome construction, we designed an integrated hybrid de novo assembly pipeline, HSCG, and constructed a draft genome for Duroc pig. Comparison between the new genome and Sus scrofa 11.1 revealed important breakpoints in two S. scrofa 11.1 genes. 
Our findings may provide new insights into the pan-genome analysis studies of agricultural animals, and the integrated assembly pipeline may serve as a guide for the assembly of other animal genomes.}, } @article {pmid33428861, year = {2021}, author = {Harrison, F and Smyth, AR}, title = {Professor Pangloss and the Pangenome: Does Staphylococcus aureus Have the Best of All Possible Worlds?}, journal = {American journal of respiratory and critical care medicine}, volume = {}, number = {}, pages = {}, doi = {10.1164/rccm.202012-4533ED}, pmid = {33428861}, issn = {1535-4970}, } @article {pmid33421537, year = {2021}, author = {Wang, M and Ruan, R and Li, H}, title = {The completed genome sequence of the pathogenic ascomycete fungus Penicillium digitatum.}, journal = {Genomics}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.ygeno.2021.01.001}, pmid = {33421537}, issn = {1089-8646}, abstract = {P. digitatum, the causative agent of green mold, is one of the most destructive pathogens in the citrus industry. To facilitate basal researches on this important plant pathogen, here we report a finished genome sequence for P. digitatum strain PDW03 using a combination of Illumina, PacBio, and Hi-C sequencing technologies. The assembly comprised 6 chromosomes from telomere to telomere and encodes approximately 9000 proteins. Genomic re-analyses identified 302 Carbohydrate-active enzymes, 420 secreted proteins, and 39 secondary metabolite (SM) gene clusters. Furthermore, we found 10 fragmentary SM clusters in the P. digitatum PDW03 genome. Pangenome analysis based on 5 P. digitatum genomes available showed that conserved orthogroups account for ~68% of the species pangenome. Taken together, this fully completed P. 
digitatum genome will provide an optimum resource for further researches to investigate the driving forces of fungal host switch and effectors functioning in plant-pathogen interaction.}, } @article {pmid33419343, year = {2020}, author = {Higdon, SM and Huang, BC and Bennett, AB and Weimer, BC}, title = {Identification of Nitrogen Fixation Genes in Lactococcus Isolated from Maize Using Population Genomics and Machine Learning.}, journal = {Microorganisms}, volume = {8}, number = {12}, pages = {}, doi = {10.3390/microorganisms8122043}, pmid = {33419343}, issn = {2076-2607}, support = {2019-67013-29724//united states department of agriculture - NIFA/ ; }, abstract = {Sierra Mixe maize is a landrace variety from Oaxaca, Mexico, that utilizes nitrogen derived from the atmosphere via an undefined nitrogen fixation mechanism. The diazotrophic microbiota associated with the plant's mucilaginous aerial root exudate composed of complex carbohydrates was previously identified and characterized by our group where we found 23 lactococci capable of biological nitrogen fixation (BNF) without containing any of the proposed essential genes for this trait (nifHDKENB). To determine the genes in Lactococcus associated with this phenotype, we selected 70 lactococci from the dairy industry that are not known to be diazotrophic to conduct a comparative population genomic analysis. This showed that the diazotrophic lactococcal genomes were distinctly different from the dairy isolates. Examining the pangenome followed by genome-wide association study and machine learning identified genes with the functions needed for BNF in the maize isolates that were absent from the dairy isolates. Many of the putative genes received an 'unknown' annotation, which led to the domain analysis of the 135 homologs. 
This revealed genes with molecular functions needed for BNF, including mucilage carbohydrate catabolism, glycan-mediated host adhesion, iron/siderophore utilization, and oxidation/reduction control. This is the first report of this pathway in this organism to underpin BNF. Consequently, we proposed a model needed for BNF in lactococci that plausibly accounts for BNF in the absence of the nif operon in this organism.}, } @article {pmid33417534, year = {2021}, author = {Horesh, G and Blackwell, GA and Tonkin-Hill, G and Corander, J and Heinz, E and Thomson, NR}, title = {A comprehensive and high-quality collection of Escherichia coli genomes and their genes.}, journal = {Microbial genomics}, volume = {}, number = {}, pages = {}, doi = {10.1099/mgen.0.000499}, pmid = {33417534}, issn = {2057-5858}, } @article {pmid33413118, year = {2021}, author = {Nzoyikorera, N and Diawara, I and Fresia, P and Maaloum, F and Katfy, K and Nayme, K and Maaloum, M and Cornick, J and Chaguza, C and Timinouni, M and Belabess, H and Zerouali, K and Elmdaghri, N}, title = {Whole genomic comparative analysis of Streptococcus pneumoniae serotype 1 isolates causing invasive and non-invasive infections among children under 5 years in Casablanca, Morocco.}, journal = {BMC genomics}, volume = {22}, number = {1}, pages = {39}, pmid = {33413118}, issn = {1471-2164}, support = {OPP1023440//Bill and Melinda Gates Foundation/ ; }, abstract = {BACKGROUND: Streptococcus pneumoniae serotype 1 remains a leading cause of invasive pneumococcal diseases, even in countries with PCV-10/PCV-13 vaccine implementation. The main objective of this study, which is part of the Pneumococcal African Genome project (PAGe), was to determine the phylogenetic relationships of serotype 1 isolates recovered from children patients in Casablanca (Morocco), compared to these from other African countries; and to investigate the contribution of accessory genes and recombination events to the genetic diversity of this serotype.

RESULTS: The genome average size of the six-pneumococcus serotype 1 from Casablanca was 2,227,119 bp, and the average content of coding sequences was 2113, ranging from 2041 to 2161. Pangenome analysis of the 80 genomes used in this study revealed 1685 core genes and 1805 accessory genes. The phylogenetic tree based on core genes and the hierarchical bayesian clustering analysis revealed five sublineages with a phylogeographic structure by country. The Moroccan strains cluster in two different lineages, the five invasive strains clusters altogether in a divergent clade distantly related to the non-invasive strain, that cluster with all the serotype 1 genomes from Africa.

CONCLUSIONS: The whole genome sequencing provides increased resolution analysis of the highly virulent serotype 1 in Casablanca, Morocco. Our results are concordant with previous works, showing that the phylogeography of S. pneumoniae serotype 1 is structured by country, and despite the small size (six isolates) of the Moroccan sample, our analysis shows the genetic cohesion of the Moroccan invasive isolates.}, } @article {pmid33397904, year = {2021}, author = {Yahara, K and Suzuki, M and Hirabayashi, A and Suda, W and Hattori, M and Suzuki, Y and Okazaki, Y}, title = {Long-read metagenomics using PromethION uncovers oral bacteriophages and their interaction with host bacteria.}, journal = {Nature communications}, volume = {12}, number = {1}, pages = {27}, pmid = {33397904}, issn = {2041-1723}, abstract = {Bacteriophages (phages), or bacterial viruses, are very diverse and highly abundant worldwide, including as a part of the human microbiomes. Although a few metagenomic studies have focused on oral phages, they relied on short-read sequencing. Here, we conduct a long-read metagenomic study of human saliva using PromethION. Our analyses, which integrate both PromethION and HiSeq data of >30 Gb per sample with low human DNA contamination, identify hundreds of viral contigs; 0-43.8% and 12.5-56.3% of the confidently predicted phages and prophages, respectively, do not cluster with those reported previously. Our analyses demonstrate enhanced scaffolding, and the ability to place a prophage in its host genomic context and enable its taxonomic classification. Our analyses also identify a Streptococcus phage/prophage group and nine jumbo phages/prophages. 86% of the phage/prophage group and 67% of the jumbo phages/prophages contain remote homologs of antimicrobial resistance genes. Pan-genome analysis of the phages/prophages reveals remarkable diversity, identifying 0.3% and 86.4% of the genes as core and singletons, respectively. 
Furthermore, our study suggests that oral phages present in human saliva are under selective pressure to escape CRISPR immunity. Our study demonstrates the power of long-read metagenomics utilizing PromethION in uncovering bacteriophages and their interaction with host bacteria.}, } @article {pmid33397434, year = {2021}, author = {Della Coletta, R and Qiu, Y and Ou, S and Hufford, MB and Hirsch, CN}, title = {How the pan-genome is changing crop genomics and improvement.}, journal = {Genome biology}, volume = {22}, number = {1}, pages = {3}, pmid = {33397434}, issn = {1474-760X}, support = {IOS-1546727//National Science Foundation/ ; IOS-1934384//National Science Foundation/ ; IOS-1744001//National Science Foundation/ ; IOS-1546719//National Science Foundation/ ; 2018-67013-27571//National Institute of Food and Agriculture/ ; }, abstract = {Crop genomics has seen dramatic advances in recent years due to improvements in sequencing technology, assembly methods, and computational resources. These advances have led to the development of new tools to facilitate crop improvement. The study of structural variation within species and the characterization of the pan-genome has revealed extensive genome content variation among individuals within a species that is paradigm shifting to crop genomics and improvement. 
Here, we review advances in crop genomics and how utilization of these tools is shifting in light of pan-genomes that are becoming available for many crop species.}, } @article {pmid33396617, year = {2020}, author = {Fontana, F and Alessandri, G and Lugli, GA and Mancabelli, L and Longhi, G and Anzalone, R and Viappiani, A and Ventura, M and Turroni, F and Milani, C}, title = {Probiogenomics Analysis of 97 Lactobacillus crispatus Strains as a Tool for the Identification of Promising Next-Generation Probiotics.}, journal = {Microorganisms}, volume = {9}, number = {1}, pages = {}, doi = {10.3390/microorganisms9010073}, pmid = {33396617}, issn = {2076-2607}, support = {-//Fondazione Cariparma/ ; }, abstract = {Members of the genus Lactobacillus represent the most common colonizers of the human vagina and are well-known for preserving vaginal health and contrasting the colonization of opportunistic pathogens. Remarkably, high abundance of Lactobacillus crispatus in the vaginal environment has been linked to vaginal health, leading to the widespread use of many L. crispatus strains as probiotics. Nevertheless, despite the scientific and industrial relevance of this species, a comprehensive investigation of the genomics of L. crispatus taxon is still missing. For this reason, we have performed a comparative genomics analysis of 97 L. crispatus strains, encompassing 16 strains sequenced in the framework of this study alongside 81 additional publicly available genome sequences. Thus, allowing the dissection of the L. crispatus pan-genome and core-genome followed by a comprehensive phylogenetic analysis based on the predicted core genes that revealed clustering based on ecological origin. Subsequently, a genomics-targeted approach, i.e., probiogenomics analysis, was applied for in-depth analysis of the eight L. crispatus strains of human origin sequenced in this study. 
In detail their genetic repertoire was screened for strain-specific genes responsible for phenotypic features that may guide the identification of optimal candidates for next-generation probiotics. The latter includes bacteriocin production, carbohydrates transport and metabolism, as well as a range of features that may be responsible for improved ecological fitness. In silico results regarding the genetic repertoire involved in carbohydrate metabolism were also validated by growth assays on a range of sugars, leading to the selection of putative novel probiotic strains.}, } @article {pmid33383865, year = {2020}, author = {Wibberg, D and Price-Carter, M and Rückert, C and Blom, J and Möbius, P}, title = {Complete Genome Sequence of Ovine Mycobacterium avium subsp. paratuberculosis Strain JIII-386 (MAP-S/type III) and Its Comparison to MAP-S/type I, MAP-C, and M. avium Complex Genomes.}, journal = {Microorganisms}, volume = {9}, number = {1}, pages = {}, doi = {10.3390/microorganisms9010070}, pmid = {33383865}, issn = {2076-2607}, abstract = {Mycobacterium avium (M. a.) subsp. paratuberculosis (MAP) is a worldwide-distributed obligate pathogen in ruminants causing Johne's disease. Due to a lack of complete subtype III genome sequences, there is not yet conclusive information about genetic differences between strains of cattle (MAP-C, type II) and sheep (MAP-S) type, and especially between MAP-S subtypes I, and III. Here we present the complete, circular genome of MAP-S/type III strain JIII-386 (DE) closed by Nanopore-technology and its comparison with MAP-S/type I closed genome of strain Telford (AUS), MAP-S/type III draft genome of strain S397 (U.S.), twelve closed MAP-C strains, and eight closed M.-a.-complex-strains. Structural comparative alignments revealed clearly the mosaic nature of MAP, emphasized differences between the subtypes and the higher diversity of MAP-S genomes. 
The comparison of various genomic elements including transposases and genomic islands provide new insights in MAP genomics. MAP type specific phenotypic features may be attributed to genes of known large sequence polymorphisms (LSPS s) regions I-IV and deletions #1 and #2, confirmed here, but could also result from identified frameshifts or interruptions of various virulence-associated genes (e.g., mbtC in MAP-S). Comprehensive core and pan genome analysis uncovered unique genes (e.g., cytochromes) and genes probably acquired by horizontal gene transfer in different MAP-types and subtypes, but also emphasized the highly conserved and close relationship, and the complex evolution of M.-a.-strains.}, } @article {pmid33383801, year = {2020}, author = {Yang, SM and Baek, J and Kim, E and Kim, HB and Ko, S and Kim, D and Yoon, H and Kim, HY}, title = {Development of a Genoserotyping Method for Salmonella Infantis Detection on the Basis of Pangenome Analysis.}, journal = {Microorganisms}, volume = {9}, number = {1}, pages = {}, doi = {10.3390/microorganisms9010067}, pmid = {33383801}, issn = {2076-2607}, support = {19162MFDS042//Ministry of Food and Drug Safety/ ; }, abstract = {In recent years, Salmonella Infantis has become a predominant serovariant in clinical and poultry isolates, thereby imposing a substantial economic burden on both public health and the livestock industry. With the aim of coping with the steep increase in serovar Infantis prevalence, a polymerase chain reaction (PCR)-based rapid and accurate diagnostic assay was developed in this study through pangenome profiling of 60 Salmonella serovars. A gene marker, SIN_02055, was identified, which is present in the S. Infantis genome but not in the pangenome of the other serovars. Primers specific to SIN_02055 were used to accurately detect serovar Infantis, and to successfully differentiate Infantis from the other 59 serovars in real-time PCR with a R2 of 0.999 and an efficiency of 95.76%. 
The developed method was applied to 54 Salmonella strains belonging to eight dominant serovars, and distinguished Infantis from the other seven serovars with an accuracy of 100%. The diagnostic primer set also did not show false positive amplification with 32 strains from eight non-Salmonella bacterial species. This cost-effective and rapid method can be considered an alternative to the classic serotyping using antisera.}, } @article {pmid33381850, year = {2020}, author = {Bazin, A and Gautreau, G and Médigue, C and Vallenet, D and Calteau, A}, title = {panRGP: a pangenome-based method to predict genomic islands and explore their diversity.}, journal = {Bioinformatics (Oxford, England)}, volume = {36}, number = {Supplement_2}, pages = {i651--i658}, doi = {10.1093/bioinformatics/btaa792}, pmid = {33381850}, issn = {1367-4811}, abstract = {MOTIVATION: Horizontal gene transfer (HGT) is a major source of variability in prokaryotic genomes. Regions of genome plasticity (RGPs) are clusters of genes located in highly variable genomic regions. Most of them arise from HGT and correspond to genomic islands (GIs). The study of those regions at the species level has become increasingly difficult with the data deluge of genomes. To date, no methods are available to identify GIs using hundreds of genomes to explore their diversity.

RESULTS: We present here the panRGP method that predicts RGPs using pangenome graphs made of all available genomes for a given species. It allows the study of thousands of genomes in order to access the diversity of RGPs and to predict spots of insertions. It gave the best predictions when benchmarked along other GI detection tools against a reference dataset. In addition, we illustrated its use on metagenome assembled genomes by redefining the borders of the leuX tRNA hotspot, a well-studied spot of insertion in Escherichia coli. panRGP is a scalable and reliable tool to predict GIs and spots making it an ideal approach for large comparative studies.

The methods presented in the current work are available through the following software: https://github.com/labgem/PPanGGOLiN. Detailed results and scripts to compute the benchmark metrics are available at https://github.com/axbazin/panrgp_supdata.}, } @article {pmid33375492, year = {2020}, author = {Surachat, K and Kantachote, D and Deachamag, P and Wonglapsuwan, M}, title = {Genomic Insight into Pediococcus acidilactici HN9, a Potential Probiotic Strain Isolated from the Traditional Thai-Style Fermented Beef Nhang.}, journal = {Microorganisms}, volume = {9}, number = {1}, pages = {}, doi = {10.3390/microorganisms9010050}, pmid = {33375492}, issn = {2076-2607}, abstract = {Pediococcus acidilactici HN9 is a beneficial lactic acid bacterium isolated from Nhang, a traditional Thai-style fermented beef. In this study, the molecular properties of P. acidilactici HN9 were characterized to provide insights into its potential probiotic activity. Specifically, this work sought to report the complete genome of P. acidilactici HN9 and perform a comparative genome analysis with other bacterial strains belonging to the genus Pediococcus. Genomic features of HN9 were compared with those of all other bacterial Pediococcus strains to examine the adaptation, evolutionary relationships, and diversity within this genus. Additionally, several bioinformatic approaches were used to investigate phylogenetic relationships, genome stability, virulence factors, bacteriocin production, and antimicrobial resistance genes of the HN9 strain, as well as to ensure its safety as a potential starter culture in food applications. A 2,034,522 bp circular chromosome and two circular plasmids, designated pHN9-1 (42,239-bp) and pHN9-2 (30,711-bp), were detected, and used for pan-genome analysis, as well as for identification of bacteriocin-encoding genes in 129 strains belonging to all Pediococcus species. Two CRISPR regions were identified in P. 
acidilactici HN9, including type II-A CRISPR/CRISPR-associated (Cas). This study provides an in-depth analysis on P. acidilactici HN9, facilitating a better understanding of its adaptability to different environments and its mechanism to maintain genome stability over time.}, } @article {pmid33371442, year = {2020}, author = {Blesa, A and Baquedano, I and González-de la Fuente, S and Mencía, M and Berenguer, J}, title = {Integrative and Conjugative Element ICETh1 Functions as a Pangenomic DNA Capture Module in Thermus thermophilus.}, journal = {Microorganisms}, volume = {8}, number = {12}, pages = {}, pmid = {33371442}, issn = {2076-2607}, support = {PID2019-109073RB-I00//Spanish Ministry of Science and Innovation/ ; BIO2016-77031-R//Spanish Ministry of Science and Innovation/ ; }, abstract = {Transjugation is an unconventional conjugation mechanism in Thermus thermophilus (Tth) that involves the active participation of both mating partners, encompassing a DNA secretion system (DSS) in the donor and an active natural competence apparatus (NCA) in the recipient cells. DSS is encoded within an integrative and conjugative element (ICETh1) in the strain Tth HB27, whereas the NCA is constitutively expressed in both mates. Previous experiments suggested the presence of multiple origins of transfer along the genome, which could generate genomic mosaicity among the progeny. Here, we designed transjugation experiments between two closely related strains of Tth with highly syntenic genomes, containing enough single nucleotide polymorphisms to allow precise parenthood analysis. Individual clones from the progeny were sequenced, revealing their origin as derivatives of our ICETh1-containing intended "donor" strain (HB27), which had acquired separate fragments from the genome of the ICETh1-free HB8 cells, which are our intended recipient. 
Due to the bidirectional nature of transjugation, only assays employing competence-defective HB27 derivatives as donors allowed the recovery of HB8-derived progeny. These results show a preference for a retrotransfer mechanism in transjugation in ICETh1-bearing strains, supporting an inter-strain gene-capture function for ICETh1. This function could benefit the donor-capable host by facilitating the acquisition of adaptive traits from external sources, ultimately increasing the open pangenome of Thermus, maximizing the potential repertoire of physiological and phenotypical traits related to adaptation and speciation.}, } @article {pmid33362726, year = {2020}, author = {Verma, DK and Chaudhary, C and Singh, L and Sidhu, C and Siddhardha, B and Prasad, SE and Thakur, KG}, title = {Isolation and Taxonomic Characterization of Novel Haloarchaeal Isolates From Indian Solar Saltern: A Brief Review on Distribution of Bacteriorhodopsins and V-Type ATPases in Haloarchaea.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {554927}, pmid = {33362726}, issn = {1664-302X}, abstract = {Haloarchaea inhabit high salinity environments worldwide. They are a potentially rich source of crucial biomolecules like carotenoids and industrially useful proteins. However, diversity in haloarchaea present in Indian high salinity environments is poorly studied. In the present study, we isolated 12 haloarchaeal strains from hypersaline Kottakuppam, Tamil Nadu solar saltern in India. 16S rRNA based taxonomic characterization of these isolates suggested that nine of them are novel strains that belong to genera Haloarcula, Halomicrobium, and Haloferax. Transmission electron microscopy suggests the polymorphic nature of these haloarchaeal isolates. Most of the haloarchaeal species are known to be high producers of carotenoids. We were able to isolate carotenoids from all these 12 isolates. 
The UV-Vis spectroscopy-based analysis suggests that bacterioruberin and lycopene are the major carotenoids produced by these isolates. Based on the visual inspection of the purified carotenoids, the isolates were classified into two broad categories i.e., yellow and orange, attributed to the differences in the ratio of bacterioruberin and lycopene as confirmed by the UV-Vis spectral analysis. Using a PCR-based screening assay, we were able to detect the presence of the bacteriorhodopsin gene (bop) in 11 isolates. We performed whole-genome sequencing for three bop positive and one bop negative haloarchaeal isolates. Whole-genome sequencing, followed by pan-genome analysis identified multiple unique genes involved in various biological functions. We also successfully cloned, expressed, and purified functional recombinant bacteriorhodopsin (BR) from one of the isolates using Escherichia coli as an expression host. BR has light-driven proton pumping activity resulting in the proton gradient across the membrane, which is utilized by V-Type ATPases to produce ATP. We analyzed the distribution of bop and other accessory genes involved in functional BR expression and ATP synthesis in all the representative haloarchaeal species. Our bioinformatics-based analysis of all the sequenced members of genus Haloarcula suggests that bop, if present, is usually inserted between the genes coding for B and D subunits of the V-type ATPases operon. This study provides new insights into the genomic variations in haloarchaea and reports expression of new BR variant having good expression in functional form in E. 
coli.}, } @article {pmid33360877, year = {2020}, author = {Li, F and Ye, Q and Chen, M and Zhou, B and Xiang, X and Wang, C and Shang, Y and Zhang, J and Pang, R and Wang, J and Xue, L and Cai, S and Ding, Y and Wu, Q}, title = {Mining of novel target genes through pan-genome analysis for multiplex PCR differentiation of the major Listeria monocytogenes serotypes.}, journal = {International journal of food microbiology}, volume = {339}, number = {}, pages = {109026}, doi = {10.1016/j.ijfoodmicro.2020.109026}, pmid = {33360877}, issn = {1879-3460}, abstract = {The abundant information provided by the pan-genome analysis approach reveals the diversity among Listeria monocytogenes serotypes. The objective of this study was to mine novel target genes using pan-genome analysis for multiplex PCR detection and differentiation of the major L. monocytogenes serotypes present in food. Pan-genome analysis and PCR validation revealed a total of 10 specific targets: one for lineage I, two for serogroup I.1, one for serogroup I.2, two for lineage II, one for serogroup II.1, three for lineage III. Primers for the novel targets were highly specific in individual reactions. The detection limits were 10³-10⁴ colony-forming units (CFU)/mL in pure bacterial cultures, meeting the requirements of molecular detection. Based on these novel targets, two new "lineage" multiplex PCR assays were developed to simultaneously distinguish between three lineages (I, II, and III) and five major serotypes (1/2a, 1/2b, 1/2c, 4b, and 4c) of L. monocytogenes. The detection limits of lineage I and lineage II&III mPCRs were 0.771 pg/μL and 1.76 pg/μL genomic DNA, respectively. The specificity of the mPCRs was robustly verified using other L. monocytogenes and non-L. monocytogenes serotypes. These results suggest that the two "lineage" multiplex PCRs based on novel targets offer a promising approach for accurate, sensitive, and rapid identification of L. 
monocytogenes serotypes.}, } @article {pmid33360413, year = {2020}, author = {Lassalle, F and Dastgheib, SMM and Zhao, FJ and Zhang, J and Verbarg, S and Frühling, A and Brinkmann, H and Osborne, TH and Sikorski, J and Balloux, F and Didelot, X and Santini, JM and Petersen, J}, title = {Phylogenomics reveals the basis of adaptation of Pseudorhizobium species to extreme environments and supports a taxonomic revision of the genus.}, journal = {Systematic and applied microbiology}, volume = {44}, number = {1}, pages = {126165}, doi = {10.1016/j.syapm.2020.126165}, pmid = {33360413}, issn = {1618-0984}, abstract = {The family Rhizobiaceae includes many genera of soil bacteria, often isolated for their association with plants. Herein, we investigate the genomic diversity of a group of Rhizobium species and unclassified strains isolated from atypical environments, including seawater, rock matrix or polluted soil. Based on whole-genome similarity and core genome phylogeny, we show that this group corresponds to the genus Pseudorhizobium. We thus reclassify Rhizobium halotolerans, R. marinum, R. flavum and R. endolithicum as P. halotolerans sp. nov., P. marinum comb. nov., P. flavum comb. nov. and P. endolithicum comb. nov., respectively, and show that P. pelagicum is a synonym of P. marinum. We also delineate a new chemolithoautotroph species, P. banfieldiae sp. nov., whose type strain is NT-26T (=DSM 106348T=CFBP 8663T). This genome-based classification was supported by a chemotaxonomic comparison, with increasing taxonomic resolution provided by fatty acid, protein and metabolic profiles. In addition, we used a phylogenetic approach to infer scenarios of duplication, horizontal transfer and loss for all genes in the Pseudorhizobium pangenome. We thus identify the key functions associated with the diversification of each species and higher clades, shedding light on the mechanisms of adaptation to their respective ecological niches. 
Respiratory proteins acquired at the origin of Pseudorhizobium were combined with clade-specific genes to enable different strategies for detoxification and nutrition in harsh, nutrient-poor environments.}, } @article {pmid33347948, year = {2020}, author = {Pardini Gontijo, MT and Pereira Vidigal, PM and Soto Lopez, ME and Brocchi, M}, title = {Bacteriophages that infect Gram-negative bacteria as source of signal-arrest-release motif lysins.}, journal = {Research in microbiology}, volume = {}, number = {}, pages = {103794}, doi = {10.1016/j.resmic.2020.103794}, pmid = {33347948}, issn = {1769-7123}, abstract = {Treatment of infections caused by multidrug-resistant (MDR) Gram-negative bacteria is challenging, a potential solution for which is the use of bacteriophage-derived lytic enzymes. However, the exogenous action of bacteriophage lysins against Gram-negative bacteria is hindered due to the presence of an impermeable outer membrane in these bacteria. Nevertheless, recent research has demonstrated that some lysins are capable of permeating the outer membrane of Gram-negative bacteria with the help of signal peptides. In the present study, we investigated the genomes of 309 bacteriophages that infect Gram-negative pathogens of clinical interest in order to determine the evolutionary markers of signal peptide-containing lysins. Complete genomes displayed 265 putative lysins, of which 17 (6.41%) contained signal-arrest-release motifs and 41 (15.47%) contained cleavable signal peptides. There was no apparent relationship between host specificity and lysin diversity. Nevertheless, the evolution of lysin genes might not be independent of the rest of the bacteriophage genome once pan-genome clustering and lysin diversity appear to be correlated. In addition, signal peptide- and signal-arrest-release-containing lysins were monophyletically distributed in the protein cladogram, suggesting that the natural selection of holin-independent lysins is divergent. 
Our study screened 58 (21.89%) out of 265 potential candidates for in vitro experimentation against MDR bacteria.}, } @article {pmid33347470, year = {2020}, author = {Viana, MVC and Profeta, R and da Silva, AL and Hurtado, R and Cerqueira, JC and Ribeiro, BFS and Almeida, MO and Morais-Rodrigues, F and Soares, SC and Oliveira, M and Tavares, L and Figueiredo, H and Wattam, AR and Barh, D and Ghosh, P and Silva, A and Azevedo, V}, title = {Taxonomic classification of strain PO100/5 shows a broader geographic distribution and genetic markers of the recently described Corynebacterium silvaticum.}, journal = {PloS one}, volume = {15}, number = {12}, pages = {e0244210}, doi = {10.1371/journal.pone.0244210}, pmid = {33347470}, issn = {1932-6203}, abstract = {The bacterial strain PO100/5 was isolated from a skin abscess taken from a pig (Sus scrofa domesticus) in the Alentejo region of southern Portugal. It was identified as Corynebacterium pseudotuberculosis using biochemical tests, multiplex PCR and Pulsed Field Gel Electrophoresis. After genome sequencing and rpoB phylogeny, the strain was classified as C. ulcerans. To better understand the taxonomy of this strain and improve identification methods, we compared strain PO100/5 to other publicly available genomes from C. diphtheriae group. Taxonomic analysis reclassified it and three other strains as the recently described C. silvaticum, which have been isolated from wild boar and roe deer in Germany and Austria. The results showed that PO100/5 is the first sequenced genome of a C. silvaticum strain from livestock and a different geographical region, has the unique sequence type ST709, and could produce the diphtheriae toxin, along with strain 05-13. Genomic analysis of PO100/5 showed four prophages, and eight conserved genomic islands in comparison to C. ulcerans. Pangenome analysis of 38 C. silvaticum and 76 C. ulcerans genomes suggested that C. 
silvaticum is a genetically homogeneous species, with 73.6% of its genes conserved and a pangenome near to be closed (α > 0.952). There are 172 genes that are unique to C. silvaticum in comparison to C. ulcerans. Most of these conserved genes are related to nutrient uptake and metabolism, prophages or immunity against them, and could be genetic markers for species identification. Strains PO100/5 (livestock) and KL0182T (wild boar) were predicted to be potential human pathogens. This information may be useful for identification and surveillance of this pathogen.}, } @article {pmid33344086, year = {2020}, author = {Hansen, MJ and Kudirkiene, E and Dalsgaard, I}, title = {Analysis of 44 Vibrio anguillarum genomes reveals high genetic diversity.}, journal = {PeerJ}, volume = {8}, number = {}, pages = {e10451}, pmid = {33344086}, issn = {2167-8359}, abstract = {Vibriosis, a hemorrhagic septicemic disease caused by the bacterium Vibrio anguillarum, is an important bacterial infection in Danish sea-reared rainbow trout. Despite of vaccination, outbreaks still occur, likely because the vaccine is based on V. anguillarum strains from abroad/other hosts than rainbow trout. Information about the genetic diversity of V. anguillarum specifically in Danish rainbow trout, is required to investigate this claim. Consequently, the aim of the present investigation was to sequence and to characterize a collection of 44 V. anguillarum strains obtained primarily from vibriosis outbreaks in Danish rainbow trout. The strains were sequenced, de novo assembled, and the genomes examined for the presence of plasmids, virulence, and acquired antibiotic resistance genes. To investigate the phylogeny, single nucleotide polymorphisms were identified, and the pan-genome was calculated. All strains carried tet(34) encoding tetracycline resistance, and 36 strains also contained qnrVC6 for increased fluoroquinolone/quinolone resistance. 
But interestingly, all strains were phenotypic sensitive to both oxytetracycline and oxolinic acid. Almost all serotype O1 strains contained a pJM1-like plasmid and nine serotype O2A strains carried the plasmid p15. The distribution of virulence genes was rather similar across the strains, although evident variance among serotypes was observed. Most significant, almost all serotype O2 and O3 strains, as well as the serotype O1 strain without a pJM1-like plasmid, carried genes encoding piscibactin biosynthesis. Hence supporting the hypothesis, that piscibactin plays a crucial role in virulence for pathogenic strains lacking the anguibactin system. The phylogenetic analysis and pan-genome calculations revealed great diversity within V. anguillarum. Serotype O1 strains were in general very similar, whereas considerable variation was found among serotype O2A strains. The great diversity within the V. anguillarum serotype O2A genomes is most likely the reason why vaccines provide good protection from some strains, but not from others. Hopefully, the new genomic data and knowledge provided in this study might help develop an optimized vaccine against V. anguillarum in the future to reduce the use of antibiotics, minimize economic losses and improve the welfare of the fish.}, } @article {pmid33343549, year = {2020}, author = {Ghaly, TM and Paulsen, IT and Sajjad, A and Tetu, SG and Gillings, MR}, title = {A Novel Family of Acinetobacter Mega-Plasmids Are Disseminating Multi-Drug Resistance Across the Globe While Acquiring Location-Specific Accessory Genes.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {605952}, pmid = {33343549}, issn = {1664-302X}, abstract = {Acinetobacter species are emerging as major nosocomial pathogens, aided by their ability to acquire resistance to all classes of antibiotics. 
A key factor leading to their multi-drug resistance phenotypes is the acquisition of a wide variety of mobile genetic elements, particularly large conjugative plasmids. Here, we characterize a family of 21 multi-drug resistance mega-plasmids in 11 different Acinetobacter species isolated from various locations across the globe. The plasmid family exhibits a highly dynamic and diverse accessory genome, including 221 antibiotic resistance genes (ARGs) that confer resistance to 13 classes of antibiotics. We show that plasmids isolated within the same geographic region are often evolutionarily divergent members of this family based on their core-genome, yet they exhibit a more similar accessory genome. Individual plasmids, therefore, can disseminate to different locations around the globe, where they then appear to acquire diverse sets of accessory genes from their local surroundings. Further, we show that plasmids from several geographic regions were enriched with location-specific functional traits. Together, our findings show that these mega-plasmids can transmit across species boundaries, have the capacity for global dissemination, can accumulate a diverse suite of location-specific accessory genes, and can confer multi-drug resistance phenotypes of significant concern for human health. We therefore highlight this previously undescribed plasmid family as a serious threat to healthcare systems worldwide. These findings also add to the growing concern that mega-plasmids are key disseminators of antibiotic resistance and require global surveillance.}, } @article {pmid33339499, year = {2020}, author = {Cai, Z and Guo, Q and Yao, Z and Zheng, W and Xie, J and Bai, S and Zhang, H}, title = {Comparative genomics of Klebsiella michiganensis BD177 and related members of Klebsiella sp. 
reveal the symbiotic relationship with Bactrocera dorsalis.}, journal = {BMC genetics}, volume = {21}, number = {Suppl 2}, pages = {138}, pmid = {33339499}, issn = {1471-2156}, abstract = {BACKGROUND: Bactrocera dorsalis is a destructive polyphagous and highly invasive insect pest of tropical and subtropical species of fruit and vegetable crops. The sterile insect technique (SIT) has been used for decades to control insect pests of agricultural, veterinary, and human health importance. Irradiation of pupae in SIT can reduce the ecological fitness of the sterile insects. Our previous study has shown that a gut bacterial strain, BD177, could restore ecological fitness by promoting host food intake and metabolic activities.

RESULTS: Using long-read sequence technologies, we assembled the complete genome of K. michiganensis BD177 strain. The complete genome of K. michiganensis BD177 comprises one circular chromosome and four plasmids with a GC content of 55.03%. The pan-genome analysis was performed on 119 genomes (strain BD177 genome and 118 out of 128 published Klebsiella sp. genomes since ten were discarded). The pan-genome includes a total of 49305 gene clusters, a small number of 858 core genes, and a high number of accessory (10566) genes. Pan-genome and average nucleotide identity (ANI) analysis showed that BD177 is more similar to the type strain K. michiganensis DSM2544, while away from the type strain K. oxytoca ATCC13182. Comparative genome analysis with 21 K. oxytoca and 12 K. michiganensis strains, identified 213 unique genes, several of them related to amino acid metabolism, metabolism of cofactors and vitamins, and xenobiotics biodegradation and metabolism in BD177 genome.

CONCLUSIONS: Phylogenomics analysis reclassified strain BD177 as a member of the species K. michiganensis. Comparative genome analysis suggested that K. michiganensis BD177 has the strain-specific ability to provide three essential amino acids (phenylalanine, tryptophan and methionine) and two vitamins B (folate and riboflavin) to B. dorsalis. The clear classification status of BD177 strain and identification of unique genetic characteristics may contribute to expanding our understanding of the symbiotic relationship of gut microbiota and B. dorsalis.}, } @article {pmid33339176, year = {2020}, author = {Ramsamy, Y and Mlisana, KP and Amoako, DG and Abia, ALK and Allam, M and Ismail, A and Singh, R and Essack, SY}, title = {Comparative Pathogenomics of Aeromonas veronii from Pigs in South Africa: Dominance of the Novel ST657 Clone.}, journal = {Microorganisms}, volume = {8}, number = {12}, pages = {}, pmid = {33339176}, issn = {2076-2607}, support = {Grant no. 106063//National Research Foundation/ ; }, abstract = {The pathogenomics of carbapenem-resistant Aeromonas veronii (A. veronii) isolates recovered from pigs in KwaZulu-Natal, South Africa, was explored by whole genome sequencing on the Illumina MiSeq platform. Genomic functional annotation revealed a vast array of similar central networks (metabolic, cellular, and biochemical). The pan-genome analysis showed that the isolates formed a total of 4349 orthologous gene clusters, 4296 of which were shared; no unique clusters were observed. All the isolates had similar resistance phenotypes, which corroborated their chromosomally mediated resistome (blaCPHA3 and blaOXA-12) and belonged to a novel sequence type, ST657 (a satellite clone). Isolates in the same sub-clades clustered according to their clonal lineages and host. Mobilome analysis revealed the presence of chromosome-borne insertion sequence families. The estimated pathogenicity score (Pscore ≈ 0.60) indicated their potential pathogenicity in humans. 
Furthermore, these isolates carried several virulence factors (adherence factors, toxins, and immune evasion), in different permutations and combinations, indicating a differential ability to establish infection. Phylogenomic and metadata analyses revealed a predilection for water environments and aquatic animals, with more recent reports in humans and food animals across geographies, making A. veronii a potential One Health indicator bacterium.}, } @article {pmid33329454, year = {2020}, author = {Park, S and Steinegger, M and Cho, HS and Chun, J}, title = {Metagenomic Association Analysis of Gut Symbiont Limosilactobacillus reuteri Without Host-Specific Genome Isolation.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {585622}, pmid = {33329454}, issn = {1664-302X}, abstract = {Limosilactobacillus reuteri is a model symbiont that colonizes the guts of vertebrates in studies on host adaptation of the gut symbiont. Previous studies have investigated host-specific phylogenetic and functional properties by isolating the genomic sequence. This dependency on genome isolation is a significant bottleneck. Here, we propose a method to study the association between L. reuteri and its hosts directly from metagenomic reads without strain isolation using pan-genomes. We characterized the host-specificity of L. reuteri in metagenomic samples, not only in previously studied organisms (mice and pigs) but also in dogs. For each sample, two types of profiles were generated: (1) genome-based strain type abundance profiles and (2) gene composition profiles. Our profiles showed host-association of L. reuteri in both phylogenetic and functional aspects without depending on host-specific genome isolation. We observed not only the presence of host-specific lineages, but also the dominant lineages associated with the different hosts. Furthermore, we showed that metagenome-assembled genomes provide detailed insights into the host-specificity of L. reuteri. 
We inferred evolutionary trajectories of host-associative L. reuteri strains in the metagenomic samples by placing the metagenome-assembled genomes into a phylogenetic tree and identified novel host-specific genes that were unannotated in existing pan-genome databases. Our pan-genomic approach reduces the need for time-consuming and expensive host-specific genome isolation, while producing consistent results with previous host-association findings in mice and pigs. Additionally, we predicted associations that have not yet been studied in dogs.}, } @article {pmid33310406, year = {2020}, author = {Wolter, LA and Wietz, M and Ziesche, L and Breider, S and Leinberger, J and Poehlein, A and Daniel, R and Schulz, S and Brinkhoff, T}, title = {Pseudooceanicola algae sp. nov., isolated from the marine macroalga Fucus spiralis, shows genomic and physiological adaptations for an algae-associated lifestyle.}, journal = {Systematic and applied microbiology}, volume = {44}, number = {1}, pages = {126166}, doi = {10.1016/j.syapm.2020.126166}, pmid = {33310406}, issn = {1618-0984}, abstract = {The genus Pseudooceanicola from the alphaproteobacterial Roseobacter group currently includes ten validated species. We herein describe strain Lw-13eT, the first Pseudooceanicola species from marine macroalgae, isolated from the brown alga Fucus spiralis abundant at European and North American coasts. Physiological and pangenome analyses of Lw-13eT showed corresponding adaptive features. Adaptations to the tidal environment include a broad salinity tolerance, degradation of macroalgae-derived substrates (mannitol, mannose, proline), and resistance to several antibiotics and heavy metals. Notably, Lw-13eT can degrade oligomeric alginate via PL15 alginate lyase encoded in a polysaccharide utilization locus (PUL), rarely described for roseobacters to date. 
Plasmid localization of the PUL strengthens the importance of mobile genetic elements for evolutionary adaptations within the Roseobacter group. PL15 homologs were primarily detected in marine plant-associated metagenomes from coastal environments but not in the open ocean, corroborating its adaptive role in algae-rich habitats. Exceptional is the tolerance of Lw-13eT against the broad-spectrum antibiotic tropodithietic acid, produced by Phaeobacter spp. co-occurring in coastal habitats. Furthermore, Lw-13eT exhibits features resembling terrestrial plant-bacteria associations, i.e. biosynthesis of siderophores, terpenes and volatiles, which may contribute to mutual bacteria-algae interactions. Closest described relative of Lw-13eT is Pseudopuniceibacterium sediminis CY03T with 98.4% 16S rRNA gene sequence similarity. However, protein sequence-based core genome phylogeny and average nucleotide identity indicate affiliation of Lw-13eT with the genus Pseudooceanicola. Based on phylogenetic, physiological and (chemo)taxonomic distinctions, we propose strain Lw-13eT (=DSM 29013T=LMG 30557T) as a novel species with the name Pseudooceanicola algae.}, } @article {pmid33304460, year = {2020}, author = {Jiao, J and Tian, CF}, title = {Ancestral zinc-finger bearing protein MucR in alpha-proteobacteria: A novel xenogeneic silencer?.}, journal = {Computational and structural biotechnology journal}, volume = {18}, number = {}, pages = {3623-3631}, pmid = {33304460}, issn = {2001-0370}, abstract = {The MucR/Ros family protein is conserved in alpha-proteobacteria and characterized by its zinc-finger motif that has been proposed as the ancestral domain from which the eukaryotic C2H2 zinc-finger structure evolved. In the past decades, accumulated evidences have revealed MucR as a pleiotropic transcriptional regulator that integrating multiple functions such as virulence, symbiosis, cell cycle and various physiological processes. 
Scattered reports indicate that MucR mainly acts as a repressor, through oligomerization and binding to multiple sites of AT-rich target promoters. The N-terminal region and zinc-finger bearing C-terminal region of MucR mediate oligomerization and DNA-binding, respectively. These features are convergent to those of xenogeneic silencers such as H-NS, MvaT, Lsr2 and Rok, which are mainly found in other lineages. Phylogenetic analysis of MucR homologs suggests an ancestral origin of MucR in alpha- and delta-proteobacteria. Multiple independent duplication and lateral gene transfer events contribute to the diversity and phyletic distribution of MucR. Finally, we posed questions which remain unexplored regarding the putative roles of MucR as a xenogeneic silencer and a general manager in balancing adaptation and regulatory integration in the pangenome context.}, } @article {pmid33303031, year = {2020}, author = {Zhou, G and Liang, H and Gu, Y and Ju, C and He, L and Guo, P and Shao, Z and Zhang, J and Zhang, M}, title = {Comparative genomics of Helicobacter pullorum from different countries.}, journal = {Gut pathogens}, volume = {12}, number = {1}, pages = {56}, pmid = {33303031}, issn = {1757-4749}, support = {SZSM201803081//the Sanming Project of Medicine in Shenzhen/ ; 2018ZX10712-001//the National Key Program of China/ ; }, abstract = {BACKGROUND: Helicobacter pullorum commonly colonized in the gastrointestinal tract of poultry and caused gastroenteritis. This bacterium could be transmitted to humans through contaminated food and caused colitis and hepatitis. Currently, the genetic characteristics of the H. pullorum were not recognized enough. In this study, the genomes of 23 H. pullorum strains from different countries were comparatively analyzed. Among them, H. pullorum 2013BJHL was the first isolated and reported in China.

RESULTS: The genomes of the studied strains were estimated to vary from 1.55 to 2.03 Mb, with a GC content of ~ 34%. 4064 pan genes and 1267 core genes were obtained from the core-pan genome analysis using the Roary pipeline. Core genome SNPs (cg-SNPs) were obtained using Snippy4 software. Two groups were identified with the phylogenetic analysis based on the cg-SNPs. Some adhesion-related, immune regulation, motility-related, antiphagocytosis-related, toxin-related and quorum sensing related genes were identified as virulence factors. APH(3')-IIIa, APH(2'')-If, and AAC(6')-Ie-APH(2'')-Ia were identified as antibiotic resistance genes among the H. pullorum genomes. cat, SAT-4 and tetO genes were only identified in 2013BJHL, and tet(C) was identified in MIT98-5489. MIC determination revealed that the 2013BJHL showed acquired resistance to ciprofloxacin, nalidixic acid, tetracycline, gentamicin, streptomycin and erythromycin, only sensitive to ampicillin. The antibiotic resistance genetic determinants on the 2013BJHL genome correlate well with observed antimicrobial susceptibility patterns. Two types of type VI secretion system (T6SS) were identified in 52.2% (12/23) of the studied strains.

CONCLUSION: In this study, we obtained the genetic characteristics of H. pullorum from different sources in the world. The comprehensive genetic characteristics of H. pullorum were first described. H. pullorum showed highly genetic diversity and two sub-types of T6SSs were first identified in H. pullorum. 2013BJHL was found to be multidrug resistant as it was resistant to at least three different antibiotic classes.}, } @article {pmid33302542, year = {2020}, author = {Webster, J and Bogema, D and Chapman, TA}, title = {Comparative Genomics of Xanthomonas citri pv. citri A* Pathotype Reveals Three Distinct Clades with Varying Plasmid Distribution.}, journal = {Microorganisms}, volume = {8}, number = {12}, pages = {}, pmid = {33302542}, issn = {2076-2607}, support = {PBCRC2002, PBCRC2156//Plant Biosecurity CRC/ ; }, abstract = {Citrus bacterial canker (CBC) is an important disease of citrus cultivars worldwide that causes blister-like lesions on host plants and leads to more severe symptoms such as plant defoliation and premature fruit drop. The causative agent, Xanthomonas citri pv. citri, exists as three pathotypes-A, A*, and Aw-which differ in their host range and elicited host response. To date, comparative analyses have been hampered by the lack of closed genomes for the A* pathotype. In this study, we sequenced and assembled six CBC isolates of pathotype A* using second- and third-generation sequencing technologies to produce complete, closed assemblies. Analysis of these genomes and reference A, A*, and Aw sequences revealed genetic groups within the A* pathotype. Investigation of accessory genomes revealed virulence factors, including type IV secretion systems and heavy metal resistance genes, differentiating the genetic groups. Genomic comparisons of closed genome assemblies also provided plasmid distribution information for the three genetic groups of A*. 
The genomes presented here complement existing closed genomes of A and Aw pathotypes that are publicly available and open opportunities to investigate the evolution of X. citri pv. citri and the virulence factors that contribute to this serious pathogen.}, } @article {pmid33301093, year = {2021}, author = {Danilevicz, MF and Tay Fernandez, CG and Marsh, JI and Bayer, PE and Edwards, D}, title = {High-Throughput Genotyping Technologies in Plant Taxonomy.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2222}, number = {}, pages = {149--166}, pmid = {33301093}, issn = {1940-6029}, abstract = {Molecular markers provide researchers with a powerful tool for variation analysis between plant genomes. They are heritable and widely distributed across the genome and for this reason have many applications in plant taxonomy and genotyping. Over the last decade, molecular marker technology has developed rapidly and is now a crucial component for genetic linkage analysis, trait mapping, diversity analysis, and association studies. This chapter focuses on molecular marker discovery, its application, and future perspectives for plant genotyping through pangenome assemblies. Included are descriptions of automated methods for genome and sequence distance estimation, genome contaminant analysis in sequence reads, genome structural variation, and SNP discovery methods.}, } @article {pmid33295861, year = {2020}, author = {Choo, SW and Rishik, S and Wee, WY}, title = {Comparative genome analyses of Mycobacteroides immunogenum reveals two potential novel subspecies.}, journal = {Microbial genomics}, volume = {6}, number = {12}, pages = {}, doi = {10.1099/mgen.0.000495}, pmid = {33295861}, issn = {2057-5858}, abstract = {Mycobacteroides immunogenum is an emerging opportunistic pathogen implicated in nosocomial infections. Comparative genome analyses may provide better insights into its genomic structure, functions and evolution. The present analysis showed that M. 
immunogenum has an open pan-genome. Approximately 36.8% of putative virulence genes were identified in the accessory regions of M. immunogenum. Phylogenetic analyses revealed two potential novel subspecies of M. immunogenum, supported by evidence from ANIb (average nucleotide identity using blast) and GGDC (Genome to Genome Distance Calculator) analyses. We identified 74 genomic islands (GIs) in Subspecies 1 and 23 GIs in Subspecies 2. All Subspecies 2-harboured GIs were not found in Subspecies 1, indicating that they might have been acquired by Subspecies 2 after their divergence. Subspecies 2 has more defence genes than Subspecies 1, suggesting that it might be more resistant to the insertion of foreign DNA and probably explaining why Subspecies 2 has fewer GIs. Positive selection analysis suggest that M. immunogenum has a lower selection pressure compared to non-pathogenic mycobacteria. Thirteen genes were positively selected and many were involved in virulence.}, } @article {pmid33283866, year = {2020}, author = {Valero-Jiménez, CA and Steentjes, MBF and Slot, JC and Shi-Kunne, X and Scholten, OE and van Kan, JAL}, title = {Dynamics in Secondary Metabolite Gene Clusters in Otherwise Highly Syntenic and Stable Genomes in the Fungal Genus Botrytis.}, journal = {Genome biology and evolution}, volume = {12}, number = {12}, pages = {2491--2507}, pmid = {33283866}, issn = {1759-6653}, abstract = {Fungi of the genus Botrytis infect >1,400 plant species and cause losses in many crops. Besides the broad host range pathogen Botrytis cinerea, most other species are restricted to a single host. Long-read technology was used to sequence genomes of eight Botrytis species, mostly pathogenic on Allium species, and the related onion white rot fungus, Sclerotium cepivorum. Most assemblies contained <100 contigs, with the Botrytis aclada genome assembled in 16 gapless chromosomes. 
The core genome and pan-genome of 16 Botrytis species were defined and the secretome, effector, and secondary metabolite repertoires analyzed. Among those genes, none is shared among all Allium pathogens and absent from non-Allium pathogens. The genome of each of the Allium pathogens contains 8-39 predicted effector genes that are unique for that single species, none stood out as potential determinant for host specificity. Chromosome configurations of common ancestors of the genus Botrytis and family Sclerotiniaceae were reconstructed. The genomes of B. cinerea and B. aclada were highly syntenic with only 19 rearrangements between them. Genomes of Allium pathogens were compared with ten other Botrytis species (nonpathogenic on Allium) and with 25 Leotiomycetes for their repertoire of secondary metabolite gene clusters. The pattern was complex, with several clusters displaying patchy distribution. Two clusters involved in the synthesis of phytotoxic metabolites are at distinct genomic locations in different Botrytis species. We provide evidence that the clusters for botcinic acid production in B. cinerea and Botrytis sinoallii were acquired by horizontal transfer from taxa within the same genus.}, } @article {pmid33283865, year = {2020}, author = {Fagorzi, C and Ilie, A and Decorosi, F and Cangioli, L and Viti, C and Mengoni, A and diCenzo, GC}, title = {Symbiotic and Nonsymbiotic Members of the Genus Ensifer (syn. Sinorhizobium) Are Separated into Two Clades Based on Comparative Genomics and High-Throughput Phenotyping.}, journal = {Genome biology and evolution}, volume = {12}, number = {12}, pages = {2521--2534}, pmid = {33283865}, issn = {1759-6653}, abstract = {Rhizobium-legume symbioses serve as paradigmatic examples for the study of mutualism evolution. The genus Ensifer (syn. Sinorhizobium) contains diverse plant-associated bacteria, a subset of which can fix nitrogen in symbiosis with legumes. 
To gain insights into the evolution of symbiotic nitrogen fixation (SNF), and interkingdom mutualisms more generally, we performed extensive phenotypic, genomic, and phylogenetic analyses of the genus Ensifer. The data suggest that SNF likely emerged several times within the genus Ensifer through independent horizontal gene transfer events. Yet, the majority (105 of 106) of the Ensifer strains with the nodABC and nifHDK nodulation and nitrogen fixation genes were found within a single, monophyletic clade. Comparative genomics highlighted several differences between the "symbiotic" and "nonsymbiotic" clades, including divergences in their pangenome content. Additionally, strains of the symbiotic clade carried 325 fewer genes, on average, and appeared to have fewer rRNA operons than strains of the nonsymbiotic clade. Initial characterization of a subset of ten Ensifer strains identified several putative phenotypic differences between the clades. Tested strains of the nonsymbiotic clade could catabolize 25% more carbon sources, on average, than strains of the symbiotic clade, and they were better able to grow in LB medium and tolerate alkaline conditions. On the other hand, the tested strains of the symbiotic clade were better able to tolerate heat stress and acidic conditions. 
We suggest that these data support the division of the genus Ensifer into two main subgroups, as well as the hypothesis that pre-existing genetic features are required to facilitate the evolution of SNF in bacteria.}, } @article {pmid33273480, year = {2020}, author = {Gaba, S and Kumari, A and Medema, M and Kaushik, R}, title = {Pan-genome analysis and ancestral state reconstruction of class halobacteria: probability of a new super-order.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {21205}, pmid = {33273480}, issn = {2045-2322}, abstract = {Halobacteria, a class of Euryarchaeota are extremely halophilic archaea that can adapt to a wide range of salt concentration generally from 10% NaCl to saturated salt concentration of 32% NaCl. It consists of the orders: Halobacteriales, Haloferaciales and Natrialbales. Pan-genome analysis of class Halobacteria was done to explore the core (300) and variable components (Softcore: 998, Cloud:36531, Shell:11784). The core component revealed genes of replication, transcription, translation and repair, whereas the variable component had a major portion of environmental information processing. The pan-gene matrix was mapped onto the core-gene tree to find the ancestral (44.8%) and derived genes (55.1%) of the Last Common Ancestor of Halobacteria. A high percentage of derived genes along with presence of transformation and conjugation genes indicate the occurrence of horizontal gene transfer during the evolution of Halobacteria. 
A Core and pan-gene tree were also constructed to infer a phylogeny which implicated on the new super-order comprising of Natrialbales and Halobacteriales.}, } @article {pmid33271875, year = {2020}, author = {Chen, Z and Erickson, DL and Meng, J}, title = {Benchmarking Long-Read Assemblers for Genomic Analyses of Bacterial Pathogens Using Oxford Nanopore Sequencing.}, journal = {International journal of molecular sciences}, volume = {21}, number = {23}, pages = {}, pmid = {33271875}, issn = {1422-0067}, support = {U01FD001418//U.S. Food and Drug Administration/ ; }, abstract = {Oxford Nanopore sequencing can be used to achieve complete bacterial genomes. However, the error rates of Oxford Nanopore long reads are greater compared to Illumina short reads. Long-read assemblers using a variety of assembly algorithms have been developed to overcome this deficiency, which have not been benchmarked for genomic analyses of bacterial pathogens using Oxford Nanopore long reads. In this study, long-read assemblers, namely Canu, Flye, Miniasm/Racon, Raven, Redbean, and Shasta, were thus benchmarked using Oxford Nanopore long reads of bacterial pathogens. Ten species were tested for mediocre- and low-quality simulated reads, and 10 species were tested for real reads. Raven was the most robust assembler, obtaining complete and accurate genomes. All Miniasm/Racon and Raven assemblies of mediocre-quality reads provided accurate antimicrobial resistance (AMR) profiles, while the Raven assembly of Klebsiella variicola with low-quality reads was the only assembly with an accurate AMR profile among all assemblers and species. All assemblers functioned well for predicting virulence genes using mediocre-quality and real reads, whereas only the Raven assemblies of low-quality reads had accurate numbers of virulence genes. 
Regarding multilocus sequence typing (MLST), Miniasm/Racon was the most effective assembler for mediocre-quality reads, while only the Raven assemblies of Escherichia coli O157:H7 and K. variicola with low-quality reads showed positive MLST results. Miniasm/Racon and Raven were the best performers for MLST using real reads. The Miniasm/Racon and Raven assemblies showed accurate phylogenetic inference. For the pan-genome analyses, Raven was the strongest assembler for simulated reads, whereas Miniasm/Racon and Raven performed the best for real reads. Overall, the most robust and accurate assembler was Raven, closely followed by Miniasm/Racon.}, } @article {pmid33264401, year = {2020}, author = {Brown, AV and Conners, SI and Huang, W and Wilkey, AP and Grant, D and Weeks, NT and Cannon, SB and Graham, MA and Nelson, RT}, title = {A new decade and new data at SoyBase, the USDA-ARS soybean genetics and genomics database.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkaa1107}, pmid = {33264401}, issn = {1362-4962}, abstract = {SoyBase, a USDA genetic and genomics database, holds professionally curated soybean genetic and genomic data, which is integrated and made accessible to researchers and breeders. The site holds several reference genome assemblies, as well as genetic maps, thousands of mapped traits, expression and epigenetic data, pedigree information, and extensive variant and genotyping data sets. SoyBase displays include genetic, genomic, and epigenetic maps of the soybean genome. Gene expression data is presented in the genome viewer as heat maps and pictorial and tabular displays in gene report pages. Millions of sequence variants have been added, representing variations across various collections of cultivars. This variant data is explorable using new interactive tools to visualize the distribution of those variants across the genome, between selected accessions. 
SoyBase holds several reference-quality soybean genome assemblies, accessible via various query tools and browsers, including a new visualization system for exploring the soybean pan-genome. SoyBase also serves as a nexus of announcements pertinent to the greater soybean research community. The database also includes a soybean-specific anatomic and biochemical trait ontology. The database can be accessed at https://soybase.org.}, } @article {pmid33255840, year = {2020}, author = {Zhang, Y and Thomas, W and Bayer, PE and Edwards, D and Batley, J}, title = {Frontiers in Dissecting and Managing Brassica Diseases: From Reference-Based RGA Candidate Identification to Building Pan-RGAomes.}, journal = {International journal of molecular sciences}, volume = {21}, number = {23}, pages = {}, pmid = {33255840}, issn = {1422-0067}, support = {FT130100604//Australian Research Council/ ; DP160104497//Australian Research Council/ ; }, abstract = {The Brassica genus contains abundant economically important vegetable and oilseed crops, which are under threat of diseases caused by fungal, bacterial and viral pathogens. Resistance gene analogues (RGAs) are associated with quantitative and qualitative disease resistance and the identification of candidate RGAs associated with disease resistance is crucial for understanding the mechanism and management of diseases through breeding. The availability of Brassica genome assemblies has greatly facilitated reference-based quantitative trait loci (QTL) mapping for disease resistance. In addition, pangenomes, which characterise both core and variable genes, have been constructed for B. rapa, B. oleracea and B. napus. Genome-wide characterisation of RGAs using conserved domains and motifs in reference genomes and pangenomes reveals their clustered arrangements and presence of structural variations. Here, we comprehensively review RGA identification in important Brassica genome and pangenome assemblies. 
Comparison of the RGAs in QTL between resistant and susceptible individuals allows for efficient identification of candidate disease resistance genes. However, the reference-based QTL mapping and RGA candidate identification approach is restricted by the under-represented RGA diversity characterised in the limited number of Brassica assemblies. The species-wide repertoire of RGAs make up the pan-resistance gene analogue genome (pan-RGAome). Building a pan-RGAome, through either whole genome resequencing or resistance gene enrichment sequencing, would effectively capture RGA diversity, greatly expanding breeding resources that can be utilised for crop improvement.}, } @article {pmid33253207, year = {2020}, author = {Yahara, H and Hiraki, A and Maruoka, Y and Hirabayashi, A and Suzuki, M and Yahara, K}, title = {Shotgun metagenome sequencing identification of a set of genes encoded by Actinomyces associated with medication-related osteonecrosis of the jaw.}, journal = {PloS one}, volume = {15}, number = {11}, pages = {e0241676}, pmid = {33253207}, issn = {1932-6203}, abstract = {Medication-related osteonecrosis of the jaw (MRONJ) is intractable and severely affects a patient's quality of life. Although many cases of MRONJ have been reported in the past decade, the disease pathophysiology is unclear and there are no evidence-based therapeutic strategies. MRONJ usually features bone inflammation and infection. Prior studies that explored the association between MRONJ and microbial infection used the culture-based approach, which is not applicable to hundreds of unculturable taxa in the human oral microbiome, or 16S ribosomal RNA gene sequencing, which does not provide quantitative information of the abundance of specific taxa, and information of the presence, abundance, and function of specific genes in the microbiome. 
Here, deep shotgun metagenome sequencing (>10 Gb per sample) of bulk DNA extracted from saliva of MRONJ patients and healthy controls was performed to overcome these limitations. Comparative quantitative analyses of taxonomic and functional composition of these deep metagenomes (initially of 5 patients and 5 healthy controls) revealed an average 10.1% increase of genus Actinomyces and a 33.2% decrease in genus Streptococcus normally predominant in the human oral microbiota. Pan-genome analysis identified genes present exclusively in the MRONJ samples. Further analysis of the reads mapping to the genes in the extended dataset comprising five additional MRONJ samples and publicly available dataset of nine healthy controls resulted in the identification of 31 genes significantly associated with MRONJ. All these genes were encoded by Actinomyces genomic regions. Of these, the top two abundant genes were almost exclusively encoded by Actinomyces among usual taxa in the human oral microbiota. The potential relationships of these key genes with the disease are discussed at molecular level based on the literature. Although the sample size was small, this study will aid future studies to verify the data and characterize these genes in vitro and in vivo to understand the disease mechanisms, develop molecular targeted drugs, and for early stage screening and prognosis prediction.}, } @article {pmid33245329, year = {2020}, author = {Hammond, JA and Gordon, EA and Socarras, KM and Chang Mell, J and Ehrlich, GD}, title = {Beyond the pan-genome: current perspectives on the functional and practical outcomes of the distributed genome hypothesis.}, journal = {Biochemical Society transactions}, volume = {}, number = {}, pages = {}, doi = {10.1042/BST20190713}, pmid = {33245329}, issn = {1470-8752}, abstract = {The principle of monoclonality with regard to bacterial infections was considered immutable prior to 30 years ago. 
This view, espoused by Koch for acute infections, has proven inadequate regarding chronic infections as persistence requires multiple forms of heterogeneity among the bacterial population. This understanding of bacterial plurality emerged from a synthesis of what-were-then novel technologies in molecular biology and imaging science. These technologies demonstrated that bacteria have complex life cycles, polymicrobial ecologies, and evolve in situ via the horizontal exchange of genic characters. Thus, there is an ongoing generation of diversity during infection that results in far more highly complex microbial communities than previously envisioned. This perspective is based on the fundamental tenet that the bacteria within an infecting population display genotypic diversity, including gene possession differences, which result from horizontal gene transfer mechanisms including transformation, conjugation, and transduction. This understanding is embodied in the concepts of the supragenome/pan-genome and the distributed genome hypothesis (DGH). These paradigms have fostered multiple researches in diverse areas of bacterial ecology including host-bacterial interactions covering the gamut of symbiotic relationships including mutualism, commensalism, and parasitism. With regard to the human host, within each of these symbiotic relationships all bacterial species possess attributes that contribute to colonization and persistence; those species/strains that are pathogenic also encode traits for invasion and metastases. 
Herein we provide an update on our understanding of bacterial plurality and discuss potential applications in diagnostics, therapeutics, and vaccinology based on perspectives provided by the DGH with regard to the evolution of pathogenicity.}, } @article {pmid33240617, year = {2020}, author = {Hudson, LK and Constantine-Renna, L and Thomas, L and Moore, C and Qian, X and Garman, K and Dunn, JR and Denes, TG}, title = {Genomic characterization and phylogenetic analysis of Salmonella enterica serovar Javiana.}, journal = {PeerJ}, volume = {8}, number = {}, pages = {e10256}, pmid = {33240617}, issn = {2167-8359}, abstract = {Salmonella enterica serovar Javiana is the fourth most reported serovar of laboratory-confirmed human Salmonella infections in the U.S. and in Tennessee (TN). Although Salmonella ser. Javiana is a common cause of human infection, the majority of cases are sporadic in nature rather than outbreak-associated. To better understand Salmonella ser. Javiana microbial population structure in TN, we completed a phylogenetic analysis of 111 Salmonella ser. Javiana clinical isolates from TN collected from Jan. 2017 to Oct. 2018. We identified mobile genetic elements and genes known to confer antibiotic resistance present in the isolates, and performed a pan-genome-wide association study (pan-GWAS) to compare gene content between clades identified in this study. The population structure of TN Salmonella ser. Javiana clinical isolates consisted of three genetic clades: TN clade I (n = 54), TN clade II (n = 4), and TN clade III (n = 48). Using a 5, 10, and 25 hqSNP distance threshold for cluster identification, nine, 12, and 10 potential epidemiologically-relevant clusters were identified, respectively. The majority of genes that were found to be over-represented in specific clades were located in mobile genetic element (MGE) regions, including genes encoding integrases and phage structures (91.5%). 
Additionally, a large portion of the over-represented genes from TN clade II (44.9%) were located on an 87.5 kb plasmid containing genes encoding a toxin/antitoxin system (ccdAB). Additionally, we completed phylogenetic analyses of global Salmonella ser. Javiana datasets to gain a broader insight into the population structure of this serovar. We found that the global phylogeny consisted of three major clades (one of which all of the TN isolates belonged to) and two cgMLST eBurstGroups (ceBGs) and that the branch length between the two Salmonella ser. Javiana ceBGs (1,423 allelic differences) was comparable to those from other serovars that have been reported as polyphyletic (929-2,850 allelic differences). This study demonstrates the population structure of TN and global Salmonella ser. Javiana isolates, a clinically important Salmonella serovar and can provide guidance for phylogenetic cluster analyses for public health surveillance and response.}, } @article {pmid33240320, year = {2020}, author = {Su, F and Tian, R and Yang, Y and Li, H and Sun, G and Li, Y and Han, B and Xu, X and Chen, X and Zhao, G and Cui, H and Xu, H}, title = {Comparative Genome Analysis Reveals the Molecular Basis of Niche Adaptation of Staphylococcus epidermidis Strains.}, journal = {Frontiers in genetics}, volume = {11}, number = {}, pages = {566080}, pmid = {33240320}, issn = {1664-8021}, abstract = {Staphylococcus epidermidis is one of the most commonly isolated species from human skin and the second leading cause of bloodstream infections. Here, we performed a large-scale comparative study without any pre-assigned reference to identify genomic determinants associated with the diversity and adaptation of S. epidermidis strains to various environments. Pan-genome of S. epidermidis was open with 435 core proteins and had a pan-genome size of 8,034 proteins. 
Genome-wide phylogenetic tree showed high heterogeneity and suggested that routine whole genome sequencing was a powerful tool for analyzing the complex evolution of S. epidermidis and for investigating the infection sources. Comparative genome analyses demonstrated a range of antimicrobial resistance (AMR) genes, especially those within mobile genetic elements. The complicated host-bacterium and bacterium-bacterium relationships help S. epidermidis to play a vital role in balancing the epithelial microflora. The highly variable and dynamic nature of the S. epidermidis genome may contribute to its success in adapting to broad habitats. Genes related to biofilm formation and cell toxicity were significantly enriched in the blood and skin, demonstrating their potentials in identifying risk genotypes. This study gave a general landscape of S. epidermidis pan-genome and provided valuable insights into mechanisms for genome evolution and lifestyle adaptation of this ecologically flexible species.}, } @article {pmid33239781, year = {2020}, author = {Jayakodi, M and Padmarasu, S and Haberer, G and Bonthala, VS and Gundlach, H and Monat, C and Lux, T and Kamal, N and Lang, D and Himmelbach, A and Ens, J and Zhang, XQ and Angessa, TT and Zhou, G and Tan, C and Hill, C and Wang, P and Schreiber, M and Boston, LB and Plott, C and Jenkins, J and Guo, Y and Fiebig, A and Budak, H and Xu, D and Zhang, J and Wang, C and Grimwood, J and Schmutz, J and Guo, G and Zhang, G and Mochida, K and Hirayama, T and Sato, K and Chalmers, KJ and Langridge, P and Waugh, R and Pozniak, CJ and Scholz, U and Mayer, KFX and Spannagl, M and Li, C and Mascher, M and Stein, N}, title = {The barley pan-genome reveals the hidden legacy of mutation breeding.}, journal = {Nature}, volume = {588}, number = {7837}, pages = {284--289}, pmid = {33239781}, issn = {1476-4687}, abstract = {Genetic diversity is key to crop improvement. 
Owing to pervasive genomic structural variation, a single reference genome assembly cannot capture the full complement of sequence diversity of a crop species (known as the 'pan-genome'). Multiple high-quality sequence assemblies are an indispensable component of a pan-genome infrastructure. Barley (Hordeum vulgare L.) is an important cereal crop with a long history of cultivation that is adapted to a wide range of agro-climatic conditions. Here we report the construction of chromosome-scale sequence assemblies for the genotypes of 20 varieties of barley—comprising landraces, cultivars and a wild barley—that were selected as representatives of global barley diversity. We catalogued genomic presence/absence variants and explored the use of structural variants for quantitative genetic analysis through whole-genome shotgun sequencing of 300 gene bank accessions. We discovered abundant large inversion polymorphisms and analysed in detail two inversions that are frequently found in current elite barley germplasm; one is probably the product of mutation breeding and the other is tightly linked to a locus that is involved in the expansion of geographical range. This first-generation barley pan-genome makes previously hidden genetic variation accessible to genetic studies and breeding.}, } @article {pmid33219399, year = {2020}, author = {Khan, S and Vancuren, SJ and Hill, JE}, title = {A Generalist Lifestyle Allows Rare Gardnerella spp. to Persist at Low Levels in the Vaginal Microbiome.}, journal = {Microbial ecology}, volume = {}, number = {}, pages = {}, pmid = {33219399}, issn = {1432-184X}, abstract = {Gardnerella spp. are considered a hallmark of bacterial vaginosis, a dysbiosis of the vaginal microbiome. There are four cpn60 sequence-based subgroups within the genus (A, B, C and D), and thirteen genome species have been defined recently. Gardnerella spp. 
co-occur in the vaginal microbiome with varying abundance, and these patterns are shaped by a resource-dependent, exploitative competition, which affects the growth rate of subgroups A, B and C negatively. The growth rate of rarely abundant subgroup D, however, increases with the increasing number of competitors, negatively affecting the growth rate of others. We hypothesized that a nutritional generalist lifestyle and minimal niche overlap with the other more abundant Gardnerella spp. facilitate the maintenance of subgroup D in the vaginal microbiome through negative frequency-dependent selection. Using 40 whole-genome sequences from isolates representing all four subgroups, we found that they could be distinguished based on the content of their predicted proteomes. Proteins associated with carbohydrate and amino acid uptake and metabolism were significant contributors to the separation of subgroups. Subgroup D isolates had significantly more of their proteins assigned to amino acid metabolism than the other subgroups. Subgroup D isolates were also significantly different from others in terms of number and type of carbon sources utilized in a phenotypic assay, while the other three could not be distinguished. Overall, the results suggest that a generalist lifestyle and lack of niche overlap with other Gardnerella spp. 
leads to subgroup D being favoured by negative frequency-dependent selection in the vaginal microbiome.}, } @article {pmid33217199, year = {2020}, author = {Tahir Ul Qamar, M and Zhu, X and Khan, MS and Xing, F and Chen, LL}, title = {Pan-genome: A promising resource for noncoding RNA discovery in plants.}, journal = {The plant genome}, volume = {13}, number = {3}, pages = {e20046}, doi = {10.1002/tpg2.20046}, pmid = {33217199}, issn = {1940-3372}, support = {31571351//National Natural Science Foundation of China/ ; 31871269//National Natural Science Foundation of China/ ; 2019CFA014//Natural Science Foundation of Hubei Province/ ; 2018YFD1000101//National Key Research and Development Program of China/ ; Postdoctoral Project//Guangxi University/ ; starting research grant for High-level Talents//Guangxi University/ ; }, abstract = {Plant genomes contain both protein-coding and noncoding sequences including transposable elements (TEs) and noncoding RNAs (ncRNAs). The ncRNAs are recognized as important elements that play fundamental roles in the structural organization and function of plant genomes. Despite various hypotheses, TEs are believed to be a major precursor of ncRNAs. Transposable elements are also prime factors that cause genomic variation among members of a species. Hence, TEs pose a major challenge in the discovery and analysis of ncRNAs. With the increase in the number of sequenced plant genomes, it is now accepted that a single reference genome is insufficient to represent the complete genomic diversity and contents of a species, and exploring the pan-genome of a species is critical. In this review, we summarize the recent progress in the field of plant pan-genomes. 
We also discuss TEs and their roles in ncRNA biogenesis and present our perspectives on the application of pan-genomes for the discovery of ncRNAs to fully explore and exploit their biological roles in plants.}, } @article {pmid33213029, year = {2020}, author = {Dar, HA and Zaheer, T and Ullah, N and Bakhtiar, SM and Zhang, T and Yasir, M and Azhar, EI and Ali, A}, title = {Pangenome Analysis of Mycobacterium tuberculosis Reveals Core-Drug Targets and Screening of Promising Lead Compounds for Drug Discovery.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {9}, number = {11}, pages = {}, pmid = {33213029}, issn = {2079-6382}, support = {FP-1-42//THE DEANSHIP OF SCIENTIFIC RESEARCH (DSR) AT KING ABDULAZIZ UNIVERSITY, JEDDAH, SAUDI ARABIA/ ; }, abstract = {Tuberculosis, caused by Mycobacterium tuberculosis (M. tuberculosis), is one of the leading causes of human deaths globally according to the WHO TB 2019 report. The continuous rise in multi- and extensive-drug resistance in M. tuberculosis broadens the challenges to control tuberculosis. The availability of a large number of completely sequenced genomes of M. tuberculosis has provided an opportunity to explore the pangenome of the species along with the pan-phylogeny and to identify potential novel drug targets leading to drug discovery. We attempt to calculate the pangenome of M. tuberculosis that comprises a total of 150 complete genomes and performed the phylo-genomic classification and analysis. Further, the conserved core genome (1251 proteins) is subjected to various sequential filters (non-human homology, essentiality, virulence, physicochemical parameters, and pathway analysis) resulted in identification of eight putative broad-spectrum drug targets. 
Upon molecular docking analyses of these targets with ligands available at the DrugBank database shortlisted a total of five promising ligands with projected inhibitory potential; namely, 2'deoxy-thymidine-5'-diphospho-alpha-d-glucose, uridine diphosphate glucose, 2'-deoxy-thymidine-beta-l-rhamnose, thymidine-5'-triphosphate, and citicoline. We are confident that with further lead optimization and experimental validation, these lead compounds may provide a sound basis to develop safe and effective drugs against tuberculosis disease in humans.}, } @article {pmid33204772, year = {2020}, author = {Korzhenkov, AA and Toshchakov, SV and Podosokorskaya, OA and Patrushev, MV and Kublanov, IV}, title = {Data on draft genome sequence of Caldanaerobacter sp. strain 1523vc, a thermophilic bacterium, isolated from a hot spring of Uzon Caldera, (Kamchatka, Russia).}, journal = {Data in brief}, volume = {33}, number = {}, pages = {106336}, pmid = {33204772}, issn = {2352-3409}, abstract = {The draft genome sequence of Caldanaerobacter sp. strain 1523vc, a thermophilic bacterium, isolated from a hot spring of Uzon Caldera, (Kamchatka, Russia) is presented. The complete genome assembly was of 2 713 207 bp with predicted completeness of 99.38%. Genome structural annotation revealed 2674 protein-coding genes, 127 pseudogenes and 77 RNA genes. Pangenome analysis of 7 currently available high quality Caldanaerobacter spp. genomes including 1523vc revealed 4673 gene clusters. Of them, 1130 clusters formed a core genome of genus Caldanaerobacter. Of the rest 3543 Caldanaerobacter pangenome genes, 385 were exclusively represented in 1523vc genome. 101 of 2801 Caldanaerobacter CDS were found to be encoding carbohydrate-active enzymes (CAZymes). The majority of CAZymes were predicted to be involved in degradation of beta-linked polysaccharides as chitin, cellulose and hemicelluloses, reflecting the metabolism of strain 1523vc, isolated on cellulose. 
5 of 101 CAZyme genes were found to be unique for the strain 1523vc and belonged to GH23, GT56, GH15 and two CE9 family proteins. The draft genome of strain 1523vc was deposited at DDBJ/EMBL/GenBank under the accessions JABEQB000000000, PRJNA629090 and SAMN14766777 for Genome, Bioproject and Biosample, respectively.}, } @article {pmid33202901, year = {2020}, author = {Kim, J and Sung, J and Han, K and Lee, W and Mun, S and Lee, J and Bahk, K and Yang, I and Bae, YK and Kim, C and Kim, JI and Seo, JS}, title = {A High Quality Asian Genome Assembly Identifies Features of Common Missing Regions.}, journal = {Genes}, volume = {11}, number = {11}, pages = {}, pmid = {33202901}, issn = {2073-4425}, support = {10050164//The Ministry of Trade, Industry and Energy (MOTIE, Korea)./ ; NRF-2017R1A2B2002136//the Ministry of Science, ICT & Future Planning/ ; NO. 2019R1A6A3A13093761//The Ministry of Education/ ; }, abstract = {The current human reference genome (GRCh38), with its superior quality, has contributed significantly to genome analysis. However, GRCh38 may still underrepresent the ethnic genome, specifically for Asians, though exactly what we are missing is still elusive. Here, we juxtaposed GRCh38 with a high-contiguity genome assembly of one Korean (AK1) to show that a part of AK1 genome is missing in GRCh38 and that the missing regions harbored ~1390 putative coding elements. Furthermore, we found that multiple populations shared some certain parts in the missing genome when we analyzed the "unmapped" (to GRCh38) reads of fourteen individuals (five East-Asians, four Europeans, and five Africans), amounting to ~5.3 Mb (~0.2% of AK1) of the total genomic regions. The recovered AK1 regions from the "unmapped reads", which were the estimated missing regions that did not exist in GRCh38, harbored candidate coding elements. We verified that most of the common (shared by ≥7 individuals) missing regions exist in human and chimpanzee DNA. 
Moreover, we further identified the occurrence mechanism and ethnic heterogeneity as well as the presence of the common missing regions. This study illuminates a potential advantage of using a pangenome reference and brings up the need for further investigations on the various features of regions globally missed in GRCh38.}, } @article {pmid33193203, year = {2020}, author = {Li, X and Lin, J and Hu, Y and Zhou, J}, title = {PARMAP: A Pan-Genome-Based Computational Framework for Predicting Antimicrobial Resistance.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {578795}, pmid = {33193203}, issn = {1664-302X}, abstract = {Antimicrobial resistance (AMR) has emerged as one of the most urgent global threats to public health. Accurate detection of AMR phenotypes is critical for reducing the spread of AMR strains. Here, we developed PARMAP (Prediction of Antimicrobial Resistance by MAPping genetic alterations in pan-genome) to predict AMR phenotypes and to identify AMR-associated genetic alterations based on the pan-genome of bacteria by utilizing machine learning algorithms. When we applied PARMAP to 1,597 Neisseria gonorrhoeae strains, it successfully predicted their AMR phenotypes based on a pan-genome analysis. Furthermore, it identified 328 genetic alterations in 23 known AMR genes and discovered many new AMR-associated genetic alterations in ciprofloxacin-resistant N. gonorrhoeae, and it clearly indicated the genetic heterogeneity of AMR genes in different subtypes of resistant N. gonorrhoeae. Additionally, PARMAP performed well in predicting the AMR phenotypes of Mycobacterium tuberculosis and Escherichia coli, indicating the robustness of the PARMAP framework. In conclusion, PARMAP not only precisely predicts the AMR of a population of strains of a given species but also uses whole-genome sequencing data to prioritize candidate AMR-associated genetic alterations based on their likelihood of contributing to AMR. 
Thus, we believe that PARMAP will accelerate investigations into AMR mechanisms in other human pathogens.}, } @article {pmid33193173, year = {2020}, author = {Yuan, C and Wei, Y and Zhang, S and Cheng, J and Cheng, X and Qian, C and Wang, Y and Zhang, Y and Yin, Z and Chen, H}, title = {Comparative Genomic Analysis Reveals Genetic Mechanisms of the Variety of Pathogenicity, Antibiotic Resistance, and Environmental Adaptation of Providencia Genus.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {572642}, pmid = {33193173}, issn = {1664-302X}, abstract = {The bacterial genus Providencia is Gram-negative opportunistic pathogens, which have been isolated from a variety of environments and organisms, ranging from humans to animals. Providencia alcalifaciens, Providencia rettgeri, and Providencia stuartii are the most common clinical isolates, however, these three species differ in their pathogenicity, antibiotic resistance and environmental adaptation. Genomes of 91 isolates of the genus Providencia were investigated to clarify their genetic diversity, focusing on virulence factors, antibiotic resistance genes, and environmental adaptation genes. Our study revealed an open pan-genome for the genus Providencia containing 14,720 gene families. Species of the genus Providencia exhibited different functional constraints, with the core genes, accessory genes, and unique genes. A maximum-likelihood phylogeny reconstructed with concatenated single-copy core genes classified all Providencia isolates into 11 distant groups. Comprehensive and systematic comparative genomic analyses revealed that specific distributions of virulence genes, which were highly homologous to virulence genes of the genus Proteus, contributed to diversity in pathogenicity of Providencia alcalifaciens, Providencia rettgeri, and Providencia stuartii. 
Furthermore, multidrug resistance (MDR) phenotypes of isolates of Providencia rettgeri and Providencia stuartii were predominantly due to resistance genes from class 1 and 2 integrons. In addition, Providencia rettgeri and Providencia stuartii harbored more genes related to material transport and energy metabolism, which conferred a stronger ability to adapt to diverse environments. Overall, our study provided valuable insights into the genetic diversity and functional features of the genus Providencia, and revealed genetic mechanisms underlying diversity in pathogenicity, antibiotic resistance and environmental adaptation of members of this genus.}, } @article {pmid33184704, year = {2020}, author = {Gao, L and Koo, DH and Juliana, P and Rife, T and Singh, D and Lemes da Silva, C and Lux, T and Dorn, KM and Clinesmith, M and Silva, P and Wang, X and Spannagl, M and Monat, C and Friebe, B and Steuernagel, B and Muehlbauer, GJ and Walkowiak, S and Pozniak, C and Singh, R and Stein, N and Mascher, M and Fritz, A and Poland, J}, title = {The Aegilops ventricosa 2NvS segment in bread wheat: cytology, genomics and breeding.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {}, number = {}, pages = {}, doi = {10.1007/s00122-020-03712-y}, pmid = {33184704}, issn = {1432-2242}, support = {1339389//National Science Foundation/ ; AID-OAA-A-13-00051//United States Agency for International Development/ ; }, abstract = {KEY MESSAGE: The first cytological characterization of the 2NvS segment in hexaploid wheat; complete de novo assembly and annotation of 2NvS segment; 2NvS frequency is increasing 2NvS and is associated with higher yield. The Aegilops ventricosa 2NvS translocation segment has been utilized in breeding disease-resistant wheat crops since the early 1990s. 
This segment is known to possess several important resistance genes against multiple wheat diseases including root knot nematode, stripe rust, leaf rust and stem rust. More recently, this segment has been associated with resistance to wheat blast, an emerging and devastating wheat disease in South America and Asia. To date, full characterization of the segment including its size, gene content and its association with grain yield is lacking. Here, we present a complete cytological and physical characterization of this agronomically important translocation in bread wheat. We de novo assembled the 2NvS segment in two wheat varieties, 'Jagger' and 'CDC Stanley,' and delineated the segment to be approximately 33 Mb. A total of 535 high-confidence genes were annotated within the 2NvS region, with > 10% belonging to the nucleotide-binding leucine-rich repeat (NLR) gene families. Identification of groups of NLR genes that are potentially N genome-specific and expressed in specific tissues can fast-track testing of candidate genes playing roles in various disease resistances. We also show the increasing frequency of 2NvS among spring and winter wheat breeding programs over two and a half decades, and the positive impact of 2NvS on wheat grain yield based on historical datasets. 
The significance of the 2NvS segment in wheat breeding due to resistance to multiple diseases and a positive impact on yield highlights the importance of understanding and characterizing the wheat pan-genome for better insights into molecular breeding for wheat improvement.}, } @article {pmid33183231, year = {2020}, author = {Piza-Buitrago, A and Rincón, V and Donato, J and Saavedra, SY and Duarte, C and Morero, J and Falquet, L and Reguero, MT and Barreto-Hernández, E}, title = {Genome-based characterization of two Colombian clinical Providencia rettgeri isolates co-harboring NDM-1, VIM-2, and other β-lactamases.}, journal = {BMC microbiology}, volume = {20}, number = {1}, pages = {345}, pmid = {33183231}, issn = {1471-2180}, support = {FP44842-155-2015//COLCIENCIAS/ ; 35030//DIEB, Universidad Nacional de Colombia CO)/ ; }, abstract = {BACKGROUND: Providencia rettgeri is a nosocomial pathogen associated with urinary tract infections and related to Healthcare-Associated Infection (HAI). In recent years isolates producing New Delhi Metallo-β-lactamase (NDM) and other β-lactamases have been reported that reduce the efficiency of clinical antimicrobial treatments. In this study, we analyzed antibiotic resistance, the presence of resistance genes and the clonal relationship of two P. rettgeri isolates obtained from male patients admitted to the same hospital in Bogotá - Colombia, 2015.

RESULTS: Antibiotic susceptibility profile evaluated by the Kirby-Bauer method revealed that both isolates were resistant to third-generation carbapenems and cephalosporins. Whole-genome sequencing (Illumina HiSeq) followed by SPAdes assembling, Prokka annotation in combination with an in-house Python program and resistance gene detection by ResFinder identified the same six β-lactamase genes in both isolates: blaNDM-1, blaVIM-2, blaCTX-M-15, blaOXA-10, blaCMY-2 and blaTEM-1. Additionally, various resistance genes associated with antibiotic target alteration (arnA, PmrE, PmrF, LpxA, LpxC, gyrB, folP, murA, rpoB, rpsL, tet34) were found and four efflux pumps (RosAB, EmrD, mdtH and cmlA). The additional resistance to gentamicin in one of the two isolates could be explained by a detected SNP in CpxA (Cys191Arg) which is involved in the stress response of the bacterial envelope. Genome BLAST comparison using CGView, the ANI value (99.99%) and the pangenome (using Roary) phylogenetic tree (same clade, small distance) showed high similarity between the isolates. The rMLST analysis indicated that both isolates were typed as rST-61,696, same as the RB151 isolate previously isolated in Bucaramanga, Colombia, 2013, and the FDAARGOS_330 isolate isolated in the USA, 2015.

CONCLUSIONS: We report the coexistence of the carbapenemase genes blaNDM-1, and blaVIM-2, together with the β-lactamase genes blaCTX-M-15, blaOXA-10, blaCMY-2 and blaTEM-1, in P. rettgeri isolates from two patients in Colombia. Whole-genome sequence analysis indicated a circulation of P. rettgeri rST-61,696 strains in America that needs to be investigated further.}, } @article {pmid33174833, year = {2020}, author = {Pandey, A and Humbert, MV and Jackson, A and Passey, JL and Hampson, DJ and Cleary, DW and La Ragione, RM and Christodoulides, M}, title = {Evidence of homologous recombination as a driver of diversity in Brachyspira pilosicoli.}, journal = {Microbial genomics}, volume = {6}, number = {12}, pages = {}, doi = {10.1099/mgen.0.000470}, pmid = {33174833}, issn = {2057-5858}, abstract = {The enteric, pathogenic spirochaete Brachyspira pilosicoli colonizes and infects a variety of birds and mammals, including humans. However, there is a paucity of genomic data available for this organism. This study introduces 12 newly sequenced draft genome assemblies, boosting the cohort of examined isolates by fourfold and cataloguing the intraspecific genomic diversity of the organism more comprehensively. We used several in silico techniques to define a core genome of 1751 genes and qualitatively and quantitatively examined the intraspecific species boundary using phylogenetic analysis and average nucleotide identity, before contextualizing this diversity against other members of the genus Brachyspira. Our study revealed that an additional isolate that was unable to be species typed against any other Brachyspira lacked putative virulence factors present in all other isolates. Finally, we quantified that homologous recombination has as great an effect on the evolution of the core genome of the B. pilosicoli as random mutation (r/m=1.02). Comparative genomics has informed Brachyspira diversity, population structure, host specificity and virulence. 
The data presented here can be used to contribute to developing advanced screening methods, diagnostic assays and prophylactic vaccines against this zoonotic pathogen.}, } @article {pmid33173909, year = {2020}, author = {Lau, BT and Pavlichin, D and Hooker, AC and Almeda, A and Shin, G and Chen, J and Sahoo, MK and Huang, C and Pinsky, BA and Lee, H and Ji, HP}, title = {Profiling SARS-CoV-2 mutation fingerprints that range from the viral pangenome to individual infection quasispecies.}, journal = {medRxiv : the preprint server for health sciences}, volume = {}, number = {}, pages = {}, pmid = {33173909}, support = {P01 HG000205/HG/NHGRI NIH HHS/United States ; R01 HG006137/HG/NHGRI NIH HHS/United States ; R35 HG011292/HG/NHGRI NIH HHS/United States ; U01 HG010963/HG/NHGRI NIH HHS/United States ; }, abstract = {Background: The genome of SARS-CoV-2 is susceptible to mutations during viral replication due to the errors generated by RNA-dependent RNA polymerases. These mutations enable the SARS-CoV-2 to evolve into new strains. Viral quasispecies emerge from de novo mutations that occur in individual patients. In combination, these sets of viral mutations provide distinct genetic fingerprints that reveal the patterns of transmission and have utility in contact tracing.

Methods: Leveraging thousands of sequenced SARS-CoV-2 genomes, we performed a viral pangenome analysis to identify conserved genomic sequences. We used a rapid and highly efficient computational approach that relies on k-mers, short tracts of sequence, instead of conventional sequence alignment. Using this method, we annotated viral mutation signatures that were associated with specific strains. Based on these highly conserved viral sequences, we developed a rapid and highly scalable targeted sequencing assay to identify mutations, detect quasispecies and identify mutation signatures from patients. These results were compared to the pangenome genetic fingerprints.

Results: We built a k-mer index for thousands of SARS-CoV-2 genomes and identified conserved genomics regions and landscape of mutations across thousands of virus genomes. We delineated mutation profiles spanning common genetic fingerprints (the combination of mutations in a viral assembly) and rare ones that occur in only small fraction of patients. We developed a targeted sequencing assay by selecting primers from the conserved viral genome regions to flank frequent mutations. Using a cohort of SARS-CoV-2 clinical samples, we identified genetic fingerprints consisting of strain-specific mutations seen across populations and de novo quasispecies mutations localized to individual infections. We compared the mutation profiles of viral samples undergoing analysis with the features of the pangenome.

Conclusions: We conducted an analysis for viral mutation profiles that provide the basis of genetic fingerprints. Our study linked pangenome analysis with targeted deep sequenced SARS-CoV-2 clinical samples. We identified quasispecies mutations occurring within individual patients, mutations demarcating dominant species and the prevalence of mutation signatures, of which a significant number were relatively unique. Analysis of these genetic fingerprints may provide a way of conducting molecular contact tracing.}, } @article {pmid33171694, year = {2020}, author = {Drijver, EPMD and Stohr, JJJM and Verweij, JJ and Verhulst, C and Velkers, FC and Stegeman, A and Bergh, MFQKD and Kluytmans, JAJW and Group, IS}, title = {Limited Genetic Diversity of blaCMY-2-Containing IncI1-pST12 Plasmids from Enterobacteriaceae of Human and Broiler Chicken Origin in The Netherlands.}, journal = {Microorganisms}, volume = {8}, number = {11}, pages = {}, pmid = {33171694}, issn = {2076-2607}, support = {Interreg V Flanders-The Netherlands program//European Regional Development Fund/ ; }, abstract = {Distinguishing epidemiologically related and unrelated plasmids is essential to confirm plasmid transmission. We compared IncI1-pST12 plasmids from both human and livestock origin and explored the degree of sequence similarity between plasmids from Enterobacteriaceae with different epidemiological links. Short-read sequence data of Enterobacteriaceae cultured from humans and broilers were screened for the presence of both a blaCMY-2 gene and an IncI1-pST12 replicon. Isolates were long-read sequenced on a MinION sequencer (Oxford Nanopore Technologies). After plasmid reconstruction using hybrid assembly, pairwise single nucleotide polymorphisms (SNPs) were determined. The plasmids were annotated, and a pan-genome was constructed to compare genes variably present between the different plasmids. 
Nine Escherichia coli sequences of broiler origin, four Escherichia coli sequences, and one Salmonella enterica sequence of human origin were selected for the current analysis. A circular contig with the IncI1-pST12 replicon and blaCMY-2 gene was extracted from the assembly graph of all fourteen isolates. Analysis of the IncI1-pST12 plasmids revealed a low number of SNP differences (range of 0-9 SNPs). The range of SNP differences overlapped in isolates with different epidemiological links. One-hundred and twelve from a total of 113 genes of the pan-genome were present in all plasmid constructs. Next generation sequencing analysis of blaCMY-2-containing IncI1-pST12 plasmids isolated from Enterobacteriaceae with different epidemiological links show a high degree of sequence similarity in terms of SNP differences and the number of shared genes. Therefore, statements on the horizontal transfer of these plasmids based on genetic identity should be made with caution.}, } @article {pmid33168033, year = {2020}, author = {Gerdol, M and Moreira, R and Cruz, F and Gómez-Garrido, J and Vlasova, A and Rosani, U and Venier, P and Naranjo-Ortiz, MA and Murgarella, M and Greco, S and Balseiro, P and Corvelo, A and Frias, L and Gut, M and Gabaldón, T and Pallavicini, A and Canchaya, C and Novoa, B and Alioto, TS and Posada, D and Figueras, A}, title = {Massive gene presence-absence variation shapes an open pan-genome in the Mediterranean mussel.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {275}, pmid = {33168033}, issn = {1474-760X}, support = {AGL2011-14507-E//Ministerio de Ciencia, Innovación y Universidades/ ; AGL2015-65705-R//Ministerio de Ciencia, Innovación y Universidades/ ; RTI2018-095997-B-I00//Ministerio de Ciencia, Innovación y Universidades/ ; INCITE 10PXIB402096PR//Consellería de Economía, Emprego e Industria, Xunta de Galicia/ ; IN607B 2016/12//Consellería de Economía, Emprego e Industria, Xunta de Galicia/ ; 678589//Horizon 2020 Framework 
Programme/ ; Consolidator/ERC_/European Research Council/International ; }, abstract = {BACKGROUND: The Mediterranean mussel Mytilus galloprovincialis is an ecologically and economically relevant edible marine bivalve, highly invasive and resilient to biotic and abiotic stressors causing recurrent massive mortalities in other bivalves. Although these traits have been recently linked with the maintenance of a high genetic variation within natural populations, the factors underlying the evolutionary success of this species remain unclear.

RESULTS: Here, after the assembly of a 1.28-Gb reference genome and the resequencing of 14 individuals from two independent populations, we reveal a complex pan-genomic architecture in M. galloprovincialis, with a core set of 45,000 genes plus a strikingly high number of dispensable genes (20,000) subject to presence-absence variation, which may be entirely missing in several individuals. We show that dispensable genes are associated with hemizygous genomic regions affected by structural variants, which overall account for nearly 580 Mb of DNA sequence not included in the reference genome assembly. As such, this is the first study to report the widespread occurrence of gene presence-absence variation at a whole-genome scale in the animal kingdom.

CONCLUSIONS: Dispensable genes usually belong to young and recently expanded gene families enriched in survival functions, which might be the key to explain the resilience and invasiveness of this species. This unique pan-genome architecture is characterized by dispensable genes in accessory genomic regions that exceed by orders of magnitude those observed in other metazoans, including humans, and closely mirror the open pan-genomes found in prokaryotes and in a few non-metazoan eukaryotes.}, } @article {pmid33163077, year = {2020}, author = {Vasilyev, IY and Nikolaeva, IV and Siniagina, MN and Kharchenko, AM and Shaikhieva, GS}, title = {Multidrug-Resistant Hypervirulent Klebsiella pneumoniae Found Persisting Silently in Infant Gut Microbiota.}, journal = {International journal of microbiology}, volume = {2020}, number = {}, pages = {4054393}, pmid = {33163077}, issn = {1687-918X}, abstract = {Since the spread of multidrug-resistant Klebsiella pneumoniae (MDRKP) strains is considered as a challenge for patients with weakened or suppressed immunity, the emergence of isolates carrying determinants of hypervirulent phenotypes in addition may become a serious problem even for healthy individuals. The aim of this study is an investigation of the nonoutbreak K. pneumoniae emergence occurred in early 2017 at a maternity hospital of Kazan, Russia. Ten bacterial isolates demonstrating multiple drug resistance phenotypes were collected from eight healthy full-term breastfed neonates, observed at the maternity hospital of Kazan, Russia. All the infants and their mothers were dismissed without symptoms or complaints, in a satisfactory condition. Whole-genome shotgun (WGS) sequencing was performed with the purpose to track down a possible spread source(s) and obtain detailed information about resistance determinants and pathogenic potential of the collected isolates. 
Microdilution tests have confirmed production of extended-spectrum β-lactamases (ESBL) and their resistance to aminoglycoside, β-lactam, fluoroquinolone, sulfonamide, nitrofurantoin, trimethoprim, and fosfomycin antibiotics and Klebsiella phage. The WGS analysis has revealed the genes that are resistant to aminoglycosides, fluoroquinolones, macrolides, sulfonamides, chloramphenicols, tetracyclines, and trimethoprim and ESBL determinants. The pangenome analysis had split the isolates into two phylogenetic clades. The first group, a more heterogeneous clade, was represented by 5 isolates with 4 different in silico multilocus sequence types (MLSTs). The second group contained 5 isolates from infants born vaginally with the single MLST ST23, positive for genes corresponding to hypervirulent phenotypes: yersiniabactin, aerobactin, salmochelin, colibactin, hypermucoid determinants, and specific alleles of K- and O-antigens. The source of the MDRKP spread was not defined. Infected infants have shown no developed disease symptoms.}, } @article {pmid33152994, year = {2020}, author = {Lugli, GA and Tarracchini, C and Alessandri, G and Milani, C and Mancabelli, L and Turroni, F and Neuzil-Bunesova, V and Ruiz, L and Margolles, A and Ventura, M}, title = {Decoding the Genomic Variability among Members of the Bifidobacterium dentium Species.}, journal = {Microorganisms}, volume = {8}, number = {11}, pages = {}, pmid = {33152994}, issn = {2076-2607}, abstract = {Members of the Bifidobacterium dentium species are usually identified in the oral cavity of humans and associated with the development of plaque and dental caries. Nevertheless, they have also been detected from fecal samples, highlighting a widespread distribution among mammals. To explore the genetic variability of this species, we isolated and sequenced the genomes of 18 different B. dentium strains collected from fecal samples of several primate species and an Ursus arctos. 
Thus, we investigated the genomic variability and metabolic abilities of the new B. dentium isolates together with 20 public genome sequences. Comparative genomic analyses provided insights into the vast metabolic repertoire of the species, highlighting 19 glycosyl hydrolases families shared between each analyzed strain. Phylogenetic analysis of the B. dentium taxon, involving 1140 conserved genes, revealed a very close phylogenetic relatedness among members of this species. Furthermore, low genomic variability between strains was also confirmed by an average nucleotide identity analysis showing values higher than 98.2%. Investigating the genetic features of each strain, few putative functional mobile elements were identified. Besides, a consistent occurrence of defense mechanisms such as CRISPR-Cas and restriction-modification systems may be responsible for the high genome synteny identified among members of this taxon.}, } @article {pmid33150080, year = {2020}, author = {Dahlhausen, KE and Jospin, G and Coil, DA and Eisen, JA and Wilkins, LGE}, title = {Isolation and sequence-based characterization of a koala symbiont: Lonepinella koalarum.}, journal = {PeerJ}, volume = {8}, number = {}, pages = {e10177}, pmid = {33150080}, issn = {2167-8359}, abstract = {Koalas (Phascolarctos cinereus) are highly specialized herbivorous marsupials that feed almost exclusively on Eucalyptus leaves, which are known to contain varying concentrations of many different toxic chemical compounds. The literature suggests that Lonepinella koalarum, a bacterium in the Pasteurellaceae family, can break down some of these toxic chemical compounds. Furthermore, in a previous study, we identified L. koalarum as the most predictive taxon of koala survival during antibiotic treatment. Therefore, we believe that this bacterium may be important for koala health. Here, we isolated a strain of L. 
koalarum from a healthy koala female and sequenced its genome using a combination of short-read and long-read sequencing. We placed the genome assembly into a phylogenetic tree based on 120 genome markers using the Genome Taxonomy Database (GTDB), which currently does not include any L. koalarum assemblies. Our genome assembly fell in the middle of a group of Haemophilus, Pasteurella and Basfia species. According to average nucleotide identity and a 16S rRNA gene tree, the closest relative of our isolate is L. koalarum strain Y17189. Then, we annotated the gene sequences and compared them to 55 closely related, publicly available genomes. Several genes that are known to be involved in carbohydrate metabolism could exclusively be found in L. koalarum relative to the other taxa in the pangenome, including glycoside hydrolase families GH2, GH31, GH32, GH43 and GH77. Among the predicted genes of L. koalarum were 79 candidates putatively involved in the degradation of plant secondary metabolites. Additionally, several genes coding for amino acid variants were found that had been shown to confer antibiotic resistance in other bacterial species against pulvomycin, beta-lactam antibiotics and the antibiotic efflux pump KpnH. In summary, this genetic characterization allows us to build hypotheses to explore the potentially beneficial role that L. koalarum might play in the koala intestinal microbiome. 
Characterizing and understanding beneficial symbionts at the whole genome level is important for the development of anti- and probiotic treatments for koalas, a highly threatened species due to habitat loss, wildfires, and high prevalence of Chlamydia infections.}, } @article {pmid33144553, year = {2020}, author = {Kim, E and Cho, EJ and Yang, SM and Kim, HY}, title = {Identification and monitoring of Lactobacillus delbrueckii subspecies using pangenomic-based novel genetic markers.}, journal = {Journal of microbiology and biotechnology}, volume = {}, number = {}, pages = {}, doi = {10.4014/jmb.2009.09034}, pmid = {33144553}, issn = {1738-8872}, abstract = {Genetic markers currently used for the discrimination of Lactobacillus delbrueckii subspecies have low efficiency for identification at subspecies level. Therefore, the objective of this study was to select novel genetic markers for accurate identification and discrimination of six L. delbrueckii subspecies based on pangenome analysis. This study evaluated L. delbrueckii genomes to avoid making incorrect conclusions in the process of selecting genetic markers due to mislabeled genome. Genome analysis showed that two genomes of L. delbrueckii subspecies deposited in NCBI were misidentified. Based on these results, subspecies-specific genetic markers were selected by comparing pan and core-genome. Genetic markers were confirmed to be specific for 59,196,562 genome sequences via in silico analysis. They were found in all strains of the same subspecies, but not in other subspecies or bacterial strains. These genetic markers also could be used to accurately identify genomes at the subspecies level for genomes known at the species level. A real-time PCR method for the detection of three main subspecies (L. delbrueckii subsp. delbrueckii, lactis, and bulgaricus) was developed to cost-effectively identify them using genetic markers. Results showed 100% specificity for each subspecies. 
These genetic markers could differentiate each subspecies from 44 other lactic acid bacteria. This real-time PCR method was then applied to monitor 26 probiotics and dairy products. It was also used to identify 64 unknown strains isolated from raw milk samples and dairy products. Results confirmed that unknown isolates and subspecies contained in the product could be accurately identified using this real-time PCR method.}, } @article {pmid33129664, year = {2021}, author = {Rogalski, E and Ehrmann, MA and Vogel, RF}, title = {Intraspecies diversity and genome-phenotype-associations in Fructilactobacillus sanfranciscensis.}, journal = {Microbiological research}, volume = {243}, number = {}, pages = {126625}, doi = {10.1016/j.micres.2020.126625}, pmid = {33129664}, issn = {1618-0623}, abstract = {In this study the intraspecies diversity of Fructilactobacillus (F.) sanfranciscensis (formerly Lactobacillus sanfranciscensis) was characterized by comparative genomics supported by physiological data. Twenty-four strains of F. sanfranciscensis were analyzed and sorted into six different genomic clusters. The core genome comprised only 43.14 % of the pan genome, i.e. 0.87 Mbp of 2.04 Mbp. The main annotated genomic differences reside in maltose, fructose and sucrose as well as nucleotide metabolism, use of electron acceptors, and exopolysaccharide formation. Furthermore, all strains are well equipped to cope with oxidative stress via NADH oxidase and a distinct thiol metabolism. Only ten of 24 genomes contain two maltose phosphorylase genes (mapA and mapB). In F. sanfranciscensis TMW 1.897 only mapA was found. All strains except those from genomic cluster 2 contained the mannitol dehydrogenase and should therefore be able to use fructose as external electron acceptor. Moreover, six strains were able to grow on fructose as sole carbon source, as they contained a functional fructokinase gene. No growth was observed on pentoses, i.e. 
xylose, arabinose or ribose, as sole carbon source. This can be referred to the absence of ribose pyranase rbsD in all genomes, and absence of or mutations in numerous other genes, which are essential for arabinose and xylose metabolism. Seven strains were able to produce exopolysaccharides (EPS) from sucrose. In addition, the strains containing levS were able to grow on sucrose as sole carbon source. Strains of one cluster exhibit auxotrophies for purine nucleotides. The physiological and genomic analyses suggest that the biodiversity of F. sanfranciscensis is larger than anticipated. Consequently, "original" habitats and lifestyles of F. sanfranciscensis may vary but can generally be referred to an adaptation to sugary (maltose/sucrose/fructose-rich) and aerobic environments as found in plants and insects. It can dominate sourdoughs as a result of reductive evolution and cooperation with fructose-delivering, acetate-tolerant yeasts.}, } @article {pmid33128615, year = {2020}, author = {Huang, WC and Hu, Y and Zhang, G and Li, M}, title = {Comparative genomic analysis reveals metabolic diversity of different Paenibacillus groups.}, journal = {Applied microbiology and biotechnology}, volume = {104}, number = {23}, pages = {10133-10143}, doi = {10.1007/s00253-020-10984-3}, pmid = {33128615}, issn = {1432-0614}, support = {91851105//National Natural Science Foundation of China (CN)/ ; }, abstract = {The genus Paenibacillus was originally recognized based on the 16S rRNA gene phylogeny. Recently, a standardized bacterial taxonomy approach based on a genome phylogeny has substantially revised the classification of Paenibacillus, dividing it into 23 genera. However, the metabolic differences among these groups remain undescribed. 
Here, genomes of 41 Paenibacillus strains comprising 25 species were sequenced, and a comparative genomic analysis was performed considering these and 187 publicly available Paenibacillus genomes to understand their phylogeny and metabolic differences. Phylogenetic analysis indicated that Paenibacillus clustered into 10 subgroups. Core genome and pan-genome analyses revealed similar functional categories among the different Paenibacillus subgroups; however, each group tended to harbor specific gene families. A large proportion of genes in the subgroups A, E, and G are related to carbohydrate metabolism. Among them, genes related to the glycoside hydrolase family were most abundant. Metabolic reconstruction of the newly sequenced genomes showed that the Embden-Meyerhof-Parnas pathway, pentose phosphate pathway, and citric acid cycle are central pathways of carbohydrate metabolism in Paenibacillus. Further, the genomes of the subgroups A and G lack genes involved in glyoxylate cycle and D-galacturonate degradation, respectively. The current study revealed the metabolic diversity of Paenibacillus subgroups assigned based on a genomic phylogeny and could inform the taxonomy of Paenibacillus. KEY POINTS: • Paenibacillus clustered into 10 subgroups. • Genomic content variation and metabolic diversity in the subgroup A, E, and G were described. • Carbohydrate transport and metabolism is important for Paenibacillus survival.}, } @article {pmid33115833, year = {2020}, author = {Zukancic, A and Khan, MA and Gurmen, SJ and Gliniecki, QM and Moritz-Kinkade, DL and Maddox, CW and Alam, MT}, title = {Staphylococcal Protein A (spa) Locus Is a Hot Spot for Recombination and Horizontal Gene Transfer in Staphylococcus pseudintermedius.}, journal = {mSphere}, volume = {5}, number = {5}, pages = {}, pmid = {33115833}, issn = {2379-5042}, abstract = {Staphylococcus pseudintermedius is a major canine pathogen but also occasionally colonizes and infects humans. 
Multidrug-resistant methicillin-resistant S. pseudintermedius (MDR MRSP) strains have emerged globally, making treatment and control of this pathogen challenging. Sequence type 71 (ST71), ST68, and ST45 are the most widespread and successful MDR MRSP clones. The potential genetic factors underlying the clonal success of these and other predominant clones remain unknown. Characterization of the pangenome, lineage-associated accessory genes, and genes acquired through horizontal gene transfer from other bacteria is important for identifying such factors. Here, we analyzed genome sequence data from 622 S. pseudintermedius isolates to investigate the evolution of pathogenicity across lineages. We show that the predominant clones carry one or more lineage-associated virulence genes. The gene encoding staphylococcal protein A (SpA), a key virulence factor involved in immune evasion and a potential vaccine antigen, is deleted in 62% of isolates. Most importantly, we have discovered that the spa locus is a hot spot for recombination and horizontal gene transfer in S. pseudintermedius, where genes related to restriction modification, prophage immunity, mercury resistance, and nucleotide and carbohydrate metabolism have been acquired in different lineages. Our study also establishes that ST45 is composed of two distinct sublineages that differ in their accessory gene content and virulence potential. Collectively, this study reports several previously undetected lineage-associated genetic factors that may have a role in the clonal success of the major MDR MRSP clones. These data provide a framework for future experimental studies on S. pseudintermedius pathogenesis and for developing novel therapeutics against this pathogen. IMPORTANCE: Staphylococcus pseudintermedius is a major canine pathogen but can also occasionally infect humans. Identification of genetic factors contributing to the virulence and clonal success of multidrug-resistant S. 
pseudintermedius clones is critical for the development of therapeutics against this pathogen. Here, we characterized the genome sequences of a global collection of 622 S. pseudintermedius isolates. We show that all major clones, besides carrying core virulence genes, which are present in all strains, carry one or more lineage-specific genes. Many of these genes have been acquired from other bacterial species through a horizontal gene transfer mechanism. Importantly, we have discovered that the staphylococcal protein A gene (spa), a widely used marker for molecular typing of S. pseudintermedius strains and a potential vaccine candidate antigen, is deleted in 62% of strains. Furthermore, the spa locus in S. pseudintermedius acts as a reservoir to accumulate lineage-associated genes with adaptive functions.}, } @article {pmid33106639, year = {2020}, author = {Ding, Y and Weckwerth, PR and Poretsky, E and Murphy, KM and Sims, J and Saldivar, E and Christensen, SA and Char, SN and Yang, B and Tong, AD and Shen, Z and Kremling, KA and Buckler, ES and Kono, T and Nelson, DR and Bohlmann, J and Bakker, MG and Vaughan, MM and Khalil, AS and Betsiashvili, M and Dressano, K and Köllner, TG and Briggs, SP and Zerbe, P and Schmelz, EA and Huffaker, A}, title = {Genetic elucidation of interconnected antibiotic pathways mediating maize innate immunity.}, journal = {Nature plants}, volume = {6}, number = {11}, pages = {1375-1388}, doi = {10.1038/s41477-020-00787-9}, pmid = {33106639}, issn = {2055-0278}, support = {2019-67011-29544//United States Department of Agriculture | National Institute of Food and Agriculture (NIFA)/ ; 2018-67013-28125//United States Department of Agriculture | National Institute of Food and Agriculture (NIFA)/ ; 1936492//NSF | BIO | Division of Integrative Organismal Systems (IOS)/ ; 1546899//NSF | BIO | Division of Integrative Organismal Systems (IOS)/ ; 1758976//NSF | BIO | Division of Integrative Organismal Systems (IOS)/ ; 1943591//NSF | BIO | 
Division of Integrative Organismal Systems (IOS)/ ; }, abstract = {Specialized metabolites constitute key layers of immunity that underlie disease resistance in crops; however, challenges in resolving pathways limit our understanding of the functions and applications of these metabolites. In maize (Zea mays), the inducible accumulation of acidic terpenoids is increasingly considered to be a defence mechanism that contributes to disease resistance. Here, to understand maize antibiotic biosynthesis, we integrated association mapping, pan-genome multi-omic correlations, enzyme structure-function studies and targeted mutagenesis. We define ten genes in three zealexin (Zx) gene clusters that encode four sesquiterpene synthases and six cytochrome P450 proteins that collectively drive the production of diverse antibiotic cocktails. Quadruple mutants in which the ability to produce zealexins (ZXs) is blocked exhibit a broad-spectrum loss of disease resistance. Genetic redundancies ensuring pathway resiliency to single null mutations are combined with enzyme substrate promiscuity, creating a biosynthetic hourglass pathway that uses diverse substrates and in vivo combinatorial chemistry to yield complex antibiotic blends. The elucidated genetic basis of biochemical phenotypes that underlie disease resistance demonstrates a predominant maize defence pathway and informs innovative strategies for transferring chemical immunity between crops.}, } @article {pmid33105850, year = {2020}, author = {Slizen, MV and Galzitskaya, OV}, title = {Comparative Analysis of Proteomes of a Number of Nosocomial Pathogens by KEGG Modules and KEGG Pathways.}, journal = {International journal of molecular sciences}, volume = {21}, number = {21}, pages = {}, pmid = {33105850}, issn = {1422-0067}, abstract = {Nosocomial (hospital-acquired) infections remain a serious challenge for health systems. The reason for this lies not only in the local imperfection of medical practices and protocols. 
The frequency of infection with antibiotic-resistant strains of bacteria is growing every year, both in developed and developing countries. In this work, a pangenome and comparative analysis of 201 genomes of Staphylococcus aureus, Enterobacter spp., Pseudomonas aeruginosa, and Mycoplasma spp. was performed on the basis of high-level functional annotations—KEGG pathways and KEGG modules. The first three organisms are serious nosocomial pathogens, often exhibiting multidrug resistance. Analysis of KEGG modules revealed methicillin resistance in 25% of S. aureus strains and resistance to carbapenems in 21% of Enterobacter spp. strains. P. aeruginosa has a wide range of unique efflux systems. One hundred percent of the analyzed strains have at least two drug resistance systems, and 75% of the strains have seven. Each of the organisms has a characteristic set of metabolic features, whose impact on drug resistance can be considered in future studies. Comparing the genomes of nosocomial pathogens with each other and with Mycoplasma genomes can expand our understanding of the versatility of certain metabolic features and mechanisms of drug resistance.}, } @article {pmid33105087, year = {2020}, author = {Zou, W and Ye, G and Zhang, K and Yang, H and Yang, J}, title = {Analysis of the core genome and pangenome of Clostridium butyricum.}, journal = {Genome}, volume = {}, number = {}, pages = {1-11}, doi = {10.1139/gen-2020-0072}, pmid = {33105087}, issn = {1480-3321}, abstract = {Clostridium butyricum is an anaerobic bacterium that inhabits broad niches. Clostridium butyricum is known for its production of butyrate, 1,3-propanediol, and hydrogen. This study aimed to present a comparative pangenome analysis of 24 strains isolated from different niches. We sequenced and annotated the genome of C. butyricum 3-3 isolated from the Chinese baijiu ecosystem. The pangenome of C. butyricum was open. 
The core genome, accessory genome, and strain-specific genes comprised 1011, 4543, and 1473 genes, respectively. In the core genome, Carbohydrate metabolism was the largest category, and genes in the biosynthetic pathway of butyrate and glycerol metabolism were conserved (in the core or soft-core genome). Furthermore, the 1,3-propanediol operon existed in 20 strains. In the accessory genome, numerous mobile genetic elements belonging to the Replication, recombination, and repair (L) category were identified. In addition, genome islands were identified in all 24 strains, ranging from 2 (strain KNU-L09) to 53 (strain SU1), and phage sequences were found in 17 of the 24 strains. This study provides an important genomic framework that could pave the way for the exploration of C. butyricum and future studies on the genetic diversification of C. butyricum.}, } @article {pmid33068485, year = {2020}, author = {Song, JM and Liu, DX and Xie, WZ and Yang, Z and Guo, L and Liu, K and Yang, QY and Chen, LL}, title = {BnPIR: Brassica napus pan-genome information resource for 1689 accessions.}, journal = {Plant biotechnology journal}, volume = {}, number = {}, pages = {}, doi = {10.1111/pbi.13491}, pmid = {33068485}, issn = {1467-7652}, support = {2019CFA014//Hubei Provincial Natural Science Foundation of China/ ; 2662018PY068//Fundamental Research Funds for the Central Universities/ ; }, } @article {pmid33066802, year = {2020}, author = {Li, H and Feng, X and Chu, C}, title = {The design and construction of reference pangenome graphs with minigraph.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {265}, pmid = {33066802}, issn = {1474-760X}, support = {R01 HG010040/HG/NHGRI NIH HHS/United States ; U01 HG010961/HG/NHGRI NIH HHS/United States ; }, abstract = {The recent advances in sequencing technologies enable the assembly of individual genomes to the quality of the reference genome. 
How to integrate multiple genomes from the same species and make the integrated representation accessible to biologists remains an open challenge. Here, we propose a graph-based data model and associated formats to represent multiple genomes while preserving the coordinate of the linear reference genome. We implement our ideas in the minigraph toolkit and demonstrate that we can efficiently construct a pangenome graph and compactly encode tens of thousands of structural variants missing from the current reference genome.}, } @article {pmid33065016, year = {2020}, author = {De Filippis, F and Pasolli, E and Ercolini, D}, title = {Newly Explored Faecalibacterium Diversity Is Connected to Age, Lifestyle, Geography, and Disease.}, journal = {Current biology : CB}, volume = {30}, number = {24}, pages = {4932-4943.e4}, doi = {10.1016/j.cub.2020.09.063}, pmid = {33065016}, issn = {1879-0445}, abstract = {Faecalibacterium is prevalent in the human gut and a promising microbe for the development of next-generation probiotics (NGPs) or biotherapeutics. Analyzing reference Faecalibacterium genomes and almost 3,000 Faecalibacterium-like metagenome-assembled genomes (MAGs) reconstructed from 7,907 human and 203 non-human primate gut metagenomes, we identified the presence of 22 different Faecalibacterium-like species-level genome bins (SGBs), some further divided in different strains according to the subject geographical origin. Twelve SGBs are globally spread in the human gut and show different genomic potential in the utilization of complex polysaccharides, suggesting that higher SGB diversity may be related with increased utilization of plant-based foods. Moreover, up to 11 different species may co-occur in the same subject, with lower diversity in Western populations, as well as intestinal inflammatory states and obesity. 
The newly explored Faecalibacterium diversity will be able to support the choice of strains suitable as NGPs, guided by the consideration of the differences existing in their functional potential.}, } @article {pmid33055096, year = {2020}, author = {Zhou, Z and Charlesworth, J and Achtman, M}, title = {Accurate reconstruction of bacterial pan- and core genomes with PEPPAN.}, journal = {Genome research}, volume = {30}, number = {11}, pages = {1667-1679}, pmid = {33055096}, issn = {1549-5469}, support = {/WT_/Wellcome Trust/United Kingdom ; }, abstract = {Bacterial genomes can contain traces of a complex evolutionary history, including extensive homologous recombination, gene loss, gene duplications, and horizontal gene transfer. To reconstruct the phylogenetic and population history of a set of multiple bacteria, it is necessary to examine their pangenome, the composite of all the genes in the set. Here we introduce PEPPAN, a novel pipeline that can reliably construct pangenomes from thousands of genetically diverse bacterial genomes that represent the diversity of an entire genus. PEPPAN outperforms existing pangenome methods by providing consistent gene and pseudogene annotations extended by similarity-based gene predictions, and identifying and excluding paralogs by combining tree- and synteny-based approaches. The PEPPAN package additionally includes PEPPAN_parser, which implements additional downstream analyses, including the calculation of trees based on accessory gene content or allelic differences between core genes. To test the accuracy of PEPPAN, we implemented SimPan, a novel pipeline for simulating the evolution of bacterial pangenomes. We compared the accuracy and speed of PEPPAN with four state-of-the-art pangenome pipelines using both empirical and simulated data sets. PEPPAN was more accurate and more specific than any of the other pipelines and was almost as fast as any of them. 
As a case study, we used PEPPAN to construct a pangenome of approximately 40,000 genes from 3052 representative genomes spanning at least 80 species of Streptococcus. The resulting gene and allelic trees provide an unprecedented overview of the genomic diversity of the entire Streptococcus genus.}, } @article {pmid33050495, year = {2020}, author = {Kumar, R and Register, K and Christopher-Hennings, J and Moroni, P and Gioia, G and Garcia-Fernandez, N and Nelson, J and Jelinski, MD and Lysnyansky, I and Bayles, D and Alt, D and Scaria, J}, title = {Population Genomic Analysis of Mycoplasma bovis Elucidates Geographical Variations and Genes associated with Host-Types.}, journal = {Microorganisms}, volume = {8}, number = {10}, pages = {}, pmid = {33050495}, issn = {2076-2607}, abstract = {Among more than twenty species belonging to the class Mollicutes, Mycoplasma bovis is the most common cause of bovine mycoplasmosis in North America and Europe. Bovine mycoplasmosis causes significant economic loss in the cattle industry. The number of M. bovis positive herds recently has increased in North America and Europe. Since antibiotic treatment is ineffective and no efficient vaccine is available, M. bovis induced mycoplasmosis is primarily controlled by herd management measures such as the restriction of moving infected animals out of the herds and culling of infected or shedders of M. bovis. To better understand the population structure and genomic factors that may contribute to its transmission, we sequenced 147 M. bovis strains isolated from four different countries viz. USA (n = 121), Canada (n = 22), Israel (n = 3) and Lithuania (n = 1). All except two of the isolates (KRB1 and KRB8) were isolated from two host types i.e., bovine (n = 75) and bison (n = 70). We performed a large-scale comparative analysis of M. bovis genomes by integrating 103 publicly available genomes and our dataset (250 total genomes). 
Whole genome single nucleotide polymorphism (SNP) based phylogeny using M. agalactiae as an outgroup revealed that M. bovis population structure is composed of five different clades. USA isolates showed a high degree of genomic divergence in comparison to the Australian isolates. Based on host of origin, all the isolates in clade IV was of bovine origin, whereas majority of the isolates in clades III and V was of bison origin. Our comparative genome analysis also revealed that M. bovis has an open pangenome with a large breadth of unexplored diversity of genes. The function based analysis of autogenous vaccine candidates (n = 10) included in this study revealed that their functional diversity does not span the genomic diversity observed in all five clades identified in this study. Our study also found that M. bovis genome harbors a large number of IS elements and their number increases significantly (p = 7.8 × 10⁻⁶) as the genome size increases. Collectively, the genome data and the whole genome-based population analysis in this study may help to develop better understanding of M. bovis induced mycoplasmosis in cattle.}, } @article {pmid33040146, year = {2020}, author = {Eizenga, JM and Novak, AM and Kobayashi, E and Villani, F and Cisar, C and Heumos, S and Hickey, G and Colonna, V and Paten, B and Garrison, E}, title = {Efficient dynamic variation graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btaa640}, pmid = {33040146}, issn = {1367-4811}, abstract = {MOTIVATION: Pangenomics is a growing field within computational genomics. Many pangenomic analyses use bidirected sequence graphs as their core data model. However, implementing and correctly using this data model can be difficult, and the scale of pangenomic datasets can be challenging to work at. These challenges have impeded progress in this field.

RESULTS: Here, we present a stack of two C++ libraries, libbdsg and libhandlegraph, which use a simple, field-proven interface, designed to expose elementary features of these graphs while preventing common graph manipulation mistakes. The libraries also provide a Python binding. Using a diverse collection of pangenome graphs, we demonstrate that these tools allow for efficient construction and manipulation of large genome graphs with dense variation. For instance, the speed and memory usage are up to an order of magnitude better than the prior graph implementation in the VG toolkit, which has now transitioned to using libbdsg's implementations.

libhandlegraph and libbdsg are available under an MIT License from https://github.com/vgteam/libhandlegraph and https://github.com/vgteam/libbdsg.}, } @article {pmid33037962, year = {2020}, author = {Kumar, J and Sen Gupta, D}, title = {Prospects of next generation sequencing in lentil breeding.}, journal = {Molecular biology reports}, volume = {47}, number = {11}, pages = {9043-9053}, doi = {10.1007/s11033-020-05891-9}, pmid = {33037962}, issn = {1573-4978}, abstract = {Lentil is an important food legume crop that has large and complex genome. During past years, considerable attention has been given on the use of next generation sequencing for enriching the genomic resources including identification of SSR and SNP markers, development of unigenes, transcripts, and identification of candidate genes for biotic and abiotic stresses, analysis of genetic diversity and identification of genes/ QTLs for agronomically important traits. However, in other crops including pulses, next generation sequencing has revolutionized the genomic research and helped in genomic assisted breeding rapidly and cost effectively. The present review discuss current status and future prospects of the use NGS based breeding in lentil.}, } @article {pmid33028681, year = {2020}, author = {Muthuirulandi Sethuvel, DP and Mutreja, A and Pragasam, AK and Vasudevan, K and Murugan, D and Anandan, S and Michael, JS and Walia, K and Veeraraghavan, B}, title = {Phylogenetic and Evolutionary Analysis Reveals the Recent Dominance of Ciprofloxacin-Resistant Shigella sonnei and Local Persistence of S. flexneri Clones in India.}, journal = {mSphere}, volume = {5}, number = {5}, pages = {}, pmid = {33028681}, issn = {2379-5042}, support = {/WT_/Wellcome Trust/United Kingdom ; }, abstract = {Shigella is the second leading cause of bacterial diarrhea worldwide. Recently, Shigella sonnei seems to be replacing Shigella flexneri in low- and middle-income countries undergoing economic development. 
Despite this, studies focusing on these species at the genomic level remain largely unexplored. Here, we compared the genome sequences of S. flexneri and S. sonnei isolates from India with the publicly available genomes of global strains. Our analysis provides evidence for the long-term persistence of all phylogenetic groups (PGs) of S. flexneri and the recent dominance of the ciprofloxacin-resistant S. sonnei lineage in India. Within S. flexneri PGs, the majority of the study isolates belonged to PG3 within the predominance of serotype 2. For S. sonnei, the current pandemic involves globally distributed multidrug-resistant (MDR) clones that belong to Central Asia lineage III. The presence of such epidemiologically dominant lineages in association with stable antimicrobial resistance (AMR) determinants results in successful survival in the community. IMPORTANCE: Shigella is the second leading cause of bacterial diarrhea worldwide. This has been categorized as a priority pathogen among enteric bacteria by the Global Antimicrobial Resistance Surveillance System (GLASS) of the World Health Organization (WHO). Recently, S. sonnei seems to be replacing S. flexneri in low- and middle-income countries undergoing economic development. Antimicrobial resistance in S. flexneri and S. sonnei is a growing international concern, specifically with the international dominance of the multidrug-resistant (MDR) lineage. Genomic studies focusing on S. flexneri and S. sonnei in India remain largely unexplored. This study provides information on the introduction and expansion of drug-resistant Shigella strains in India for the first time by comparing the genome sequences of S. flexneri and S. sonnei isolates from India with the publicly available genomes of global strains. The study discusses the key differences between the two dominant species of Shigella at the genomic level to understand the evolutionary trends and genome dynamics of emerging and existing resistance clones. 
The present work demonstrates evidence for the long-term persistence of all PGs of S. flexneri and the recent dominance of a ciprofloxacin-resistant S. sonnei lineage in India.}, } @article {pmid33025059, year = {2020}, author = {Khilyas, IV and Sorokina, AV and Markelova, MI and Belenikin, M and Shafigullina, L and Tukhbatova, RI and Shagimardanova, EI and Blom, J and Sharipova, MR and Cohen, MF}, title = {Genomic and phenotypic analysis of siderophore-producing Rhodococcus qingshengii strain S10 isolated from an arid weathered serpentine rock environment.}, journal = {Archives of microbiology}, volume = {}, number = {}, pages = {}, doi = {10.1007/s00203-020-02057-w}, pmid = {33025059}, issn = {1432-072X}, support = {19-74-00062//Russian Science Foundation/ ; }, abstract = {The success of members of the genus Rhodococcus in colonizing arid rocky environments is owed in part to desiccation tolerance and an ability to extract iron through the secretion and uptake of siderophores. Here, we report a comprehensive genomic and taxonomic analysis of Rhodococcus qingshengii strain S10 isolated from weathered serpentine rock at the arid Khalilovsky massif, Russia. Sequence comparisons of whole genomes and of selected marker genes clearly showed strain S10 to belong to the R. qingshengii species. Four prophage sequences within the R. qingshengii S10 genome were identified, one of which encodes for a putative siderophore-interacting protein. Among the ten non-ribosomal peptides synthase (NRPS) clusters identified in the strain S10 genome, two show high homology to those responsible for siderophore synthesis. Phenotypic analyses demonstrated that R. 
qingshengii S10 secretes siderophores and possesses adaptive features (tolerance of up to 8% NaCl and pH 9) that should enable survival in its native habitat within dry serpentine rock.}, } @article {pmid33023476, year = {2020}, author = {Sonnenberg, CB and Kahlke, T and Haugen, P}, title = {Vibrionaceae core, shell and cloud genes are non-randomly distributed on Chr 1: An hypothesis that links the genomic location of genes with their intracellular placement.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {695}, pmid = {33023476}, issn = {1471-2164}, abstract = {BACKGROUND: The genome of Vibrionaceae bacteria, which consists of two circular chromosomes, is replicated in a highly ordered fashion. In fast-growing bacteria, multifork replication results in higher gene copy numbers and increased expression of genes located close to the origin of replication of Chr 1 (ori1). This is believed to be a growth optimization strategy to satisfy the high demand of essential growth factors during fast growth. The relationship between ori1-proximate growth-related genes and gene expression during fast growth has been investigated by many researchers. However, it remains unclear which other gene categories that are present close to ori1 and if expression of all ori1-proximate genes is increased during fast growth, or if expression is selectively elevated for certain gene categories.

RESULTS: We calculated the pangenome of all complete genomes from the Vibrionaceae family and mapped the four pangene categories, core, softcore, shell and cloud, to their chromosomal positions. This revealed that core and softcore genes were found heavily biased towards ori1, while shell genes were overrepresented at the opposite part of Chr 1 (i.e., close to ter1). RNA-seq of Aliivibrio salmonicida and Vibrio natriegens showed global gene expression patterns that consistently correlated with chromosomal distance to ori1. Despite a biased gene distribution pattern, all pangene categories contributed to a skewed expression pattern at fast-growing conditions, whereas at slow-growing conditions, softcore, shell and cloud genes were responsible for elevated expression.

CONCLUSION: The pangene categories were non-randomly organized on Chr 1, with an overrepresentation of core and softcore genes around ori1, and overrepresentation of shell and cloud genes around ter1. Furthermore, we mapped our gene distribution data on to the intracellular positioning of chromatin described for V. cholerae, and found that core/softcore and shell/cloud genes appear enriched at two spatially separated intracellular regions. Based on these observations, we hypothesize that there is a link between the genomic location of genes and their cellular placement.}, } @article {pmid33022985, year = {2020}, author = {Malik, A and Kim, YR and Kim, SB}, title = {Genome Mining of the Genus Streptacidiphilus for Biosynthetic and Biodegradation Potential.}, journal = {Genes}, volume = {11}, number = {10}, pages = {}, pmid = {33022985}, issn = {2073-4425}, abstract = {The genus Streptacidiphilus represents a group of acidophilic actinobacteria within the family Streptomycetaceae, and currently encompasses 15 validly named species, which include five recent additions within the last two years. Considering the potential of the related genera within the family, namely Streptomyces and Kitasatospora, these relatively new members of the family can also be a promising source for novel secondary metabolites. At present, 15 genome data for 11 species from this genus are available, which can provide valuable information on their biology including the potential for metabolite production as well as enzymatic activities in comparison to the neighboring taxa. In this study, the genome sequences of 11 Streptacidiphilus species were subjected to the comparative analysis together with selected Streptomyces and Kitasatospora genomes. This study represents the first comprehensive comparative genomic analysis of the genus Streptacidiphilus. 
The results indicate that the genomes of Streptacidiphilus contained various secondary metabolite (SM) producing biosynthetic gene clusters (BGCs), some of them exclusively identified in Streptacidiphilus only. Several of these clusters may potentially code for SMs that may have a broad range of bioactivities, such as antibacterial, antifungal, antimalarial and antitumor activities. The biodegradation capabilities of Streptacidiphilus were also explored by investigating the hydrolytic enzymes for complex carbohydrates. Although all genomes were enriched with carbohydrate-active enzymes (CAZymes), their numbers in the genomes of some strains such as Streptacidiphilus carbonis NBRC 100919T were higher as compared to well-known carbohydrate degrading organisms. These distinctive features of each Streptacidiphilus species make them interesting candidates for future studies with respect to their potential for SM production and enzymatic activities.}, } @article {pmid33022031, year = {2020}, author = {Chambers, J and Sparks, N and Sydney, N and Livingstone, PG and Cookson, AR and Whitworth, DE}, title = {Comparative Genomics and Pan-Genomics of the Myxococcaceae, including a Description of Five Novel Species: Myxococcus eversor sp. nov., Myxococcus llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogochensis sp. nov., Myxococcus vastator sp. nov., Pyxidicoccus caerfyrddinensis sp. nov., and Pyxidicoccus trucidator sp. nov.}, journal = {Genome biology and evolution}, volume = {12}, number = {12}, pages = {2289--2302}, doi = {10.1093/gbe/evaa212}, pmid = {33022031}, issn = {1759-6653}, abstract = {Members of the predatory Myxococcales (myxobacteria) possess large genomes, undergo multicellular development, and produce diverse secondary metabolites, which are being actively prospected for novel drug discovery. To direct such efforts, it is important to understand the relationships between myxobacterial ecology, evolution, taxonomy, and genomic variation. 
This study investigated the genomes and pan-genomes of organisms within the Myxococcaceae, including the genera Myxococcus and Corallococcus, the most abundant myxobacteria isolated from soils. Previously, ten species of Corallococcus were known, whereas six species of Myxococcus phylogenetically surrounded a third genus (Pyxidicoccus) composed of a single species. Here, we describe draft genome sequences of five novel species within the Myxococcaceae (Myxococcus eversor, Myxococcus llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogochensis, Myxococcus vastator, Pyxidicoccus caerfyrddinensis, and Pyxidicoccus trucidator) and for the Pyxidicoccus type species strain, Pyxidicoccus fallax DSM 14698T. Genomic and physiological comparisons demonstrated clear differences between the five novel species and every other Myxococcus or Pyxidicoccus spp. type strain. Subsequent analyses of type strain genomes showed that both the Corallococcus pan-genome and the combined Myxococcus and Pyxidicoccus (Myxococcus/Pyxidicoccus) pan-genome are large and open, but with clear differences. Genomes of Corallococcus spp. are generally smaller than those of Myxococcus/Pyxidicoccus spp. but have core genomes three times larger. Myxococcus/Pyxidicoccus spp. genomes are more variable in size, with larger and more unique sets of accessory genes than those of Corallococcus species. 
In both genera, biosynthetic gene clusters are relatively enriched in the shell pan-genomes, implying they grant a greater evolutionary benefit than other shell genes, presumably by conferring selective advantages during predation.}, } @article {pmid33016627, year = {2020}, author = {Jensen, SE and Charles, JR and Muleta, K and Bradbury, PJ and Casstevens, T and Deshpande, SP and Gore, MA and Gupta, R and Ilut, DC and Johnson, L and Lozano, R and Miller, Z and Ramu, P and Rathore, A and Romay, MC and Upadhyaya, HD and Varshney, RK and Morris, GP and Pressoir, G and Buckler, ES and Ramstein, GP}, title = {A sorghum practical haplotype graph facilitates genome-wide imputation and cost-effective genomic prediction.}, journal = {The plant genome}, volume = {13}, number = {1}, pages = {e20009}, doi = {10.1002/tpg2.20009}, pmid = {33016627}, issn = {1940-3372}, support = {/GATES/Bill & Melinda Gates Foundation/United States ; }, mesh = {Cost-Benefit Analysis ; Genome ; Genomics ; Haplotypes ; *Sorghum/genetics ; }, abstract = {Successful management and utilization of increasingly large genomic datasets is essential for breeding programs to accelerate cultivar development. To help with this, we developed a Sorghum bicolor Practical Haplotype Graph (PHG) pangenome database that stores haplotypes and variant information. We developed two PHGs in sorghum that were used to identify genome-wide variants for 24 founders of the Chibas sorghum breeding program from 0.01x sequence coverage. The PHG called single nucleotide polymorphisms (SNPs) with 5.9% error at 0.01x coverage-only 3% higher than PHG error when calling SNPs from 8x coverage sequence. Additionally, 207 progenies from the Chibas genomic selection (GS) training population were sequenced and processed through the PHG. Missing genotypes were imputed from PHG parental haplotypes and used for genomic prediction. 
Mean prediction accuracies with PHG SNP calls range from .57-.73 and are similar to prediction accuracies obtained with genotyping-by-sequencing or targeted amplicon sequencing (rhAmpSeq) markers. This study demonstrates the use of a sorghum PHG to impute SNPs from low-coverage sequence data and shows that the PHG can unify genotype calls across multiple sequencing platforms. By reducing input sequence requirements, the PHG can decrease the cost of genotyping, make GS more feasible, and facilitate larger breeding populations. Our results demonstrate that the PHG is a useful research and breeding tool that maintains variant information from a diverse group of taxa, stores sequence data in a condensed but readily accessible format, unifies genotypes across genotyping platforms, and provides a cost-effective option for genomic selection.}, } @article {pmid33014966, year = {2020}, author = {Roe, C and Williamson, CHD and Vazquez, AJ and Kyger, K and Valentine, M and Bowers, JR and Phillips, PD and Harrison, V and Driebe, E and Engelthaler, DM and Sahl, JW}, title = {Bacterial Genome Wide Association Studies (bGWAS) and Transcriptomics Identifies Cryptic Antimicrobial Resistance Mechanisms in Acinetobacter baumannii.}, journal = {Frontiers in public health}, volume = {8}, number = {}, pages = {451}, pmid = {33014966}, issn = {2296-2565}, abstract = {Antimicrobial resistance (AMR) in the nosocomial pathogen, Acinetobacter baumannii, is becoming a serious public health threat. While some mechanisms of AMR have been reported, understanding novel mechanisms of resistance is critical for identifying emerging resistance. One of the first steps in identifying novel AMR mechanisms is performing genotype/phenotype association studies; however, performing these studies is complicated by the plastic nature of the A. baumannii pan-genome. In this study, we compared the antibiograms of 12 antimicrobials associated with multiple drug families for 84 A. 
baumannii isolates, many isolated in Arizona, USA. In silico screening of these genomes for known AMR mechanisms failed to identify clear correlations for most drugs. We then performed a bacterial genome wide association study (bGWAS) looking for associations between all possible 21-mers; this approach generally failed to identify mechanisms that explained the resistance phenotype. In order to decrease the genomic noise associated with population stratification, we compared four phylogenetically-related pairs of isolates with differing susceptibility profiles. RNA-Sequencing (RNA-Seq) was performed on paired isolates and differentially-expressed genes were identified. In these isolate pairs, five different potential mechanisms were identified, highlighting the difficulty of broad AMR surveillance in this species. To verify and validate differential expression, amplicon sequencing was performed. These results suggest that a diagnostic platform based on gene expression rather than genomics alone may be beneficial in certain surveillance efforts. The implementation of such advanced diagnostics coupled with increased AMR surveillance will potentially improve A. baumannii infection treatment and patient outcomes.}, } @article {pmid33013780, year = {2020}, author = {Yang, Y and Zhang, Y and Cápiro, NL and Yan, J}, title = {Genomic Characteristics Distinguish Geographically Distributed Dehalococcoidia.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {546063}, pmid = {33013780}, issn = {1664-302X}, abstract = {Dehalococcoidia (Dia) class microorganisms are frequently found in various pristine and contaminated environments. 
Metagenome-assembled genomes (MAGs) and single-cell amplified genomes (SAGs) studies have substantially improved the understanding of Dia microbial ecology and evolution; however, an updated thorough investigation on the genomic and evolutionary characteristics of Dia microorganisms distributed in geographically distinct environments has not been implemented. In this study, we analyzed available genomic data to unravel Dia evolutionary and metabolic traits. Based on the phylogeny of 16S rRNA genes retrieved from sixty-seven genomes, Dia microorganisms can be categorized into three groups, the terrestrial cluster that contains all Dehalococcoides and Dehalogenimonas strains, the marine cluster I, and the marine cluster II. These results reveal that a higher ratio of horizontally transferred genetic materials was found in the Dia marine clusters compared to that of the Dia terrestrial cluster. Pangenome analysis further suggests that Dia microorganisms have evolved cluster-specific enzymes (e.g., dehalogenase in terrestrial Dia, sulfite reductase in marine Dia) and biosynthesis capabilities (e.g., siroheme biosynthesis in marine Dia). Marine Dia microorganisms are likely adapted to versatile metabolisms for energy conservation besides organohalide respiration. 
The genomic differences between marine and terrestrial Dia may suggest distinct functions and roles in element cycling (e.g., carbon, sulfur, chlorine), which require interdisciplinary approaches to unravel the physiology and evolution of Dia in various environments.}, } @article {pmid33013760, year = {2020}, author = {Kim, HB and Kim, E and Yang, SM and Lee, S and Kim, MJ and Kim, HY}, title = {Development of Real-Time PCR Assay to Specifically Detect 22 Bifidobacterium Species and Subspecies Using Comparative Genomics.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {2087}, pmid = {33013760}, issn = {1664-302X}, abstract = {Bifidobacterium species are used as probiotics to provide beneficial effects to humans. These effects are specific to some species or subspecies of Bifidobacterium. However, some Bifidobacterium species or subspecies are not distinguished because similarity of 16S rRNA and housekeeping gene sequences within Bifidobacterium species is very high. In this study, we developed a real-time polymerase chain reaction (PCR) assay to rapidly and accurately detect 22 Bifidobacterium species by selecting genetic markers using comparative genomic analysis. A total of 210 Bifidobacterium genome sequences were compared to select species- or subspecies-specific genetic markers. A phylogenetic tree based on pan-genomes generated clusters according to Bifidobacterium species or subspecies except that two strains were not grouped with their subspecies. Based on pan-genomes constructed, species- or subspecies-specific genetic markers were selected. The specificity of these markers was confirmed by aligning these genes against 210 genome sequences. Real-time PCR could detect 22 Bifidobacterium specifically. We constructed the criterion for quantification by standard curves. To further test the developed assay for commercial food products, we monitored 26 probiotic products and 7 dairy products. 
Real-time PCR results and labeling data were then compared. Most of these products (21/33, 63.6%) were consistent with their label claims. Some products labeled at species level only can be detected up to subspecies level through our developed assay.}, } @article {pmid33008825, year = {2020}, author = {Akob, DM and Hallenbeck, M and Beulig, F and Fabisch, M and Küsel, K and Keffer, JL and Woyke, T and Shapiro, N and Lapidus, A and Klenk, HP and Chan, CS}, title = {Mixotrophic Iron-Oxidizing Thiomonas Isolates from an Acid Mine Drainage-Affected Creek.}, journal = {Applied and environmental microbiology}, volume = {86}, number = {24}, pages = {}, pmid = {33008825}, issn = {1098-5336}, mesh = {Burkholderiales/*metabolism ; Germany ; Iron/*metabolism ; Mining ; Oxidation-Reduction ; Rivers/*microbiology ; Waste Water/*microbiology ; }, abstract = {Natural attenuation of heavy metals occurs via coupled microbial iron cycling and metal precipitation in creeks impacted by acid mine drainage (AMD). Here, we describe the isolation, characterization, and genomic sequencing of two iron-oxidizing bacteria (FeOB) species: Thiomonas ferrovorans FB-6 and Thiomonas metallidurans FB-Cd, isolated from slightly acidic (pH 6.3), Fe-rich, AMD-impacted creek sediments. These strains precipitated amorphous iron oxides, lepidocrocite, goethite, and magnetite or maghemite and grew at a pH optimum of 5.5. While Thiomonas spp. are known as mixotrophic sulfur oxidizers and As oxidizers, the FB strains oxidized Fe, which suggests they can efficiently remove Fe and other metals via coprecipitation. Previous evidence for Thiomonas sp. Fe oxidation is largely ambiguous, possibly because of difficulty demonstrating Fe oxidation in heterotrophic/mixotrophic organisms. Therefore, we also conducted a genomic analysis to identify genetic mechanisms of Fe oxidation, other metal transformations, and additional adaptations, comparing the two FB strain genomes with 12 other Thiomonas genomes. 
The FB strains fall within a relatively novel group of Thiomonas strains that includes another strain (b6) with solid evidence of Fe oxidation. Most Thiomonas isolates, including the FB strains, have the putative iron oxidation gene cyc2, but only the two FB strains possess the putative Fe oxidase genes mtoAB. The two FB strain genomes contain the highest numbers of strain-specific gene clusters, greatly increasing the known Thiomonas genetic potential. Our results revealed that the FB strains are two distinct novel species of Thiomonas with the genetic potential for bioremediation of AMD via iron oxidation. IMPORTANCE: As AMD moves through the environment, it impacts aquatic ecosystems, but at the same time, these ecosystems can naturally attenuate contaminated waters via acid neutralization and catalyzing metal precipitation. This is the case in the former Ronneburg uranium-mining district, where AMD impacts creek sediments. We isolated and characterized two iron-oxidizing Thiomonas species that are mildly acidophilic to neutrophilic and that have two genetic pathways for iron oxidation. These Thiomonas species are well positioned to naturally attenuate AMD as it discharges across the landscape.}, } @article {pmid33007861, year = {2020}, author = {Harris, LG and Bodger, O and Post, V and Mack, D and Morgenstern, M and Rohde, H and Moriarty, TF and Wilkinson, TS}, title = {Temporal Changes in Patient-Matched Staphylococcus epidermidis Isolates from Infections: towards Defining a 'True' Persistent Infection.}, journal = {Microorganisms}, volume = {8}, number = {10}, pages = {}, pmid = {33007861}, issn = {2076-2607}, abstract = {Staphylococcus epidermidis is found naturally on the skin but is a common cause of persistent orthopaedic device-related infections (ODRIs). This study used a pan-genome and gene-by-gene approach to analyse the clonality of whole genome sequences (WGS) of 115 S. epidermidis isolates from 55 patients with persistent ODRIs. 
Analysis of the 522 gene core genome revealed that the isolates clustered into three clades, and MLST analysis showed that 83% of the isolates belonged to clonal complex 2 (CC2). Analysis also found 13 isolate pairs had different MLST types and less than 70% similarity within the genes; hence, these were defined as re-infection by a different S. epidermidis strain. Comparison of allelic diversity in the remaining 102 isolates (49 patients) revealed that 6 patients had microevolved infections (>7 allele differences), and only 37 patients (77 isolates) had a 'true' persistent infection. Analysis of the core genomes of isolate pairs from 37 patients found 110/841 genes had variations; mainly in metabolism associated genes. The accessory genome consisted of 2936 genes; with an average size of 1515 genes. To conclude, this study demonstrates the advantage of using WGS for identifying the accuracy of a persistent infection diagnosis. Hence, persistent infections can be defined as 'true' persistent infections if the core genome of paired isolates has ≤7 allele differences; microevolved persistent infection if the paired isolates have >7 allele differences but same MLST type; and polyclonal if they are the same species but a different MLST type.}, } @article {pmid32996243, year = {2020}, author = {Srivastava, AK and Srivastava, R and Sharma, A and Bharati, AP and Tiwari, PK and Singh, AK and Srivastava, AK and Chakdar, H and Kashyap, PL and Saxena, AK}, title = {Pan-genome analysis of Exiguobacterium reveals species delineation and genomic similarity with Exiguobacterium profundum PHM 11.}, journal = {Environmental microbiology reports}, volume = {12}, number = {6}, pages = {639--650}, doi = {10.1111/1758-2229.12890}, pmid = {32996243}, issn = {1758-2229}, support = {//ICAR-CRP Genomics/ ; //ICAR-AMAAS/ ; }, abstract = {The stint of the bacterial species is convoluting, but the new algorithms to calculate genome-to-genome distance (GGD) and DNA-DNA hybridization (DDH) for 
comparative genome analysis have rejuvenated the exploration of species and sub-species characterization. The present study reports the first whole genome sequence of Exiguobacterium profundum PHM11. PHM11 genome consists of ~ 2.92 Mb comprising 48 contigs, 47.93% G + C content. Functional annotations revealed a total of 3033 protein coding genes and 33 non-protein coding genes. Out of these, only 2316 could be characterized and others reported as hypothetical proteins. The comparative analysis of predicted proteome of PHM11 with five other Exiguobacterium sp. identified 3806 clusters, out of which the PHM11 shared a total of 2723 clusters having 1664 common clusters, 131 singletons and 928 distributed between five species. The pan-genome analysis of 70 different genomic sequences of Exiguobacterium strains devoid of a species taxon was done on the basis of GGD and the DDH which identified eight genomes analogous to the PHM11 at species level and may be characterized as E. profundum. The ANI value and phylogenetic tree analysis also support the same. The results regarding pan-genome analysis provide a convincing insight for delineation of these eight strains to species.}, } @article {pmid32983925, year = {2020}, author = {Patel, M and Patel, HM and Vohra, N and Dave, S}, title = {Complete genome sequencing and comparative genome characterization of the lignocellulosic biomass degrading bacterium Pseudomonas stutzeri MP4687 from cattle rumen.}, journal = {Biotechnology reports (Amsterdam, Netherlands)}, volume = {28}, number = {}, pages = {e00530}, pmid = {32983925}, issn = {2215-017X}, abstract = {We report the complete genome sequencing of novel Pseudomonas stutzeri strain MP4687 isolated from cattle rumen. Various strains of P. stutzeri have been reported from different environmental samples including oil-contaminated sites, crop roots, air, and human clinical samples, but not from rumen samples, which is being reported here for the first time. The genome of P. 
stutzeri MP4687 has a single replicon, 4.75 Mb chromosome and a G + C content of 63.45%. The genome encodes for 4,790 protein coding genes including 164 CAZymes and 345 carbohydrate processing genes. The isolate MP4687 harbors LCB hydrolyzing potential through endoglucanase (4.5 U/mL), xylanase (3.1 U/mL), β-glucosidase (3.3 U/mL) and β-xylosidase (1.9 U/mL) activities. The pangenome analysis further revealed that MP4687 has a very high number of unique genes (>2100) compared to other P. stutzeri genomes, which might have an important role in rumen functioning.}, } @article {pmid32983058, year = {2020}, author = {Verma, DK and Vasudeva, G and Sidhu, C and Pinnaka, AK and Prasad, SE and Thakur, KG}, title = {Biochemical and Taxonomic Characterization of Novel Haloarchaeal Strains and Purification of the Recombinant Halotolerant α-Amylase Discovered in the Isolate.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {2082}, pmid = {32983058}, issn = {1664-302X}, abstract = {Haloarchaea are salt-loving archaea and potential source of industrially relevant halotolerant enzymes. In the present study, three reddish-pink, extremely halophilic archaeal strains, namely wsp1 (wsp-water sample Pondicherry), wsp3, and wsp4, were isolated from the Indian Solar saltern. The phylogenetic analysis based on 16S rRNA gene sequences suggests that both wsp3 and wsp4 strains belong to Halogeometricum borinquense while wsp1 is closely related to Haloferax volcanii species. The comparative genomics revealed an open pangenome for both genera investigated here. Whole-genome sequence analysis revealed that these isolates have multiple copies of industrially/biotechnologically important unique genes and enzymes. Among these unique enzymes, for recombinant expression and purification, we selected four putative α-amylases identified in these three isolates. 
We successfully purified functional halotolerant recombinant Amy2, from wsp1 using pelB signal sequence-based secretion strategy using Escherichia coli as an expression host. This method may prove useful to produce functional haloarchaeal secretory recombinant proteins suitable for commercial or research applications. Biochemical analysis of Amy2 suggests the halotolerant nature of the enzyme having maximum enzymatic activity observed at 1 M NaCl. We also report the isolation and characterization of carotenoids purified from these isolates. This study highlights the presence of several industrially important enzymes in the haloarchaeal strains which may potentially have improved features like stability and salt tolerance suitable for industrial applications.}, } @article {pmid32979565, year = {2020}, author = {Chen, Y and Song, W and Xie, X and Wang, Z and Guan, P and Peng, H and Jiao, Y and Ni, Z and Sun, Q and Guo, W}, title = {A Collinearity-Incorporating Homology Inference Strategy for Connecting Emerging Assemblies in the Triticeae Tribe as a Pilot Practice in the Plant Pangenomic Era.}, journal = {Molecular plant}, volume = {13}, number = {12}, pages = {1694--1708}, doi = {10.1016/j.molp.2020.09.019}, pmid = {32979565}, issn = {1752-9867}, abstract = {Plant genome sequencing has dramatically increased, and some species even have multiple high-quality reference versions. Demands for clade-specific homology inference and analysis have increased in the pangenomic era. Here we present a novel method, GeneTribe (https://chenym1.github.io/genetribe/), for homology inference among genetically similar genomes that incorporates gene collinearity and shows better performance than traditional sequence-similarity-based methods in terms of accuracy and scalability. The Triticeae tribe is a typical allopolyploid-rich clade with complex species relationships that includes many important crops, such as wheat, barley, and rye. 
We built Triticeae-GeneTribe (http://wheat.cau.edu.cn/TGT/), a homology database, by integrating 12 Triticeae genomes and 3 outgroup model genomes and implemented versatile analysis and visualization functions. With macrocollinearity analysis, we were able to construct a refined model illustrating the structural rearrangements of the 4A-5A-7B chromosomes in wheat as two major translocation events. With collinearity analysis at both the macro- and microscale, we illustrated the complex evolutionary history of homologs of the wheat vernalization gene Vrn2, which evolved as a combined result of genome translocation, duplication, and polyploidization and gene loss events. Our work provides a useful practice for connecting emerging genome assemblies, with awareness of the extensive polyploidy in plants, and will help researchers efficiently exploit genome sequence resources.}, } @article {pmid32977700, year = {2020}, author = {McCubbin, T and Gonzalez-Garcia, RA and Palfreyman, RW and Stowers, C and Nielsen, LK and Marcellin, E}, title = {A Pan-Genome Guided Metabolic Network Reconstruction of Five Propionibacterium Species Reveals Extensive Metabolic Diversity.}, journal = {Genes}, volume = {11}, number = {10}, pages = {}, pmid = {32977700}, issn = {2073-4425}, abstract = {Propionibacteria have been studied extensively since the early 1930s due to their relevance to industry and importance as human pathogens. Still, their unique metabolism is far from fully understood. This is partly due to their signature high GC content, which has previously hampered the acquisition of quality sequence data, the accurate annotation of the available genomes, and the functional characterization of genes. The recent completion of the genome sequences for several species has led researchers to reassess the taxonomical classification of the genus Propionibacterium, which has been divided into several new genres. 
Such data also enable a comparative genomic approach to annotation and provide a new opportunity to revisit our understanding of their metabolism. Using pan-genome analysis combined with the reconstruction of the first high-quality Propionibacterium genome-scale metabolic model and a pan-metabolic model of current and former members of the genus Propionibacterium, we demonstrate that despite sharing unique metabolic traits, these organisms have an unexpected diversity in central carbon metabolism and a hidden layer of metabolic complexity. This combined approach gave us new insights into the evolution of Propionibacterium metabolism and led us to propose a novel, putative ferredoxin-linked energy conservation strategy. The pan-genomic approach highlighted key differences in Propionibacterium metabolism that reflect adaptation to their environment. Results were mathematically captured in genome-scale metabolic reconstructions that can be used to further explore metabolism using metabolic modeling techniques. Overall, the data provide a platform to explore Propionibacterium metabolism and a tool for the rational design of strains.}, } @article {pmid32975504, year = {2020}, author = {Feng, Y and Fan, X and Zhu, L and Yang, X and Liu, Y and Gao, S and Jin, X and Liu, D and Ding, J and Guo, Y and Hu, Y}, title = {Phylogenetic and genomic analysis reveals high genomic openness and genetic diversity of Clostridium perfringens.}, journal = {Microbial genomics}, volume = {6}, number = {10}, pages = {}, pmid = {32975504}, issn = {2057-5858}, abstract = {Clostridium perfringens is associated with a variety of diseases in both humans and animals. Recent advances in genomic sequencing make it timely to re-visit this important pathogen. Although the genome sequence of C. perfringens was first determined in 2002, large-scale comparative genomics with isolates of different origins is still lacking. In this study, we used whole-genome sequencing of 45 C. 
perfringens isolates with isolation time spanning an 80-year period and performed comparative analysis of 173 genomes from worldwide strains. We also conducted phylogenetic lineage analysis and introduced an openness index (OI) to evaluate the openness of bacterial genomes. We classified all these genomes into five lineages and hypothesized that the origin of C. perfringens dates back to ~80 000 years ago. We showed that the pangenome of the 173 C. perfringens strains contained a total of 26 954 genes, while the core genome comprised 1020 genes, accounting for about a third of the genome of each isolate. We demonstrated that C. perfringens had the highest OI compared with 51 other bacterial species. Intact prophage sequences were found in nearly 70.0 % of C. perfringens genomes, while CRISPR sequences were found only in ~40.0 %. Plasmids were prevalent in C. perfringens isolates, and half of the virulence genes and antibiotic resistance genes (ARGs) identified in all the isolates could be found in plasmids. ARG-sharing network analysis showed that C. perfringens shared its 11 ARGs with 55 different bacterial species, and a high frequency of ARG transfer may have occurred between C. perfringens and species in the genera Streptococcus and Staphylococcus. Correlation analysis showed that the ARG number in C. perfringens strains increased with time, while the virulence gene number was relative stable. Our results, taken together with previous studies, revealed the high genome openness and genetic diversity of C. 
perfringens and provide a comprehensive view of the phylogeny, genomic features, virulence gene and ARG profiles of worldwide strains.}, } @article {pmid32972461, year = {2020}, author = {Rautiainen, M and Marschall, T}, title = {GraphAligner: rapid and versatile sequence-to-graph alignment.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {253}, pmid = {32972461}, issn = {1474-760X}, abstract = {Genome graphs can represent genetic variation and sequence uncertainty. Aligning sequences to genome graphs is key to many applications, including error correction, genome assembly, and genotyping of variants in a pangenome graph. Yet, so far, this step is often prohibitively slow. We present GraphAligner, a tool for aligning long reads to genome graphs. Compared to the state-of-the-art tools, GraphAligner is 13x faster and uses 3x less memory. When employing GraphAligner for error correction, we find it to be more than twice as accurate and over 12x faster than extant tools. Availability: Package manager: https://anaconda.org/bioconda/graphaligner and source code: https://github.com/maickrau/GraphAligner.}, } @article {pmid32969787, year = {2020}, author = {Sánchez-Osuna, M and Cortés, P and Llagostera, M and Barbé, J and Erill, I}, title = {Exploration into the origins and mobilization of di-hydrofolate reductase genes and the emergence of clinical resistance to trimethoprim.}, journal = {Microbial genomics}, volume = {6}, number = {11}, pages = {}, pmid = {32969787}, issn = {2057-5858}, abstract = {Trimethoprim is a synthetic antibacterial agent that targets folate biosynthesis by competitively binding to the di-hydrofolate reductase enzyme (DHFR). Trimethoprim is often administered synergistically with sulfonamide, another chemotherapeutic agent targeting the di-hydropteroate synthase (DHPS) enzyme in the same pathway. 
Clinical resistance to both drugs is widespread and mediated by enzyme variants capable of performing their biological function without binding to these drugs. These mutant enzymes were assumed to have arisen after the discovery of these synthetic drugs, but recent work has shown that genes conferring resistance to sulfonamide were present in the bacterial pangenome millions of years ago. Here, we apply phylogenetics and comparative genomics methods to study the largest family of mobile trimethoprim-resistance genes (dfrA). We show that most of the dfrA genes identified to date map to two large clades that likely arose from independent mobilization events. In contrast to sulfonamide resistance (sul) genes, we find evidence of recurrent mobilization in dfrA genes. Phylogenetic evidence allows us to identify novel dfrA genes in the emerging pathogen Acinetobacter baumannii, and we confirm their resistance phenotype in vitro. We also identify a cluster of dfrA homologues in cryptic plasmid and phage genomes, but we show that these enzymes do not confer resistance to trimethoprim. Our methods also allow us to pinpoint the chromosomal origin of previously reported dfrA genes, and we show that many of these ancient chromosomal genes also confer resistance to trimethoprim. Our work reveals that trimethoprim resistance predated the clinical use of this chemotherapeutic agent, but that novel mutations have likely also arisen and become mobilized following its widespread use within and outside the clinic. 
Hence, this work confirms that resistance to novel drugs may already be present in the bacterial pangenome, and stresses the importance of rapid mobilization as a fundamental element in the emergence and global spread of resistance determinants.}, } @article {pmid32968153, year = {2020}, author = {Jin, L and Chen, Y and Yang, W and Qiao, Z and Zhang, X}, title = {Complete genome sequence of fish-pathogenic Aeromonas hydrophila HX-3 and a comparative analysis: insights into virulence factors and quorum sensing.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {15479}, pmid = {32968153}, issn = {2045-2322}, mesh = {Aeromonas hydrophila/*genetics/pathogenicity/ultrastructure ; Animals ; Chromosomes, Bacterial/genetics ; Cloning, Molecular ; Fish Diseases/microbiology ; Genes, Bacterial/genetics ; Genome, Bacterial/*genetics ; Genomics ; Gram-Negative Bacterial Infections/microbiology/veterinary ; Microscopy, Electron, Scanning ; Microscopy, Electron, Transmission ; Phylogeny ; Quorum Sensing/genetics ; Virulence Factors/genetics ; Whole Genome Sequencing/methods ; }, abstract = {The gram-negative, aerobic, rod-shaped bacterium Aeromonas hydrophila, the causative agent of motile aeromonad septicaemia, has attracted increasing attention due to its high pathogenicity. Here, we constructed the complete genome sequence of a virulent strain, A. hydrophila HX-3 isolated from Pseudosciaena crocea and performed comparative genomics to investigate its virulence factors and quorum sensing features in comparison with those of other Aeromonas isolates. HX-3 has a circular chromosome of 4,941,513 bp with a 61.0% G + C content encoding 4483 genes, including 4318 protein-coding genes, and 31 rRNA, 127 tRNA and 7 ncRNA operons. Seventy interspersed repeat and 153 tandem repeat sequences, 7 transposons, 8 clustered regularly interspaced short palindromic repeats, and 39 genomic islands were predicted in the A. hydrophila HX-3 genome. 
Phylogeny and pan-genome were also analyzed herein to confirm the evolutionary relationships on the basis of comparisons with other fully sequenced Aeromonas genomes. In addition, the assembled HX-3 genome was successfully annotated against the Cluster of Orthologous Groups of proteins database (76.03%), Gene Ontology database (18.13%), and Kyoto Encyclopedia of Genes and Genome pathway database (59.68%). Two-component regulatory systems in the HX-3 genome and virulence factors profiles through comparative analysis were predicted, providing insights into pathogenicity. A large number of genes related to the AHL-type 1 (ahyI, ahyR), LuxS-type 2 (luxS, pfs, metEHK, litR, luxOQU) and QseBC-type 3 (qseB, qseC) autoinducer systems were also identified. As a result of the expression of the ahyI gene in Escherichia coli BL21 (DE3), combined UPLC-MS/MS profiling led to the identification of several new N-acyl-homoserine lactone compounds synthesized by AhyI. This genomic analysis determined the comprehensive QS systems of A. hydrophila, which might provide novel information regarding the mechanisms of virulence signatures correlated with QS.}, } @article {pmid32958892, year = {2020}, author = {Fang, X and Lloyd, CJ and Palsson, BO}, title = {Reconstructing organisms in silico: genome-scale models and their emerging applications.}, journal = {Nature reviews. Microbiology}, volume = {}, number = {}, pages = {}, doi = {10.1038/s41579-020-00440-4}, pmid = {32958892}, issn = {1740-1534}, support = {R01 GM057089/GM/NIGMS NIH HHS/United States ; }, abstract = {Escherichia coli is considered to be the best-known microorganism given the large number of published studies detailing its genes, its genome and the biochemical functions of its molecular components. This vast literature has been systematically assembled into a reconstruction of the biochemical reaction networks that underlie E. 
coli's functions, a process which is now being applied to an increasing number of microorganisms. Genome-scale reconstructed networks are organized and systematized knowledge bases that have multiple uses, including conversion into computational models that interpret and predict phenotypic states and the consequences of environmental and genetic perturbations. These genome-scale models (GEMs) now enable us to develop pan-genome analyses that provide mechanistic insights, detail the selection pressures on proteome allocation and address stress phenotypes. In this Review, we first discuss the overall development of GEMs and their applications. Next, we review the evolution of the most complete GEM that has been developed to date: the E. coli GEM. Finally, we explore three emerging areas in genome-scale modelling of microbial phenotypes: collections of strain-specific models, metabolic and macromolecular expression models, and simulation of stress responses.}, } @article {pmid32957508, year = {2020}, author = {Phanse, Y and Wu, CW and Venturino, AJ and Hansen, C and Nelson, K and Broderick, SR and Steinberg, H and Talaat, AM}, title = {A Protective Vaccine against Johne's Disease in Cattle.}, journal = {Microorganisms}, volume = {8}, number = {9}, pages = {}, pmid = {32957508}, issn = {2076-2607}, support = {2013-33610-21044//USDA NIFA SBIR/ ; 2013-01151//NIFA Foundational Program on Animal Health/ ; }, abstract = {Johne's disease (JD) caused by Mycobacterium avium subsp. paratuberculosis (M. paratuberculosis) is a chronic infection characterized by the development of granulomatous enteritis in wild and domesticated ruminants. It is one of the most significant livestock diseases not only in the USA but also globally, accounting for USD 200-500 million losses annually for the USA alone with potential link to cases of Crohn's disease in humans. Developing safe and protective vaccines is of a paramount importance for JD control in dairy cows. 
The current study evaluated the safety, immunity and protective efficacy of a novel live attenuated vaccine (LAV) candidate with and without an adjuvant in comparison to an inactivated vaccine. Results indicated that the LAV, irrespective of the adjuvant presence, induced robust T cell immune responses indicated by proinflammatory cytokine production such as IFN-γ, IFN-α, TNF-α and IL-17 as well as strong response to intradermal skin test against M. paratuberculosis antigens. Furthermore, the LAV was safe with minimal tissue pathology. Finally, calves vaccinated with adjuvanted LAV did not shed M. paratuberculosis post-challenge, a much-desired characteristic of an effective vaccine against JD. Together, this data suggests a strong potential of testing LAV in field trials to curb JD in dairy herds.}, } @article {pmid32939951, year = {2020}, author = {Zhong, C and Wang, L and Ning, K}, title = {Pan-genome study of Thermococcales reveals extensive genetic diversity and genetic evidence of thermophilic adaption.}, journal = {Environmental microbiology}, volume = {}, number = {}, pages = {}, doi = {10.1111/1462-2920.15234}, pmid = {32939951}, issn = {1462-2920}, support = {31871334//National Natural Science Foundation of China/ ; 31671374//National Natural Science Foundation of China/ ; 2018YFC0910502//National Key Research and Development Program of China/ ; }, abstract = {Thermococcales has a strong adaptability to extreme environments, which is of profound interest in explaining how complex life forms emerge on earth. However, their gene composition, thermal stability and evolution in hyperthermal environments are still little known. Here, we characterized the pan-genome architecture of 30 Thermococcales species to gain insight into their genetic properties, evolutionary patterns and specific metabolisms adapted to niches. We revealed an open pan-genome of Thermococcales comprising 6070 gene families that tend to increase with the availability of additional genomes. 
The genome contents of Thermococcales were flexible, with a series of genes experienced gene duplication, progressive divergence, or gene gain and loss events exhibiting distinct functional features. These archaea had concise types of heat shock proteins, such as HSP20, HSP60 and prefoldin, which were constrained by strong purifying selection that governed their conservative evolution. Furthermore, purifying selection forced genes involved in enzyme, motility, secretion system, defence system and chaperones to differ in functional constraints and their disparity in the rate of evolution may be related to adaptation to specific niche. These results deepened our understanding of genetic diversity and adaptation patterns of Thermococcales, and provided valuable research models for studying the metabolic traits of early life forms.}, } @article {pmid32937932, year = {2020}, author = {Khan, M and Stapleton, F and Summers, S and Rice, SA and Willcox, MDP}, title = {Antibiotic Resistance Characteristics of Pseudomonas aeruginosa Isolated from Keratitis in Australia and India.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {9}, number = {9}, pages = {}, pmid = {32937932}, issn = {2079-6382}, abstract = {This study investigated genomic differences in Australian and Indian Pseudomonas aeruginosa isolates from keratitis (infection of the cornea). Overall, the Indian isolates were resistant to more antibiotics, with some of those isolates being multi-drug resistant. Acquired genes were related to resistance to fluoroquinolones, aminoglycosides, beta-lactams, macrolides, sulphonamides, and tetracycline and were more frequent in Indian (96%) than in Australian (35%) isolates (p = 0.02). Indian isolates had large numbers of gene variations (median 50,006, IQR = 26,967-50,600) compared to Australian isolates (median 26,317, IQR = 25,681-33,780). 
There were a larger number of mutations in the mutL and uvrD genes associated with the mismatch repair (MMR) system in Indian isolates, which may result in strains losing their efficacy for DNA repair. The number of gene variations were greater in isolates carrying MMR system genes or exoU. In the phylogenetic division, the number of core genes were similar in both groups, but Indian isolates had larger numbers of pan genes (median 6518, IQR = 6040-6935). Clones related to three different sequence types-ST308, ST316, and ST491-were found among Indian isolates. Only one clone, ST233, containing two strains was present in Australian isolates. The most striking differences between Australian and Indian isolates were carriage of exoU (that encodes a cytolytic phospholipase) in Indian isolates and exoS (that encodes for GTPase activator activity) in Australian isolates, large number of acquired resistance genes, greater changes to MMR genes, and a larger pan genome as well as increased overall genetic variation in the Indian isolates.}, } @article {pmid32934114, year = {2020}, author = {Yin, Z and Zhang, S and Wei, Y and Wang, M and Ma, S and Yang, S and Wang, J and Yuan, C and Jiang, L and Du, Y}, title = {Horizontal Gene Transfer Clarifies Taxonomic Confusion and Promotes the Genetic Diversity and Pathogenicity of Plesiomonas shigelloides.}, journal = {mSystems}, volume = {5}, number = {5}, pages = {}, pmid = {32934114}, issn = {2379-5077}, abstract = {Plesiomonas shigelloides is an emerging pathogen that has been shown to be involved in gastrointestinal diseases and extraintestinal infections in humans. However, the taxonomic position, evolutionary dynamics, and pathogenesis of P. shigelloides remain unclear. We reported the draft genome sequences of 12 P. shigelloides strains representing different serogroups. We were able to determine a clear distinction between P. 
shigelloides and other members of Enterobacterales via core genome phylogeny, Neighbor-Net network, and average genome identity analysis. The pan-genome analysis of P. shigelloides revealed extensive genetic diversity and presented large flexible gene repertoires, while the core genome phylogeny exhibited a low level of clonality. The discordance between the core genome phylogeny and the pan-genome phylogeny indicated that flexible accessory genomes account for an important proportion of the evolution of P. shigelloides, which was subsequently characterized by determinations of hundreds of horizontally transferred genes (horizontal genes), massive gene expansions and contractions, and diverse mobile genetic elements (MGEs). The apparently high levels of horizontal gene transfer (HGT) in P. shigelloides were conferred from bacteria with novel properties from other taxa (mainly Vibrionaceae and Aeromonadaceae), which caused the historical taxonomic confusion and shaped the virulence gene pools. Furthermore, P. shigelloides genomes contain many macromolecular secretion system genes, virulence factor genes, and resistance genes, indicating its potential to cause intestinal and invasive infections. Collectively, our work provides insights into the phylogenetic position, evolutionary dynamic, and pathogenesis of P. shigelloides at the genomic level, which could facilitate the observation and research of this important pathogen. IMPORTANCE: The taxonomic position of P. shigelloides has been the subject of debate for a long time, and until now, the evolutionary dynamics and pathogenesis of P. shigelloides were unclear. In this study, pan-genome analysis indicated extensive genetic diversity and the presence of large and variable gene repertoires. Our results revealed that horizontal gene transfer was the focal driving force for the genetic diversity of the P. shigelloides pan-genome and might have contributed to the emergence of novel properties. 
Vibrionaceae and Aeromonadaceae were found to be the predominant donor taxa for horizontal genes, which might have caused the taxonomic confusion historically. Comparative genomic analysis revealed the potential of P. shigelloides to cause intestinal and invasive diseases. Our results could advance the understanding of the evolution and pathogenesis of P. shigelloides, particularly in elucidating the role of horizontal gene transfer and investigating virulence-related elements.}, } @article {pmid32934112, year = {2020}, author = {Ross, DE and Marshall, CW and Gulliver, D and May, HD and Norman, RS}, title = {Defining Genomic and Predicted Metabolic Features of the Acetobacterium Genus.}, journal = {mSystems}, volume = {5}, number = {5}, pages = {}, pmid = {32934112}, issn = {2379-5077}, abstract = {Acetogens are anaerobic bacteria capable of fixing CO2 or CO to produce acetyl coenzyme A (acetyl-CoA) and ultimately acetate using the Wood-Ljungdahl pathway (WLP). Acetobacterium woodii is the type strain of the Acetobacterium genus and has been critical for understanding the biochemistry and energy conservation in acetogens. Members of the Acetobacterium genus have been isolated from a variety of environments or have had genomes recovered from metagenome data, but no systematic investigation has been done on the unique and various metabolisms of the genus. To gain a better appreciation for the metabolic breadth of the genus, we sequenced the genomes of 4 isolates (A. fimetarium, A. malicum, A. paludosum, and A. tundrae) and conducted a comparative genome analysis (pan-genome) of 11 different Acetobacterium genomes. A unifying feature of the Acetobacterium genus is the carbon-fixing WLP. The methyl (cluster II) and carbonyl (cluster III) branches of the Wood-Ljungdahl pathway are highly conserved across all sequenced Acetobacterium genomes, but cluster I encoding the formate dehydrogenase is not. In contrast to A. 
woodii, all but four strains encode two distinct Rnf clusters, Rnf being the primary respiratory enzyme complex. Metabolism of fructose, lactate, and H2:CO2 was conserved across the genus, but metabolism of ethanol, methanol, caffeate, and 2,3-butanediol varied. Additionally, clade-specific metabolic potential was observed, such as amino acid transport and metabolism in the psychrophilic species, and biofilm formation in the A. wieringae clade, which may afford these groups an advantage in low-temperature growth or attachment to solid surfaces, respectively. IMPORTANCE: Acetogens are anaerobic bacteria capable of fixing CO2 or CO to produce acetyl-CoA and ultimately acetate using the Wood-Ljungdahl pathway (WLP). This autotrophic metabolism plays a major role in the global carbon cycle and, if harnessed, can help reduce greenhouse gas emissions. Overall, the data presented here provide a framework for examining the ecology and evolution of the Acetobacterium genus and highlight the potential of these species as a source for production of fuels and chemicals from CO2 feedstocks.}, } @article {pmid32928108, year = {2020}, author = {Chen, Z and Erickson, DL and Meng, J}, title = {Benchmarking hybrid assembly approaches for genomic analyses of bacterial pathogens using Illumina and Oxford Nanopore sequencing.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {631}, pmid = {32928108}, issn = {1471-2164}, abstract = {BACKGROUND: We benchmarked the hybrid assembly approaches of MaSuRCA, SPAdes, and Unicycler for bacterial pathogens using Illumina and Oxford Nanopore sequencing by determining genome completeness and accuracy, antimicrobial resistance (AMR), virulence potential, multilocus sequence typing (MLST), phylogeny, and pan genome. Ten bacterial species (10 strains) were tested for simulated reads of both mediocre- and low-quality, whereas 11 bacterial species (12 strains) were tested for real reads.

RESULTS: Unicycler performed the best for achieving contiguous genomes, closely followed by MaSuRCA, while all SPAdes assemblies were incomplete. MaSuRCA was less tolerant of low-quality long reads than SPAdes and Unicycler. The hybrid assemblies of five antimicrobial-resistant strains with simulated reads provided consistent AMR genotypes with the reference genomes. The MaSuRCA assembly of Staphylococcus aureus with real reads contained msr(A) and tet(K), while the reference genome and SPAdes and Unicycler assemblies harbored blaZ. The AMR genotypes of the reference genomes and hybrid assemblies were consistent for the other five antimicrobial-resistant strains with real reads. The numbers of virulence genes in all hybrid assemblies were similar to those of the reference genomes, irrespective of simulated or real reads. Only one exception existed that the reference genome and hybrid assemblies of Pseudomonas aeruginosa with mediocre-quality long reads carried 241 virulence genes, whereas 184 virulence genes were identified in the hybrid assemblies of low-quality long reads. The MaSuRCA assemblies of Escherichia coli O157:H7 and Salmonella Typhimurium with mediocre-quality long reads contained 126 and 118 virulence genes, respectively, while 110 and 107 virulence genes were detected in their MaSuRCA assemblies of low-quality long reads, respectively. All approaches performed well in our MLST and phylogenetic analyses. The pan genomes of the hybrid assemblies of S. Typhimurium with mediocre-quality long reads were similar to that of the reference genome, while SPAdes and Unicycler were more tolerant of low-quality long reads than MaSuRCA for the pan-genome analysis. All approaches functioned well in the pan-genome analysis of Campylobacter jejuni with real reads.

CONCLUSIONS: Our research demonstrates the hybrid assembly pipeline of Unicycler as a superior approach for genomic analyses of bacterial pathogens using Illumina and Oxford Nanopore sequencing.}, } @article {pmid32924924, year = {2020}, author = {Psomopoulos, FE and van Helden, J and Médigue, C and Chasapi, A and Ouzounis, CA}, title = {Ancestral state reconstruction of metabolic pathways across pangenome ensembles.}, journal = {Microbial genomics}, volume = {6}, number = {11}, pages = {}, pmid = {32924924}, issn = {2057-5858}, abstract = {As genome sequencing efforts are unveiling the genetic diversity of the biosphere with an unprecedented speed, there is a need to accurately describe the structural and functional properties of groups of extant species whose genomes have been sequenced, as well as their inferred ancestors, at any given taxonomic level of their phylogeny. Elaborate approaches for the reconstruction of ancestral states at the sequence level have been developed, subsequently augmented by methods based on gene content. While these approaches of sequence or gene-content reconstruction have been successfully deployed, there has been less progress on the explicit inference of functional properties of ancestral genomes, in terms of metabolic pathways and other cellular processes. Herein, we describe PathTrace, an efficient algorithm for parsimony-based reconstructions of the evolutionary history of individual metabolic pathways, pivotal representations of key functional modules of cellular function. The algorithm is implemented as a five-step process through which pathways are represented as fuzzy vectors, where each enzyme is associated with a taxonomic conservation value derived from the phylogenetic profile of its protein sequence. The method is evaluated with a selected benchmark set of pathways against collections of genome sequences from key data resources. 
By deploying a pangenome-driven approach for pathway sets, we demonstrate that the inferred patterns are largely insensitive to noise, as opposed to gene-content reconstruction methods. In addition, the resulting reconstructions are closely correlated with the evolutionary distance of the taxa under study, suggesting that a diligent selection of target pangenomes is essential for maintaining cohesiveness of the method and consistency of the inference, serving as an internal control for an arbitrary selection of queries. The PathTrace method is a first step towards the large-scale analysis of metabolic pathway evolution and our deeper understanding of functional relationships reflected in emerging pangenome collections.}, } @article {pmid32920913, year = {2020}, author = {Gardon, H and Biderre-Petit, C and Jouan-Dufournel, I and Bronner, G}, title = {A drift-barrier model drives the genomic landscape of a structured bacterial population.}, journal = {Molecular ecology}, volume = {29}, number = {21}, pages = {4143-4156}, doi = {10.1111/mec.15628}, pmid = {32920913}, issn = {1365-294X}, abstract = {Bacterial populations differentiate over time and space to form distinct genetic units. The mechanisms governing this diversification are presumed to result from the ecological context of living units to adapt to specific niches. Recently, a model assuming the acquisition of advantageous genes among populations rather than whole genome sweeps has emerged to explain population differentiation. However, the characteristics of these exchanged, or flexible, genes and whether their evolution is driven by adaptive or neutral processes remain controversial. By analysing the flexible genome of single-amplified genomes of co-occurring populations of the marine Prochlorococcus HLII ecotype, we highlight that genomic compartments - rather than population units - are characterized by different evolutionary trajectories. 
The dynamics of gene fluxes vary across genomic compartments and therefore the effectiveness of selection depends on the fluctuation of the effective population size along the genome. Taken together, these results support the drift-barrier model of bacterial evolution.}, } @article {pmid32913678, year = {2020}, author = {Christian, RW and Hewitt, SL and Nelson, G and Roalson, EH and Dhingra, A}, title = {Plastid transit peptides-where do they come from and where do they all belong? Multi-genome and pan-genomic assessment of chloroplast transit peptide evolution.}, journal = {PeerJ}, volume = {8}, number = {}, pages = {e9772}, pmid = {32913678}, issn = {2167-8359}, abstract = {Subcellular relocalization of proteins determines an organism's metabolic repertoire and thereby its survival in unique evolutionary niches. In plants, the plastid and its various morphotypes import a large and varied number of nuclear-encoded proteins to orchestrate vital biochemical reactions in a spatiotemporal context. Recent comparative genomics analysis and high-throughput shotgun proteomics data indicate that there are a large number of plastid-targeted proteins that are either semi-conserved or non-conserved across different lineages. This implies that homologs are differentially targeted across different species, which is feasible only if proteins have gained or lost plastid targeting peptides during evolution. In this study, a broad, multi-genome analysis of 15 phylogenetically diverse genera and in-depth analyses of pangenomes from Arabidopsis and Brachypodium were performed to address the question of how proteins acquire or lose plastid targeting peptides. The analysis revealed that random insertions or deletions were the dominant mechanism by which novel transit peptides are gained by proteins. 
While gene duplication was not a strict requirement for the acquisition of novel subcellular targeting, 40% of novel plastid-targeted genes were found to be most closely related to a sequence within the same genome, and of these, 30.5% resulted from alternative transcription or translation initiation sites. Interestingly, analysis of the distribution of amino acids in the transit peptides of known and predicted chloroplast-targeted proteins revealed monocot and eudicot-specific preferences in residue distribution.}, } @article {pmid32903853, year = {2020}, author = {Zhang, X and Li, F and Cui, S and Mao, L and Li, X and Awan, F and Lv, W and Zeng, Z}, title = {Prevalence and Distribution Characteristics of blaKPC-2 and blaNDM-1 Genes in Klebsiella pneumoniae.}, journal = {Infection and drug resistance}, volume = {13}, number = {}, pages = {2901-2910}, pmid = {32903853}, issn = {1178-6973}, abstract = {Background: Carbapenem-resistant Klebsiella pneumoniae infections have caused major concern and posed a global threat to public health. As blaKPC-2 and blaNDM-1 genes are the most widely reported carbapenem resistant genes in K. pneumoniae, it is crucial to study the prevalence and geographical distribution of these two genes for further understanding of their transmission mode and mechanism.

Purpose: Here, we investigated the prevalence and distribution of blaKPC-2 and blaNDM-1 genes in carbapenem-resistant K. pneumoniae strains from a tertiary hospital and from 1579 genomes available in the NCBI database, and further analyzed the possible core structure of blaKPC-2 or blaNDM-1 genes among global genome data.

Materials and Methods: K. pneumoniae strains from a tertiary hospital in China during 2013-2018 were collected and their antimicrobial susceptibility testing for 28 antibiotics was determined. Whole-genome sequencing of carbapenem-resistant K. pneumoniae strains was used to investigate the genetic characterization. The phylogenetic relationships of these strains were investigated through pan-genome analysis. The epidemiology and distribution of blaKPC-2 and blaNDM-1 genes in K. pneumoniae based on 1579 global genomes and carbapenem-resistant K. pneumoniae strains from hospital were analyzed using bioinformatics. The possible core structure carrying blaKPC-2 or blaNDM-1 genes was investigated among global data.

Results: A total of 19 carbapenem-resistant K. pneumoniae were isolated in a tertiary hospital. All isolates had a multi-resistant pattern and eight kinds of resistance genes. The phylogenetic analysis showed all isolates in the hospital were dominated by two lineages composed of ST11 and ST25, respectively. ST11 and ST25 were the major ST type carrying blaKPC-2 and blaNDM-1 genes, respectively. Among 1579 global genomes data, 147 known ST types (1195 genomes) have been identified, while ST258 (23.6%) and ST11 (22.1%) were the globally prevalent clones among the known ST types. Genetic environment analysis showed that the ISKpn7-dnaA/ISKpn27-blaKPC-2-ISKpn6 and blaNDM-1-ble-trpf-nagA may be the core structure in the horizontal transfer of blaKPC-2 and blaNDM-1, respectively. In addition, DNA transferase (hin) may be involved in the horizontal transfer or the expression of blaNDM-1.

Conclusion: There was clonal transmission of carbapenem-resistant K. pneumoniae in the tertiary hospital in China. The prevalence and distribution of blaKPC-2 and blaNDM-1 varied by countries and were driven by different transposons carrying the core structure. This study shed light on the genetic environment of blaKPC-2 and blaNDM-1 and offered basic information about the mechanism of carbapenem-resistant K. pneumoniae dissemination.}, } @article {pmid32902773, year = {2020}, author = {Liu, Y and Tian, Z}, title = {From one linear genome to a graph-based pan-genome: a new era for genomics.}, journal = {Science China. Life sciences}, volume = {}, number = {}, pages = {}, doi = {10.1007/s11427-020-1808-0}, pmid = {32902773}, issn = {1869-1889}, } @article {pmid32901388, year = {2020}, author = {González-Dominici, LI and Saati-Santamaría, Z and García-Fraile, P}, title = {Genome Analysis and Genomic Comparison of the Novel Species Arthrobacter ipsi Reveal Its Potential Protective Role in Its Bark Beetle Host.}, journal = {Microbial ecology}, volume = {}, number = {}, pages = {}, doi = {10.1007/s00248-020-01593-8}, pmid = {32901388}, issn = {1432-184X}, support = {19-09072S//Grantová Agentura České Republiky/ ; CLU-2018-04//Junta de Castilla y León (ES)/ ; }, abstract = {The pine engraver beetle, Ips acuminatus Gyll, is a bark beetle that causes important damages in Scots pine (Pinus sylvestris) forests and plantations. As almost all higher organisms, Ips acuminatus harbours a microbiome, although the role of most members of its microbiome is not well understood. As part of a work in which we analysed the bacterial diversity associated to Ips acuminatus, we isolated the strain Arthrobacter sp. IA7. In order to study its potential role within the bark beetle holobiont, we sequenced and explored its genome and performed a pan-genome analysis of the genus Arthrobacter, showing specific genes of strain IA7 that might be related with its particular role in its niche. 
Based on these investigations, we suggest several potential roles of the bacterium within the beetle. Analysis of genes related to secondary metabolism indicated potential antifungal capability, confirmed by the inhibition of several entomopathogenic fungal strains (Metarhizium anisopliae CCF0966, Lecanicillium muscarium CCF6041, L. muscarium CCF3297, Isaria fumosorosea CCF4401, I. farinosa CCF4808, Beauveria bassiana CCF4422 and B. brongniartii CCF1547). Phylogenetic analyses of the 16S rRNA gene, six concatenated housekeeping genes (tuf-secY-rpoB-recA-fusA-atpD) and genome sequences indicated that strain IA7 is closely related to A. globiformis NBRC 12137T but forms a new species within the genus Arthrobacter; this was confirmed by digital DNA-DNA hybridization (37.10%) and average nucleotide identity (ANIb) (88.9%). Based on phenotypic and genotypic features, we propose strain IA7T as the novel species Arthrobacter ipsi sp. nov. (type strain IA7T = CECT 30100T = LMG 31782T) and suggest its protective role for its host.}, } @article {pmid32898134, year = {2020}, author = {Boisen, N and Østerlund, MT and Joensen, KG and Santiago, AE and Mandomando, I and Cravioto, A and Chattaway, MA and Gonyar, LA and Overballe-Petersen, S and Stine, OC and Rasko, DA and Scheutz, F and Nataro, JP}, title = {Redefining enteroaggregative Escherichia coli (EAEC): Genomic characterization of epidemiological EAEC strains.}, journal = {PLoS neglected tropical diseases}, volume = {14}, number = {9}, pages = {e0008613}, pmid = {32898134}, issn = {1935-2735}, mesh = {Adhesins, Bacterial/genetics ; Bacterial Adhesion/*genetics/physiology ; Case-Control Studies ; Cell Line ; Child, Preschool ; Diarrhea/microbiology ; Escherichia coli/classification/*genetics/isolation & purification/*pathogenicity ; Escherichia coli Infections/*epidemiology ; Escherichia coli Proteins/*genetics ; Fimbriae Proteins/*genetics ; Fimbriae, Bacterial/*genetics ; Genome, Bacterial/genetics ; Genomics ; Humans ; 
Infant ; Infant, Newborn ; Trans-Activators/genetics ; Virulence/genetics ; Virulence Factors/genetics ; Whole Genome Sequencing ; }, abstract = {Although enteroaggregative E. coli (EAEC) has been implicated as a common cause of diarrhea in multiple settings, neither its essential genomic nature nor its role as an enteric pathogen are fully understood. The current definition of this pathotype requires demonstration of cellular adherence; a working molecular definition encompasses E. coli which do not harbor the heat-stable or heat-labile toxins of enterotoxigenic E. coli (ETEC) and harbor the genes aaiC, aggR, and/or aatA. In an effort to improve the definition of this pathotype, we report the most definitive characterization of the pan-genome of EAEC to date, applying comparative genomics and functional characterization on a collection of 97 EAEC strains isolated in the course of a multicenter case-control diarrhea study (Global Enteric Multi-Center Study, GEMS). Genomic analysis revealed that the EAEC strains mapped to all phylogenomic groups of E. coli. Circa 70% of strains harbored one of the five described AAF variants; there were no additional AAF variants identified, and strains that lacked an identifiable AAF generally did not have an otherwise complete AggR regulon. An exception was strains that harbored an ETEC colonization factor (CF) CS22, like AAF a member of the chaperone-usher family of adhesins, but not phylogenetically related to the AAF family. Of all genes scored, sepA yielded the strongest association with diarrhea (P = 0.002) followed by the increased serum survival gene, iss (p = 0.026), and the outer membrane protease gene ompT (p = 0.046). Notably, the EAEC genomes harbored several genes characteristically associated with other E. coli pathotypes. Our data suggest that a molecular definition of EAEC could comprise E. coli strains harboring AggR and a complete AAF(I-V) or CS22 gene cluster. 
Further, it is possible that strains meeting this definition could be both enteric bacteria and urinary/systemic pathogens.}, } @article {pmid32893299, year = {2020}, author = {Bonnici, V and Maresi, E and Giugno, R}, title = {Challenges in gene-oriented approaches for pangenome content discovery.}, journal = {Briefings in bioinformatics}, volume = {}, number = {}, pages = {}, doi = {10.1093/bib/bbaa198}, pmid = {32893299}, issn = {1477-4054}, abstract = {Given a group of genomes, represented as the sets of genes that belong to them, the discovery of the pangenomic content is based on the search of genetic homology among the genes for clustering them into families. Thus, pangenomic analyses investigate the membership of the families to the given genomes. This approach is referred to as the gene-oriented approach in contrast to other definitions of the problem that takes into account different genomic features. In the past years, several tools have been developed to discover and analyse pangenomic contents. Because of the hardness of the problem, each tool applies a different strategy for discovering the pangenomic content. This results in a differentiation of the performance of each tool that depends on the composition of the input genomes. This review reports the main analysis instruments provided by the current state of the art tools for the discovery of pangenomic contents. Moreover, unlike previous works, the presented study compares pangenomic tools from a methodological perspective, analysing the causes that lead a given methodology to outperform other tools. The analysis is performed by taking into account different bacterial populations, which are synthetically generated by changing evolutionary parameters. The benchmarks used to compare the pangenomic tools, in addition to the computational pipeline developed for this purpose, are available at https://github.com/InfOmics/pangenes-review. Contact: V. Bonnici, R. 
Giugno Supplementary information: Supplementary data are available at Briefings in Bioinformatics online.}, } @article {pmid32880768, year = {2020}, author = {Zhu, Z and Wang, L and Qian, H and Gu, F and Li, Y and Zhang, H and Chen, Y and Shi, J and Ma, P and Bao, C and Gu, B}, title = {Comparative genome analysis of 12 Shigella sonnei strains: virulence, resistance, and their interactions.}, journal = {International microbiology : the official journal of the Spanish Society for Microbiology}, volume = {}, number = {}, pages = {}, doi = {10.1007/s10123-020-00145-x}, pmid = {32880768}, issn = {1618-1905}, support = {81902040//National Natural Science Foundation of China/ ; 81871734//National Natural Science Foundation of China/ ; 81701390//National Natural Science Foundation of China/ ; BK20170250//Natural Science Foundation of Jiangsu Province/ ; BK20180997//Natural Science Foundation of Jiangsu Province/ ; }, abstract = {Shigellosis is a highly infectious disease that is mainly transmitted via fecal-oral contact of the bacteria Shigella. Four species have been identified in Shigella genus, among which Shigella flexneri is used to be the most prevalent species globally and commonly isolated from developing countries. However, it is being replaced by Shigella sonnei that is currently the main causative agent for dysentery pandemic in many emerging industrialized countries such as Asia and the Middle East. For a better understanding of S. sonnei virulence and antibiotic resistance, we sequenced 12 clinical S. sonnei strains with varied antibiotic-resistance profiles collected from four cities in Jiangsu Province, China. Phylogenomic analysis clustered antibiotic-sensitive and resistant S. sonnei into two distinct groups while pan-genome analysis reveals the presence and absence of unique genes in each group. Screening of 31 classes of virulence factors found out that type 2 secretion system is doubled in resistant strains. 
Further principle component analysis based on the interactions between virulence and resistance indicated that abundant virulence factors are associated with higher levels of antibiotic resistance. The result present here is based on statistical analysis of a small sample size and serves basically as a guidance for further experimental and theoretical studies.}, } @article {pmid32879462, year = {2020}, author = {Muñoz-Ramirez, ZY and Pascoe, B and Mendez-Tenorio, A and Mourkas, E and Sandoval-Motta, S and Perez-Perez, G and Morgan, DR and Dominguez, RL and Ortiz-Princz, D and Cavazza, ME and Rocha, G and Queiroz, DMM and Catalano, M and Palma, GZ and Goldman, CG and Venegas, A and Alarcon, T and Oleastro, M and Vale, FF and Goodman, KJ and Torres, RC and Berthenet, E and Hitchings, MD and Blaser, MJ and Sheppard, SK and Thorell, K and Torres, J}, title = {A 500-year tale of co-evolution, adaptation, and virulence: Helicobacter pylori in the Americas.}, journal = {The ISME journal}, volume = {}, number = {}, pages = {}, doi = {10.1038/s41396-020-00758-0}, pmid = {32879462}, issn = {1751-7370}, abstract = {Helicobacter pylori is a common component of the human stomach microbiota, possibly dating back to the speciation of Homo sapiens. A history of pathogen evolution in allopatry has led to the development of genetically distinct H. pylori subpopulations, associated with different human populations, and more recent admixture among H. pylori subpopulations can provide information about human migrations. However, little is known about the degree to which some H. pylori genes are conserved in the face of admixture, potentially indicating host adaptation, or how virulence genes spread among different populations. We analyzed H. pylori genomes from 14 countries in the Americas, strains from the Iberian Peninsula, and public genomes from Europe, Africa, and Asia, to investigate how admixture varies across different regions and gene families. Whole-genome analyses of 723 H. 
pylori strains from around the world showed evidence of frequent admixture in the American strains with a complex mosaic of contributions from H. pylori populations originating in the Americas as well as other continents. Despite the complex admixture, distinctive genomic fingerprints were identified for each region, revealing novel American H. pylori subpopulations. A pan-genome Fst analysis showed that variation in virulence genes had the strongest fixation in America, compared with non-American populations, and that much of the variation constituted non-synonymous substitutions in functional domains. Network analyses suggest that these virulence genes have followed unique evolutionary paths in the American populations, spreading into different genetic backgrounds, potentially contributing to the high risk of gastric cancer in the region.}, } @article {pmid32879348, year = {2020}, author = {Carroll, LM and Huisman, JS and Wiedmann, M}, title = {Twentieth-century emergence of antimicrobial resistant human- and bovine-associated Salmonella enterica serotype Typhimurium lineages in New York State.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {14428}, pmid = {32879348}, issn = {2045-2322}, abstract = {Salmonella enterica serotype Typhimurium (S. Typhimurium) boasts a broad host range and can be transmitted between livestock and humans. While members of this serotype can acquire resistance to antimicrobials, the temporal dynamics of this acquisition is not well understood. Using New York State (NYS) and its dairy cattle farms as a model system, 87 S. Typhimurium strains isolated from 1999 to 2016 from either human clinical or bovine-associated sources in NYS were characterized using whole-genome sequencing. 
More than 91% of isolates were classified into one of four major lineages, two of which were largely susceptible to antimicrobials but showed sporadic antimicrobial resistance (AMR) gene acquisition, and two that were largely multidrug-resistant (MDR). All four lineages clustered by presence and absence of elements in the pan-genome. The two MDR lineages, one of which resembled S. Typhimurium DT104, were predicted to have emerged circa 1960 and 1972. The two largely susceptible lineages emerged earlier, but showcased sporadic AMR determinant acquisition largely after 1960, including acquisition of cephalosporin resistance-conferring genes after 1985. These results confine the majority of AMR acquisition events in NYS S. Typhimurium to the twentieth century, largely within the era of antibiotic usage.}, } @article {pmid32879312, year = {2020}, author = {Bellas, CM and Schroeder, DC and Edwards, A and Barker, G and Anesio, AM}, title = {Flexible genes establish widespread bacteriophage pan-genomes in cryoconite hole ecosystems.}, journal = {Nature communications}, volume = {11}, number = {1}, pages = {4403}, pmid = {32879312}, issn = {2041-1723}, support = {M 2299/FWF_/Austrian Science Fund FWF/Austria ; }, mesh = {Bacteriophages/*genetics ; Cyanobacteria/virology ; Ecosystem ; Gene Transfer, Horizontal ; Genes, Viral ; Genome, Viral ; Host Microbial Interactions/genetics ; Ice Cover/microbiology/*virology ; *Metagenome ; Metagenomics ; Phylogeny ; }, abstract = {Bacteriophage genomes rapidly evolve via mutation and horizontal gene transfer to counter evolving bacterial host defenses; such arms race dynamics should lead to divergence between phages from similar, geographically isolated ecosystems. However, near-identical phage genomes can reoccur over large geographical distances and several years apart, conversely suggesting many are stably maintained. 
Here, we show that phages with near-identical core genomes in distant, discrete aquatic ecosystems maintain diversity by possession of numerous flexible gene modules, where homologous genes present in the pan-genome interchange to create new phage variants. By repeatedly reconstructing the core and flexible regions of phage genomes from different metagenomes, we show a pool of homologous gene variants co-exist for each module in each location, however, the dominant variant shuffles independently in each module. These results suggest that in a natural community, recombination is the largest contributor to phage diversity, allowing a variety of host recognition receptors and genes to counter bacterial defenses to co-exist for each phage.}, } @article {pmid32850499, year = {2020}, author = {Alam, I and Kamau, AA and Kulmanov, M and Jaremko, Ł and Arold, ST and Pain, A and Gojobori, T and Duarte, CM}, title = {Functional Pangenome Analysis Shows Key Features of E Protein Are Preserved in SARS and SARS-CoV-2.}, journal = {Frontiers in cellular and infection microbiology}, volume = {10}, number = {}, pages = {405}, pmid = {32850499}, issn = {2235-2988}, mesh = {Betacoronavirus/*chemistry ; COVID-19 ; Coronavirus Envelope Proteins ; Coronavirus Infections/virology ; Genes, Essential ; Genes, Viral ; Genome, Viral ; Humans ; Middle East Respiratory Syndrome Coronavirus/chemistry/genetics ; Mutation ; Open Reading Frames ; PDZ Domains ; Pandemics ; Pneumonia, Viral/virology ; Protein Domains ; SARS Virus/chemistry ; SARS-CoV-2 ; Viral Envelope Proteins/*chemistry/*genetics ; Viroporin Proteins ; }, abstract = {The spread of the novel coronavirus (SARS-CoV-2) has triggered a global emergency, that demands urgent solutions for detection and therapy to prevent escalating health, social, and economic impacts. The spike protein (S) of this virus enables binding to the human receptor ACE2, and hence presents a prime target for vaccines preventing viral entry into host cells. 
The S proteins from SARS and SARS-CoV-2 are similar, but structural differences in the receptor binding domain (RBD) preclude the use of SARS-specific neutralizing antibodies to inhibit SARS-CoV-2. Here we used comparative pangenomic analysis of all sequenced reference Betacoronaviruses, complemented with functional and structural analyses. This analysis reveals that, among all core gene clusters present in these viruses, the envelope protein E shows a variant cluster shared by SARS and SARS-CoV-2 with two completely-conserved key functional features, namely an ion-channel, and a PDZ-binding motif (PBM). These features play a key role in the activation of the inflammasome causing the acute respiratory distress syndrome, the leading cause of death in SARS and SARS-CoV-2 infections. Together with functional pangenomic analysis, mutation tracking, and previous evidence, on E protein as a determinant of pathogenicity in SARS, we suggest E protein as an alternative therapeutic target to be considered for further studies to reduce complications of SARS-CoV-2 infections in COVID-19.}, } @article {pmid32849479, year = {2020}, author = {Kumar, R and Bröms, JE and Sjöstedt, A}, title = {Exploring the Diversity Within the Genus Francisella - An Integrated Pan-Genome and Genome-Mining Approach.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {1928}, pmid = {32849479}, issn = {1664-302X}, abstract = {Pan-genome analysis is a powerful method to explore genomic heterogeneity and diversity of bacterial species. Here we present a pan-genome analysis of the genus Francisella, comprising a dataset of 63 genomes and encompassing clinical as well as environmental isolates from distinct geographic locations. To determine the evolutionary relationship within the genus, we performed phylogenetic whole-genome studies utilizing the average nucleotide identity, average amino acid identity, core genes and non-recombinant loci markers. 
Based on the analyses, the phylogenetic trees obtained identified two distinct clades, A and B and a diverse cluster designated C. The sizes of the pan-, core-, cloud-, and shell-genomes of Francisella were estimated and compared to those of two other facultative intracellular pathogens, Legionella and Piscirickettsia. Francisella had the smallest core-genome, 692 genes, compared to 886 and 1,732 genes for Legionella and Piscirickettsia respectively, while the pan-genome of Legionella was more than twice the size of that of the other two genera. Also, the composition of the Francisella Type VI secretion system (T6SS) was analyzed. Distinct differences in the gene content of the T6SS were identified. In silico approaches performed to identify putative substrates of these systems revealed potential effectors targeting the cell wall, inner membrane, cellular nucleic acids as well as proteins, thus constituting attractive targets for site-directed mutagenesis. The comparative analysis performed here provides a comprehensive basis for the assessment of the phylogenomic relationship of members of the genus Francisella and for the identification of putative T6SS virulence traits.}, } @article {pmid32849358, year = {2020}, author = {Bannantine, JP and Conde, C and Bayles, DO and Branger, M and Biet, F}, title = {Genetic Diversity Among Mycobacterium avium Subspecies Revealed by Analysis of Complete Genome Sequences.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {1701}, pmid = {32849358}, issn = {1664-302X}, abstract = {Mycobacterium avium comprises four subspecies that contain both human and veterinary pathogens. At the inception of this study, twenty-eight M. avium genomes had been annotated as RefSeq genomes, facilitating direct comparisons. These genomes represent strains from around the world and provided a unique opportunity to examine genome dynamics in this species. 
Each genome was confirmed to be classified correctly based on SNP genotyping, nucleotide identity and presence/absence of repetitive elements or other typing methods. The Mycobacterium avium subspecies paratuberculosis (Map) genome size and organization was remarkably consistent, averaging 4.8 Mb with a variance of only 29.6 kb among the 13 strains. Comparing recombination events along with the larger genome size and variance observed among Mycobacterium avium subspecies avium (Maa) and Mycobacterium avium subspecies hominissuis (Mah) strains (collectively termed non-Map) suggests horizontal gene transfer occurs in non-Map, but not in Map strains. Overall, M. avium subspecies could be divided into two major sub-divisions, with the Map type II (bovine strains) clustering tightly on one end of a phylogenetic spectrum and Mah strains clustering more loosely together on the other end. The most evolutionarily distinct Map strain was an ovine strain, designated Telford, which had >1,000 SNPs and showed large rearrangements compared to the bovine type II strains. The Telford strain clustered with Maa strains as an intermediate between Map type II and Mah. SNP analysis and genome organization analyses repeatedly demonstrated the conserved nature of Map versus the mosaic nature of non-Map M. avium strains. Finally, core and pangenomes were developed for Map and non-Map strains. A total of 80% Map genes belonged to the Map core genome, while only 40% of non-Map genes belonged to the non-Map core genome. 
These genomes provide a more complete and detailed comparison of these subspecies strains as well as a blueprint for how genetic diversity originated.}, } @article {pmid32843837, year = {2020}, author = {Costa, SS and Guimarães, LC and Silva, A and Soares, SC and Baraúna, RA}, title = {First Steps in the Analysis of Prokaryotic Pan-Genomes.}, journal = {Bioinformatics and biology insights}, volume = {14}, number = {}, pages = {1177932220938064}, pmid = {32843837}, issn = {1177-9322}, abstract = {Pan-genome is defined as the set of orthologous and unique genes of a specific group of organisms. The pan-genome is composed by the core genome, accessory genome, and species- or strain-specific genes. The pan-genome is considered open or closed based on the alpha value of the Heap law. In an open pan-genome, the number of gene families will continuously increase with the addition of new genomes to the analysis, while in a closed pan-genome, the number of gene families will not increase considerably. The first step of a pan-genome analysis is the homogenization of genome annotation. The same software should be used to annotate genomes, such as GeneMark or RAST. Subsequently, several software are used to calculate the pan-genome such as BPGA, GET_HOMOLOGUES, PGAP, among others. This review presents all these initial steps for those who want to perform a pan-genome analysis, explaining key concepts of the area. Furthermore, we present the pan-genomic analysis of 9 bacterial species. These are the species with the highest number of genomes deposited in GenBank. We also show the influence of the identity and coverage parameters on the prediction of orthologous and paralogous genes. 
Finally, we cite the perspectives of several research areas where pan-genome analysis can be used to answer important issues.}, } @article {pmid32816227, year = {2020}, author = {Zhou, L and Zhang, T and Tang, S and Fu, X and Yu, S}, title = {Pan-genome analysis of Paenibacillus polymyxa strains reveals the mechanism of plant growth promotion and biocontrol.}, journal = {Antonie van Leeuwenhoek}, volume = {113}, number = {11}, pages = {1539--1558}, doi = {10.1007/s10482-020-01461-y}, pmid = {32816227}, issn = {1572-9699}, support = {No. 20181BBF6003//Jiangxi Provincial Department of Science and Technology (CN)/ ; No. 31760547//National Natural Science Foundation of China/ ; }, abstract = {Rapid development of gene sequencing technologies has led to an exponential increase in microbial sequencing data. Genome research of a single organism does not capture the changes in the characteristics of genetic information within a species. Pan-genome analysis gives us a broader perspective to study the complete genetic information of a species. Paenibacillus polymyxa is a Gram-positive bacterium and an important plant growth-promoting rhizobacterium with the ability to produce multiple antibiotics, such as fusaricidin, lantibiotic, paenilan, and polymyxin. Our study explores the pan-genome of 14 representative P. polymyxa strains isolated from around the world. Heap's law model and curve fitting confirmed an open pan-genome of P. polymyxa. The phylogenetic and collinearity analyses reflected that the evolutionary classification of P. polymyxa strains are not associated with geographical area and ecological niches. Few genes related to phytohormone synthesis and phosphate solubilization were conserved; however, the nif cluster gene associated with nitrogen fixation exists only in some strains. This finding is indicative of nitrogen fixing ability is not stable in P. polymyxa. Analysis of antibiotic gene clusters in P. 
polymyxa revealed the presence of these genes in both core and accessory genomes. This observation indicates that the difference in living environment led to loss of ability to synthesize antibiotics in some strains. The current pan-genomic analysis of P. polymyxa will help us understand the mechanisms of biological control and plant growth promotion. It will also promote the use of P. polymyxa in agriculture.}, } @article {pmid32804605, year = {2020}, author = {Ouyabe, M and Tanaka, N and Shiwa, Y and Fujita, N and Kikuno, H and Babil, P and Shiwachi, H}, title = {Rhizobium dioscoreae sp. nov., a plant growth-promoting bacterium isolated from yam (Dioscorea species).}, journal = {International journal of systematic and evolutionary microbiology}, volume = {70}, number = {9}, pages = {5054--5062}, pmid = {32804605}, issn = {1466-5034}, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Dioscorea/*microbiology ; Endophytes ; Fatty Acids/chemistry ; Japan ; Nitrogen Fixation ; Nucleic Acid Hybridization ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Rhizobium/*classification/isolation & purification ; Sequence Analysis, DNA ; }, abstract = {This study investigated endophytic nitrogen-fixing bacteria isolated from two species of yam (water yam, Dioscorea alata L.; lesser yam, Dioscorea esculenta L.) grown in nutrient-poor alkaline soil conditions on Miyako Island, Okinawa, Japan. Two bacterial strains of the genus Rhizobium, S-93T and S-62, were isolated. The phylogenetic tree, based on the almost-complete 16S rRNA gene sequences (1476 bp for each strain), placed them in a distinct clade, with Rhizobium miluonense CCBAU 41251T, Rhizobium hainanense I66T, Rhizobium multihospitium HAMBI 2975T, Rhizobium freirei PRF 81T and Rhizobium tropici CIAT 899T being their closest species. 
Their bacterial fatty acid profile, with major components of C19 : 0 cyclo ω8c and summed feature 8, as well as other phenotypic characteristics and DNA G+C content (59.65 mol%) indicated that the novel strains belong to the genus Rhizobium. Pairwise average nucleotide identity analyses separated the novel strains from their most closely related species with similarity values of 90.5, 88.9, 88.5, 84.5 and 84.4 % for R. multihospitium HAMBI 2975T, R. tropici CIAT 899T, R. hainanense CCBAU 57015T, R. miluonense HAMBI 2971T and R. freirei PRF 81T, respectively; digital DNA-DNA hybridization values were in the range of 26-42 %. Considering the phenotypic characteristics as well as the genomic data, it is suggested that strains S-93T and S-62 represent a new species, for which the name Rhizobium dioscoreae is proposed. The type strain is S-93T (=NRIC 0988T=NBRC 114257T=DSM 110498T).}, } @article {pmid32787780, year = {2020}, author = {Clawson, ML and Schuller, G and Dickey, AM and Bono, JL and Murray, RW and Sweeney, MT and Apley, MD and DeDonder, KD and Capik, SF and Larson, RL and Lubbers, BV and White, BJ and Blom, J and Chitko-McKown, CG and Brichta-Harhay, DM and Smith, TPL}, title = {Differences between predicted outer membrane proteins of genotype 1 and 2 Mannheimia haemolytica.}, journal = {BMC microbiology}, volume = {20}, number = {1}, pages = {250}, pmid = {32787780}, issn = {1471-2180}, support = {CRIS# 3040-32000-034-00D//Agricultural Research Service/International ; }, abstract = {BACKGROUND: Mannheimia haemolytica strains isolated from North American cattle have been classified into two genotypes (1 and 2). Although members of both genotypes have been isolated from the upper and lower respiratory tracts of cattle with or without bovine respiratory disease (BRD), genotype 2 strains are much more frequently isolated from diseased lungs than genotype 1 strains. The mechanisms behind the increased association of genotype 2 M. 
haemolytica with BRD are not fully understood. To address that, and to search for interventions against genotype 2 M. haemolytica, complete, closed chromosome assemblies for 35 genotype 1 and 34 genotype 2 strains were generated and compared. Searches were conducted for the pan genome, core genes shared between the genotypes, and for genes specific to either genotype. Additionally, genes encoding outer membrane proteins (OMPs) specific to genotype 2 M. haemolytica were identified, and the diversity of their protein isoforms was characterized with predominantly unassembled, short-read genomic sequences for up to 1075 additional strains.

RESULTS: The pan genome of the 69 sequenced M. haemolytica strains consisted of 3111 genes, of which 1880 comprised a shared core between the genotypes. A core of 112 and 179 genes or gene variants were specific to genotype 1 and 2, respectively. Seven genes encoding predicted OMPs; a peptidase S6, a ligand-gated channel, an autotransporter outer membrane beta-barrel domain-containing protein (AOMB-BD-CP), a porin, and three different trimeric autotransporter adhesins were specific to genotype 2 as their genotype 1 homologs were either pseudogenes, or not detected. The AOMB-BD-CP gene, however, appeared to be truncated across all examined genotype 2 strains and to likely encode dysfunctional protein. Homologous gene sequences from additional M. haemolytica strains confirmed the specificity of the remaining six genotype 2 OMP genes and revealed they encoded low isoform diversity at the population level.

CONCLUSION: Genotype 2 M. haemolytica possess genes encoding conserved OMPs not found intact in more commensally prone genotype 1 strains. Some of the genotype 2 specific genes identified in this study are likely to have important biological roles in the pathogenicity of genotype 2 M. haemolytica, which is the primary bacterial cause of BRD.}, } @article {pmid32782425, year = {2020}, author = {Xu, S and Cheng, J and Meng, X and Xu, Y and Mu, Y}, title = {Complete Genome and Comparative Genome Analysis of Lactobacillus reuteri YSJL-12, a Potential Probiotics Strain Isolated From Healthy Sow Fresh Feces.}, journal = {Evolutionary bioinformatics online}, volume = {16}, number = {}, pages = {1176934320942192}, pmid = {32782425}, issn = {1176-9343}, abstract = {Lactobacillus reuteri YSJL-12 was isolated from healthy sow fresh feces and used as probiotics additives previously. To investigate the genetic basis on probiotic potential and identify the genes in the strain, the complete genome of YSJL-12 was sequenced. Then comparative genome analysis on 9 strains of Lactobacillus reuteri was performed. The genome of YSJL-12 consisted of a circular 2,084,748 bp chromosome and 2 circular plasmids (51,906 and 15,134 bp). From among the 2065 protein-coding sequences (CDSs), the genes resistant to the environmental stress were identified. The function of COG (Clusters of Orthologous Group) protein genes was predicted, and the KEGG (Kyoto Encyclopedia of Genes and Genomes) pathways were analyzed. The comparative genome analysis indicated that the pan-genome contained a core genome of 1257 orthologous gene clusters, an accessory genome of 1064 orthologous gene clusters, and 1148 strain-specific genes, and the antibacterial mechanism among Lactobacillus reuteri strains might be different. The phylogenetic analysis and genomic collinearity revealed that the phylogenetic relationship among 9 strains of Lactobacillus reuteri was connected with host species and showed host specificity. 
The research could help us to better predict genes function and understand genetic basis on adapting to host gut in Lactobacillus reuteri YSJL-12.}, } @article {pmid32779519, year = {2020}, author = {Bernardes, JS and Eberle, RJ and Vieira, FRJ and Coronado, MA}, title = {A comparative pan-genomic analysis of 53 C. pseudotuberculosis strains based on functional domains.}, journal = {Journal of biomolecular structure \& dynamics}, volume = {}, number = {}, pages = {1--13}, doi = {10.1080/07391102.2020.1805017}, pmid = {32779519}, issn = {1538-0254}, abstract = {Corynebacterium pseudotuberculosis is a pathogenic bacterium with great veterinary and economic importance. It is classified into two biovars: ovis, nitrate-negative, that causes lymphadenitis in small ruminants and equi, nitrate-positive, causing ulcerative lymphangitis in equines. With the explosive growth of available genomes of several strains, pan-genome analysis has opened new opportunities for understanding the dynamics and evolution of C. pseudotuberculosis. However, few pan-genomic studies have compared biovars equi and ovis. Such studies have considered a reduced number of strains and compared entire genomes. Here we conducted an original pan-genome analysis based on protein sequences and their functional domains. We considered 53 C. pseudotuberculosis strains from both biovars isolated from different hosts and countries. We have analysed conserved domains, common domains more frequently found in each biovar and biovar-specific (unique) domains. Our results demonstrated that biovar equi is more variable; there is a significant difference in the number of proteins per strains, probably indicating the occurrence of more gene loss/gain events. Moreover, strains of biovar equi presented a higher number of biovar-specific domains, 77 against only eight in biovar ovis, most of them are associated with virulence mechanisms. 
With this domain analysis, we have identified functional differences among strains of biovars ovis and equi that could be related to niche-adaptation and probably help to better understanding mechanisms of virulence and pathogenesis. The distribution patterns of functional domains identified in this work might have impacts on bacterial physiology and lifestyle, encouraging the development of new diagnoses, vaccines, and treatments for C. pseudotuberculosis diseases. Communicated by Ramaswamy H. Sarma.}, } @article {pmid32766786, year = {2020}, author = {Pan, Y and Awan, F and Zhenbao, M and Zhang, X and Zeng, J and Zeng, Z and Xiong, W}, title = {Preliminary view of the global distribution and spread of the tet(X) family of tigecycline resistance genes.}, journal = {The Journal of antimicrobial chemotherapy}, volume = {75}, number = {10}, pages = {2797--2803}, doi = {10.1093/jac/dkaa284}, pmid = {32766786}, issn = {1460-2091}, abstract = {BACKGROUND: The emergence of plasmid-mediated tet(X3)/tet(X4) genes is threatening the role of tigecycline as a last-resort antibiotic to treat clinical infections caused by XDR bacteria. Considering the possible public health threat posed by tet(X) and its variants [which we collectively call 'tet(X) genes' in this study], global monitoring and surveillance are urgently required.

OBJECTIVES: Here we conducted a worldwide survey of the global distribution and spread of tet(X) genes.

METHODS: We analysed a comprehensive dataset of bacterial genomes in conjunction with surveillance data from our laboratory and the NCBI database, as well as sufficient metadata to characterize the results.

RESULTS: The global distribution features of tet(X) genes were revealed. We clustered three types of genetic backbones of tet(X) genes embedded or transferred in bacterial genomes. Our pan-genome analyses revealed a large genetic pool composed of tet(X)-carrying sequences. Moreover, phylogenetic trees of tet(X) genes and tet(X)-like proteins were built.

CONCLUSIONS: To the best of our knowledge, our results provide the first view of the global distribution of tet(X) genes, demonstrate the features of tet(X)-carrying fragments and highlight the possible evolution of tigecycline-inactivation enzymes in diverse bacterial species and habitats.}, } @article {pmid32762606, year = {2020}, author = {Santos, DDS and Calaça, PRA and Porto, ALF and de Souza, PRE and de Freitas, NSA and Cavalcanti Vieira Soares, MT}, title = {What Differentiates Probiotic from Pathogenic Bacteria? The Genetic Mobility of Enterococcus faecium Offers New Molecular Insights.}, journal = {Omics : a journal of integrative biology}, volume = {24}, number = {12}, pages = {706-713}, doi = {10.1089/omi.2020.0078}, pmid = {32762606}, issn = {1557-8100}, abstract = {Enterococcus faecium is a lactic acid bacterium with applications in food engineering and nutrigenomics, including as starter cultures in fermented foods. To differentiate the E. faecium probiotic from pathogenic bacteria, physiological analyses are often used but they do not guarantee that a bacterial strain is not pathogenic. We report here new findings and an approach based on comparison of the genetic mobility of (1) probiotic, (2) pathogenic, and (3) nonpathogenic and non-probiotic strains, so as to differentiate probiotics, and inform their safe use. The region of the 16S ribosomal DNA (rDNA) genes of different E. faecium strains native to Pernambuco-Brazil was used with the GenBank query sequence. Complete genomes were selected and divided into three groups as noted above to identify the mobile genetic elements (MGEs) (transposase, integrase, conjugative transposon protein and phage) and antibiotic resistance genes (ARGs), and to undertake pan-genome analysis and multiple genome alignment. Differences in the number of MGEs were found in ARGs, in the presence and absence of the genes that differentiate E. faecium probiotics and pathogenic bacteria genetically. 
Our data suggest that genetic mobility appears to be informative in differentiating between probiotic and pathogenic strains. While the present findings are not necessarily applicable to all probiotics, they offer novel molecular insights to guide future research in nutrigenomics, clinical medicine, and food engineering on new ways to differentiate pathogenic from probiotic bacteria.}, } @article {pmid32761525, year = {2020}, author = {Son, S and Oh, JD and Lee, SH and Shin, D and Kim, Y}, title = {Comparative genomics of canine Lactobacillus reuteri reveals adaptation to a shared environment with humans.}, journal = {Genes \& genomics}, volume = {42}, number = {9}, pages = {1107-1116}, doi = {10.1007/s13258-020-00978-w}, pmid = {32761525}, issn = {2092-9293}, abstract = {BACKGROUND: Lactobacillus reuteri is a gram-positive, non-motile bacterial species that has been used as a representative microorganism model to describe the ecology and evolution of vertebrate gut symbionts.

OBJECTIVE: Because the genetic features and evolutionary strategies of L. reuteri from the gastrointestinal tract of canines remain unknown, we tried to construct draft genome canine L. reuteri and investigate modified, acquired, or lost genetic features that have facilitated the evolution and adaptation of strains to specific environmental niches by this study.

METHODS: To examine canine L. reuteri, we sequenced an L. reuteri strain isolated from a dog in Korea. A comparative genomic approach was used to assess genetic diversity and gain insight into the distinguishing features related to different hosts based on 27 published genomic sequences.

RESULTS: The pan-genome of 28 L. reuteri strains contained 7,369 gene families, and the core genome contained 1070 gene families. The ANI tree based on the core genes in the canine L. reuteri strain (C1) was very close to those for three strains (IRT, DSM20016, JCM1112) from humans. Evolutionarily, these four strains formed one clade, which we regarded as C1-clade in this study. We could investigate a total of 32,050 amino acid substitutions among the 28 L. reuteri strain genomes. In this comparison, 283 amino acid substitutions were specific to strain C1 and four strains in C1-clade shared most of these 283 C1-strain specific amino acid substitutions, suggesting strongly similar selective pressure. In accessory genes, we could identify 127 C1-clade host-specific genes and found that several genes were closely related to replication, recombination, and repair.

CONCLUSION: This study provides new insights into the adaptation of L. reuteri to the canine intestinal habitat, and suggests that the genome of L. reuteri from canines is closely associated with their living and shared environment with humans.}, } @article {pmid32759827, year = {2020}, author = {Botelho, J and Grosso, F and Peixe, L}, title = {ICEs Are the Main Reservoirs of the Ciprofloxacin-Modifying crpP Gene in Pseudomonas aeruginosa.}, journal = {Genes}, volume = {11}, number = {8}, pages = {}, pmid = {32759827}, issn = {2073-4425}, support = {UIDB/04378/2020//Fundação para a Ciência e a Tecnologia/ ; }, abstract = {The ciprofloxacin-modifying crpP gene was recently identified in a plasmid isolated from a Pseudomonas aeruginosa clinical isolate. Homologues of this gene were also identified in Escherichia coli, Klebsiella pneumoniae and Acinetobacter baumannii. We set out to explore the mobile elements involved in the acquisition and spread of this gene in publicly available and complete genomes of Pseudomonas spp. All Pseudomonas complete genomes were downloaded from NCBI's Refseq library and were inspected for the presence of the crpP gene. The mobile elements carrying this gene were further characterized. The crpP gene was identified only in P. aeruginosa, in more than half of the complete chromosomes (61.9%, n = 133/215) belonging to 52 sequence types, of which the high-risk clone ST111 was the most frequent. We identified 136 crpP-harboring integrative and conjugative elements (ICEs), with 93.4% belonging to the mating-pair formation G (MPFG) family. The ICEs were integrated at the end of a tRNALys gene and were all flanked by highly conserved 45-bp direct repeats. The crpP-carrying ICEs contain 26 core genes (2.2% of all 1193 genes found in all the ICEs together), which are present in 99% or more of the crpP-harboring ICEs. The most frequently encoded traits on these ICEs include replication, transcription, intracellular trafficking and cell motility. 
Our work suggests that ICEs are the main vectors promoting the dissemination of the ciprofloxacin-modifying crpP gene in P. aeruginosa.}, } @article {pmid32753501, year = {2020}, author = {Petit, RA and Read, TD}, title = {Bactopia: a Flexible Pipeline for Complete Analysis of Bacterial Genomes.}, journal = {mSystems}, volume = {5}, number = {4}, pages = {}, pmid = {32753501}, issn = {2379-5077}, support = {U54 CK000485/CK/NCEZID CDC HHS/United States ; U54CK000485/ACL/ACL HHS/United States ; }, abstract = {Sequencing of bacterial genomes using Illumina technology has become such a standard procedure that often data are generated faster than can be conveniently analyzed. We created a new series of pipelines called Bactopia, built using Nextflow workflow software, to provide efficient comparative genomic analyses for bacterial species or genera. Bactopia consists of a data set setup step (Bactopia Data Sets [BaDs]), which creates a series of customizable data sets for the species of interest, the Bactopia Analysis Pipeline (BaAP), which performs quality control, genome assembly, and several other functions based on the available data sets and outputs the processed data to a structured directory format, and a series of Bactopia Tools (BaTs) that perform specific postprocessing on some or all of the processed data. BaTs include pan-genome analysis, computing average nucleotide identity between samples, extracting and profiling the 16S genes, and taxonomic classification using highly conserved genes. It is expected that the number of BaTs will increase to fill specific applications in the future. As a demonstration, we performed an analysis of 1,664 public Lactobacillus genomes, focusing on Lactobacillus crispatus, a species that is a common part of the human vaginal microbiome. 
Bactopia is an open source system that can scale from projects as small as one bacterial genome to ones including thousands of genomes and that allows for great flexibility in choosing comparison data sets and options for downstream analysis. Bactopia code can be accessed at https://www.github.com/bactopia/bactopia. IMPORTANCE: It is now relatively easy to obtain a high-quality draft genome sequence of a bacterium, but bioinformatic analysis requires organization and optimization of multiple open source software tools. We present Bactopia, a pipeline for bacterial genome analysis, as an option for processing bacterial genome data. Bactopia also automates downloading of data from multiple public sources and species-specific customization. Because the pipeline is written in the Nextflow language, analyses can be scaled from individual genomes on a local computer to thousands of genomes using cloud resources. As a usage example, we processed 1,664 Lactobacillus genomes from public sources and used comparative analysis workflows (Bactopia Tools) to identify and analyze members of the L. 
crispatus species.}, } @article {pmid32745560, year = {2020}, author = {Tao, Y and Jordan, DR and Mace, ES}, title = {A Graph-Based Pan-Genome Guides Biological Discovery.}, journal = {Molecular plant}, volume = {13}, number = {9}, pages = {1247-1249}, doi = {10.1016/j.molp.2020.07.020}, pmid = {32745560}, issn = {1752-9867}, mesh = {*Genome ; Genomics ; *Soybeans ; }, } @article {pmid32744423, year = {2020}, author = {Correia, K and Mahadevan, R}, title = {Pan-Genome-Scale Network Reconstruction: Harnessing Phylogenomics Increases the Quantity and Quality of Metabolic Models.}, journal = {Biotechnology journal}, volume = {15}, number = {10}, pages = {e1900519}, doi = {10.1002/biot.201900519}, pmid = {32744423}, issn = {1860-7314}, support = {Research Excellence//Ontario Ministry of Research, Innovation and Science/ ; //Genome Canada/ ; }, abstract = {A genome-scale network reconstruction (GENRE) is a knowledgebase for an organism and has various applications. Available genome sequences have risen in recent years, but the number of curated GENREs has not kept pace. Existing yeast GENREs contain significant commission and omission errors. Current practices limit the quantity and quality of GENREs. An open and transparent phylogenomic-driven framework is outlined to address these issues. The method is demonstrated with 33 yeasts and fungi in Dikarya. A pan-fungal metabolic network called FYRMENT (Fungal and Yeast Metabolic Network) (https://github.com/LMSE/FYRMENT) is created, and annotated with ortholog groups from AYbRAH (https://github.com/LMSE/AYbRAH). Metabolic models for lower-level taxons are compiled. The fungal pan-GENRE contains 1553 orthologs, 2759 reactions, 2251 metabolites. The GENREs have higher genomic and metabolic coverage than existing yeast and fungal GENREs created with other methods. Metabolic simulations show the maximum amino acid yields from glucose differs between yeast lineages, indicating metabolic networks have evolved. 
Curating genomes and reactions at higher taxonomic-levels increases the quantity and quality of GENREs than conventional approaches. This approach can scale to other branches in the tree of life.}, } @article {pmid32742815, year = {2020}, author = {Parlikar, A and Kalia, K and Sinha, S and Patnaik, S and Sharma, N and Vemuri, SG and Sharma, G}, title = {Understanding genomic diversity, pan-genome, and evolution of SARS-CoV-2.}, journal = {PeerJ}, volume = {8}, number = {}, pages = {e9576}, pmid = {32742815}, issn = {2167-8359}, abstract = {Coronavirus disease 2019 (COVID-19) infection, which originated from Wuhan, China, has seized the whole world in its grasp and created a huge pandemic situation before humanity. Since December 2019, genomes of numerous isolates have been sequenced and analyzed for testing confirmation, epidemiology, and evolutionary studies. In the first half of this article, we provide a detailed review of the history and origin of COVID-19, followed by the taxonomy, nomenclature and genome organization of its causative agent Severe Acute Respiratory Syndrome-related Coronavirus-2 (SARS-CoV-2). In the latter half, we analyze subgenus Sarbecovirus (167 SARS-CoV-2, 312 SARS-CoV, and 5 Pangolin CoV) genomes to understand their diversity, origin, and evolution, along with pan-genome analysis of genus Betacoronavirus members. Whole-genome sequence-based phylogeny of subgenus Sarbecovirus genomes reasserted the fact that SARS-CoV-2 strains evolved from their common ancestors putatively residing in bat or pangolin hosts. We predicted a few country-specific patterns of relatedness and identified mutational hotspots with high, medium and low probability based on genome alignment of 167 SARS-CoV-2 strains. A total of 100-nucleotide segment-based homology studies revealed that the majority of the SARS-CoV-2 genome segments are close to Bat CoV, followed by some to Pangolin CoV, and some are unique ones. 
Open pan-genome of genus Betacoronavirus members indicates the diversity contributed by the novel viruses emerging in this group. Overall, the exploration of the diversity of these isolates, mutational hotspots and pan-genome will shed light on the evolution and pathogenicity of SARS-CoV-2 and help in developing putative methods of diagnosis and treatment.}, } @article {pmid32735209, year = {2020}, author = {Söderlund, R and Formenti, N and Caló, S and Chiari, M and Zoric, M and Alborali, GL and Sørensen Dalgaard, T and Wattrang, E and Eriksson, H}, title = {Comparative genome analysis of Erysipelothrix rhusiopathiae isolated from domestic pigs and wild boars suggests host adaptation and selective pressure from the use of antibiotics.}, journal = {Microbial genomics}, volume = {6}, number = {8}, pages = {}, pmid = {32735209}, issn = {2057-5858}, abstract = {The disease erysipelas caused by Erysipelothrix rhusiopathiae (ER) is a major concern in pig production. In the present study the genomes of ER from pigs (n=87), wild boars (n=71) and other sources (n=85) were compared in terms of whole-genome SNP variation, accessory genome content and the presence of genetic antibiotic resistance determinants. The aim was to investigate if genetic features among ER were associated with isolate origin in order to better estimate the risk of transmission of porcine-adapted strains from wild boars to free-range pigs and to increase our understanding of the evolution of ER. Pigs and wild boars carried isolates representing all ER clades, but clade one only occurred in healthy wild boars and healthy pigs. Several accessory genes or gene variants were found to be significantly associated with the pig and wild boar hosts, with genes predicted to encode cell wall-associated or extracellular proteins overrepresented. 
Gene variants associated with serovar determination and capsule production in serovars known to be pathogenic for pigs were found to be significantly associated with pigs as hosts. In total, 30 % of investigated pig isolates but only 6 % of wild boar isolates carried resistance genes, most commonly tetM (tetracycline) and lsa(E) together with lnu(B) (lincosamides, pleuromutilin and streptogramin A). The incidence of variably present genes including resistance determinants was weakly linked to phylogeny, indicating that host adaptation in ER has evolved multiple times in diverse lineages mediated by recombination and the acquisition of mobile genetic elements. The presented results support the occurrence of host-adapted ER strains, but they do not indicate frequent transmission between wild boars and domestic pigs. This article contains data hosted by Microreact.}, } @article {pmid32727443, year = {2020}, author = {Derakhshani, H and Bernier, SP and Marko, VA and Surette, MG}, title = {Completion of draft bacterial genomes by long-read sequencing of synthetic genomic pools.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {519}, pmid = {32727443}, issn = {1471-2164}, support = {OGI-146//Genome Canada/ ; }, abstract = {BACKGROUND: Illumina technology currently dominates bacterial genomics due to its high read accuracy and low sequencing cost. However, the incompleteness of draft genomes generated by Illumina reads limits their application in comprehensive genomics analyses. Alternatively, hybrid assembly using both Illumina short reads and long reads generated by single molecule sequencing technologies can enable assembly of complete bacterial genomes, yet the high per-genome cost of long-read sequencing limits the widespread use of this approach in bacterial genomics. 
Here we developed a protocol for hybrid assembly of complete bacterial genomes using miniaturized multiplexed Illumina sequencing and non-barcoded PacBio sequencing of a synthetic genomic pool (SGP), thus significantly decreasing the overall per-genome cost of sequencing.

RESULTS: We evaluated the performance of SGP hybrid assembly on the genomes of 20 bacterial isolates with different genome sizes, a wide range of GC contents, and varying levels of phylogenetic relatedness. By improving the contiguity of Illumina assemblies, SGP hybrid assembly generated 17 complete and 3 nearly complete bacterial genomes. Increased contiguity of SGP hybrid assemblies resulted in considerable improvement in gene prediction and annotation. In addition, SGP hybrid assembly was able to resolve repeat elements and identify intragenomic heterogeneities, e.g. different copies of 16S rRNA genes, that would otherwise go undetected by short-read-only assembly. Comprehensive comparison of SGP hybrid assemblies with those generated using multiplexed PacBio long reads (long-read-only assembly) also revealed the relative advantage of SGP hybrid assembly in terms of assembly quality. In particular, we observed that SGP hybrid assemblies were completely devoid of both small (i.e. single base substitutions) and large assembly errors. Finally, we show the ability of SGP hybrid assembly to differentiate genomes of closely related bacterial isolates, suggesting its potential application in comparative genomics and pangenome analysis.

CONCLUSION: Our results indicate the superiority of SGP hybrid assembly over both short-read and long-read assemblies with respect to completeness, contiguity, accuracy, and recovery of small replicons. By lowering the per-genome cost of sequencing, our parallel sequencing and hybrid assembly pipeline could serve as a cost effective and high throughput approach for completing high-quality bacterial genomes.}, } @article {pmid32719517, year = {2020}, author = {Haberer, G and Kamal, N and Bauer, E and Gundlach, H and Fischer, I and Seidel, MA and Spannagl, M and Marcon, C and Ruban, A and Urbany, C and Nemri, A and Hochholdinger, F and Ouzunova, M and Houben, A and Schön, CC and Mayer, KFX}, title = {European maize genomes highlight intraspecies variation in repeat and gene content.}, journal = {Nature genetics}, volume = {52}, number = {9}, pages = {950-957}, pmid = {32719517}, issn = {1546-1718}, mesh = {Breeding/methods ; Chromosome Mapping ; Genetic Variation/*genetics ; Genome, Plant/*genetics ; Genotype ; Hybrid Vigor/genetics ; Phenotype ; Zea mays/*genetics ; }, abstract = {The diversity of maize (Zea mays) is the backbone of modern heterotic patterns and hybrid breeding. Historically, US farmers exploited this variability to establish today's highly productive Corn Belt inbred lines from blends of dent and flint germplasm pools. Here, we report de novo genome sequences of four European flint lines assembled to pseudomolecules with scaffold N50 ranging from 6.1 to 10.4 Mb. Comparative analyses with two US Corn Belt lines explains the pronounced differences between both germplasms. While overall syntenic order and consolidated gene annotations reveal only moderate pangenomic differences, whole-genome alignments delineating the core and dispensable genome, and the analysis of heterochromatic knobs and orthologous long terminal repeat retrotransposons unveil the dynamics of the maize genome. 
The high-quality genome sequences of the flint pool complement the maize pangenome and provide an important tool to study maize improvement at a genome scale and to enhance modern hybrid breeding.}, } @article {pmid32719416, year = {2020}, author = {Muqaddasi, QH and Brassac, J and Ebmeyer, E and Kollers, S and Korzun, V and Argillier, O and Stiewe, G and Plieske, J and Ganal, MW and Röder, MS}, title = {Prospects of GWAS and predictive breeding for European winter wheat's grain protein content, grain starch content, and grain hardness.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {12541}, pmid = {32719416}, issn = {2045-2322}, mesh = {Alleles ; Genetic Markers ; Genetic Variation ; Genetics, Population ; *Genome-Wide Association Study ; Grain Proteins/*metabolism ; Haplotypes/genetics ; Hardness ; Linkage Disequilibrium/genetics ; Molecular Sequence Annotation ; Phenotype ; Physical Chromosome Mapping ; *Plant Breeding ; Principal Component Analysis ; Quantitative Trait Loci/genetics ; Starch/*metabolism ; Triticum/*genetics/*growth & development ; }, abstract = {Grain quality traits determine the classification of registered wheat (Triticum aestivum L.) varieties. Although environmental factors and crop management practices exert a considerable influence on wheat quality traits, a significant proportion of the variance is attributed to the genetic factors. To identify the underlying genetic factors of wheat quality parameters viz., grain protein content (GPC), grain starch content (GSC), and grain hardness (GH), we evaluated 372 diverse European wheat varieties in replicated field trials in up to eight environments. We observed that all of the investigated traits hold a wide and significant genetic variation, and a significant negative correlation exists between GPC and GSC plus grain yield. 
Our association analyses based on 26,694 high-quality single nucleotide polymorphic markers revealed a strong quantitative genetic nature of GPC and GSC with associations on groups 2, 3, and 6 chromosomes. The identification of known Puroindoline-b gene for GH provided a positive analytic proof for our studies. We report that a locus QGpc.ipk-6A controls both GPC and GSC with opposite allelic effects. Based on wheat's reference and pan-genome sequences, the physical characterization of two loci viz., QGpc.ipk-2B and QGpc.ipk-6A facilitated the identification of the candidate genes for GPC. Furthermore, by exploiting additive and epistatic interactions of loci, we evaluated the prospects of predictive breeding for the investigated traits that suggested its efficient use in the breeding programs.}, } @article {pmid32719405, year = {2020}, author = {Flament-Simon, SC and de Toro, M and Chuprikova, L and Blanco, M and Moreno-González, J and Salas, M and Blanco, J and Redrejo-Rodríguez, M}, title = {High diversity and variability of pipolins among a wide range of pathogenic Escherichia coli strains.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {12452}, pmid = {32719405}, issn = {2045-2322}, mesh = {Animals ; *DNA Transposable Elements ; Escherichia coli/classification/*genetics/isolation & purification/metabolism ; Escherichia coli Infections/*microbiology/*veterinary ; Genetic Variation ; Genome, Bacterial ; Humans ; Phylogeny ; }, abstract = {Self-synthesizing transposons are integrative mobile genetic elements (MGEs) that encode their own B-family DNA polymerase (PolB). Discovered a few years ago, they are proposed as key players in the evolution of several groups of DNA viruses and virus-host interaction machinery. Pipolins are the most recent addition to the group, are integrated in the genomes of bacteria from diverse phyla and also present as circular plasmids in mitochondria. 
Remarkably, pipolins-encoded PolBs are proficient DNA polymerases endowed with DNA priming capacity, hence the name, primer-independent PolB (piPolB). We have now surveyed the presence of pipolins in a collection of 2,238 human and animal pathogenic Escherichia coli strains and found that, although detected in only 25 positive isolates (1.1%), they are present in E. coli strains from a wide variety of pathotypes, serotypes, phylogenetic groups and sequence types. Overall, the pangenome of strains carrying pipolins is highly diverse, despite the fact that a considerable number of strains belong to only three clonal complexes (CC10, CC23 and CC32). Comparative analysis with a set of 67 additional pipolin-harboring genomes from GenBank database spanning strains from diverse origin, further confirmed these results. The genetic structure of pipolins shows great flexibility and variability, with the piPolB gene and the attachment sites being the only common features. Most pipolins contain one or more recombinases that would be involved in excision/integration of the element in the same conserved tRNA gene. This mobilization mechanism might explain the apparent incompatibility of pipolins with other integrative MGEs such as integrons. In addition, analysis of cophylogeny between pipolins and pipolin-harboring strains showed a lack of congruence between several pipolins and their host strains, in agreement with horizontal transfer between hosts. Overall, these results indicate that pipolins can serve as a vehicle for genetic transfer among circulating E. 
coli and possibly also among other pathogenic bacteria.}, } @article {pmid32718320, year = {2020}, author = {Crysnanto, D and Pausch, H}, title = {Bovine breed-specific augmented reference graphs facilitate accurate sequence read mapping and unbiased variant discovery.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {184}, pmid = {32718320}, issn = {1474-760X}, abstract = {BACKGROUND: The current bovine genomic reference sequence was assembled from a Hereford cow. The resulting linear assembly lacks diversity because it does not contain allelic variation, a drawback of linear references that causes reference allele bias. High nucleotide diversity and the separation of individuals by hundreds of breeds make cattle ideally suited to investigate the optimal composition of variation-aware references.

RESULTS: We augment the bovine linear reference sequence (ARS-UCD1.2) with variants filtered for allele frequency in dairy (Brown Swiss, Holstein) and dual-purpose (Fleckvieh, Original Braunvieh) cattle breeds to construct either breed-specific or pan-genome reference graphs using the vg toolkit. We find that read mapping is more accurate to variation-aware than linear references if pre-selected variants are used to construct the genome graphs. Graphs that contain random variants do not improve read mapping over the linear reference sequence. Breed-specific augmented and pan-genome graphs enable almost similar mapping accuracy improvements over the linear reference. We construct a whole-genome graph that contains the Hereford-based reference sequence and 14 million alleles that have alternate allele frequency greater than 0.03 in the Brown Swiss cattle breed. Our novel variation-aware reference facilitates accurate read mapping and unbiased sequence variant genotyping for SNPs and Indels.

CONCLUSIONS: We develop the first variation-aware reference graph for an agricultural animal (https://doi.org/10.5281/zenodo.3759712). Our novel reference structure improves sequence read mapping and variant genotyping over the linear reference. Our work is a first step towards the transition from linear to variation-aware reference structures in species with high genetic diversity and many sub-populations.}, } @article {pmid32718035, year = {2020}, author = {Yin, Z and Liu, J and Du, B and Ruan, HH and Huo, YX and Du, Y and Qiao, J}, title = {Whole-Genome-Based Survey for Polyphyletic Serovars of Salmonella enterica subsp. enterica Provides New Insights into Public Health Surveillance.}, journal = {International journal of molecular sciences}, volume = {21}, number = {15}, pages = {}, pmid = {32718035}, issn = {1422-0067}, support = {2019M660475//China Postdoctoral Science Foundation/ ; 21621004, 31870122//National Natural Science Foundation of China/ ; 18JCYBJC96000//Natural Science Foundation of Tianjin City/ ; 18JCTPJC65000//Tianjin Enterprise Science and Technology Commissioner Project/ ; }, abstract = {Serotyping has traditionally been considered the basis for surveillance of Salmonella, but it cannot distinguish distinct lineages sharing the same serovar that vary in host range, pathogenicity and epidemiology. However, polyphyletic serovars have not been extensively investigated. Public health microbiology is currently being transformed by whole-genome sequencing (WGS) data, which promote the lineage determination using a more powerful and accurate technique than serotyping. The focus in this study is to survey and analyze putative polyphyletic serovars. The multi-locus sequence typing (MLST) phylogenetic analysis identified four putative polyphyletic serovars, namely, Montevideo, Bareilly, Saintpaul, and Muenchen. 
Whole-genome-based phylogeny and population structure highlighted the polyphyletic nature of Bareilly and Saintpaul and the multi-lineage nature of Montevideo and Muenchen. The population of these serovars was defined by extensive genetic diversity, the open pan genome and the small core genome. Source niche metadata revealed putative existence of lineage-specific niche adaptation (host-preference and environmental-preference), exhibited by lineage-specific genomic contents associated with metabolism and transport. Meanwhile, differences in genetic profiles relating to virulence and antimicrobial resistance within each lineage may contribute to pathogenicity and epidemiology. The results also showed that recombination events occurring at the H1-antigen loci may be an important reason for polyphyly. The results presented here provide the genomic basis of simple, rapid, and accurate identification of phylogenetic lineages of these serovars, which could have important implications for public health.}, } @article {pmid32715552, year = {2020}, author = {Fang, H and Xu, JB and Nie, Y and Wu, XL}, title = {Pan-genomic analysis reveals that the evolution of Dietzia species depends on their living habitats.}, journal = {Environmental microbiology}, volume = {}, number = {}, pages = {}, doi = {10.1111/1462-2920.15176}, pmid = {32715552}, issn = {1462-2920}, support = {2018YFA0902100//National Key R&D Program of China/ ; 2018YFA0902103//National Key R&D Program of China/ ; 31770118//National Natural Science Foundation of China/ ; 31770120//National Natural Science Foundation of China/ ; }, abstract = {The bacterial genus Dietzia is widely distributed in various environments. The genomes of 26 diverse strains of Dietzia, including almost all the type strains, were analysed in this study. This analysis revealed a lipid metabolism gene richness, which could explain the ability of Dietzia to live in oil related environments. 
The pan-genome consists of 83,976 genes assigned into 10,327 gene families, 792 of which are shared by all the genomes of Dietzia. Mathematical extrapolation of the data suggests that the Dietzia pan-genome is open. Both gene duplication and gene loss contributed to the open pan-genome, while horizontal gene transfer was limited. Dietzia strains primarily gained their diverse metabolic capacity through more ancient gene duplications. Phylogenetic analysis of Dietzia isolated from aquatic and terrestrial environments showed two distinct clades from the same ancestor. The genome sizes of Dietzia strains from aquatic environments were significantly larger than those from terrestrial environments, which was mainly due to the occurrence of more gene loss events during the evolutionary progress of the strains from terrestrial environments. The evolutionary history of Dietzia was tightly coupled to environmental conditions, and iron concentrations should be one of the key factors shaping the genomes of the Dietzia lineages.}, } @article {pmid32714356, year = {2020}, author = {Moreno-Pérez, A and Pintado, A and Murillo, J and Caballo-Ponce, E and Tegli, S and Moretti, C and Rodríguez-Palenzuela, P and Ramos, C}, title = {Host Range Determinants of Pseudomonas savastanoi Pathovars of Woody Hosts Revealed by Comparative Genomics and Cross-Pathogenicity Tests.}, journal = {Frontiers in plant science}, volume = {11}, number = {}, pages = {973}, pmid = {32714356}, issn = {1664-462X}, abstract = {The study of host range determinants within the Pseudomonas syringae complex is gaining renewed attention due to its widespread distribution in non-agricultural environments, evidence of large variability in intra-pathovar host range, and the emergence of new epidemic diseases. This requires the establishment of appropriate model pathosystems facilitating integration of phenotypic, genomic and evolutionary data. Pseudomonas savastanoi pv. 
savastanoi is a model pathogen of the olive tree, and here we report a closed genome of strain NCPPB 3335, plus draft genome sequences of three strains isolated from oleander (pv. nerii), ash (pv. fraxini) and broom plants (pv. retacarpa). We then conducted a comparative genomic analysis of these four new genomes plus 16 publicly available genomes, representing 20 strains of these four P. savastanoi pathovars of woody hosts. Despite overlapping host ranges, cross-pathogenicity tests using four plant hosts clearly separated these pathovars and lead to pathovar reassignment of two strains. Critically, these functional assays were pivotal to reconcile phylogeny with host range and to define pathovar-specific genes repertoires. We report a pan-genome of 7,953 ortholog gene families and a total of 45 type III secretion system effector genes, including 24 core genes, four genes exclusive of pv. retacarpa and several genes encoding pathovar-specific truncations. Noticeably, the four pathovars corresponded with well-defined genetic lineages, with core genome phylogeny and hierarchical clustering of effector genes closely correlating with pathogenic specialization. Knot-inducing pathovars encode genes absent in the canker-inducing pv. fraxini, such as those related to indole acetic acid, cytokinins, rhizobitoxine, and a bacteriophytochrome. Other pathovar-exclusive genes encode type I, type II, type IV, and type VI secretion system proteins, the phytotoxine phevamine A, a siderophore, c-di-GMP-related proteins, methyl chemotaxis proteins, and a broad collection of transcriptional regulators and transporters of eight different superfamilies. Our combination of pathogenicity analyses and genomics tools allowed us to correctly assign strains to pathovars and to propose a repertoire of host range-related genes in the P. 
syringae complex.}, } @article {pmid32706329, year = {2020}, author = {Kc, R and Leong, KWC and Harkness, NM and Lachowicz, J and Gautam, SS and Cooley, LA and McEwan, B and Petrovski, S and Karupiah, G and O'Toole, RF}, title = {Whole-genome analyses reveal gene content differences between nontypeable Haemophilus influenzae isolates from chronic obstructive pulmonary disease compared to other clinical phenotypes.}, journal = {Microbial genomics}, volume = {6}, number = {8}, pages = {}, pmid = {32706329}, issn = {2057-5858}, abstract = {Nontypeable Haemophilus influenzae (NTHi) colonizes human upper respiratory airways and plays a key role in the course and pathogenesis of acute exacerbations of chronic obstructive pulmonary disease (COPD). Currently, it is not possible to distinguish COPD isolates of NTHi from other clinical isolates of NTHi using conventional genotyping methods. Here, we analysed the core and accessory genome of 568 NTHi isolates, including 40 newly sequenced isolates, to look for genetic distinctions between NTHi isolates from COPD with respect to other illnesses, including otitis media, meningitis and pneumonia. Phylogenies based on polymorphic sites in the core-genome did not show discrimination between NTHi strains collected from different clinical phenotypes. However, pan-genome-wide association studies identified 79 unique NTHi accessory genes that were significantly associated with COPD. Furthermore, many of the COPD-related NTHi genes have known or predicted roles in virulence, transmembrane transport of metal ions and nutrients, cellular respiration and maintenance of redox homeostasis. This indicates that specific genes may be required by NTHi for its survival or virulence in the COPD lung. 
These results advance our understanding of the pathogenesis of NTHi infection in COPD lungs.}, } @article {pmid32698896, year = {2020}, author = {Tonkin-Hill, G and MacAlasdair, N and Ruis, C and Weimann, A and Horesh, G and Lees, JA and Gladstone, RA and Lo, S and Beaudoin, C and Floto, RA and Frost, SDW and Corander, J and Bentley, SD and Parkhill, J}, title = {Producing polished prokaryotic pangenomes with the Panaroo pipeline.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {180}, pmid = {32698896}, issn = {1474-760X}, support = {206194/WT_/Wellcome Trust/United Kingdom ; 107032/Z/15/Z/WT_/Wellcome Trust/United Kingdom ; 204016/WT_/Wellcome Trust/United Kingdom ; MR/R015600/1/MRC_/Medical Research Council/United Kingdom ; }, abstract = {Population-level comparisons of prokaryotic genomes must take into account the substantial differences in gene content resulting from horizontal gene transfer, gene duplication and gene loss. However, the automated annotation of prokaryotic genomes is imperfect, and errors due to fragmented assemblies, contamination, diverse gene families and mis-assemblies accumulate over the population, leading to profound consequences when analysing the set of all genes found in a species. Here, we introduce Panaroo, a graph-based pangenome clustering tool that is able to account for many of the sources of error introduced during the annotation of prokaryotic genome assemblies. Panaroo is available at https://github.com/gtonkinhill/panaroo .}, } @article {pmid32690893, year = {2020}, author = {Bayer, PE and Golicz, AA and Scheben, A and Batley, J and Edwards, D}, title = {Plant pan-genomes are the new reference.}, journal = {Nature plants}, volume = {6}, number = {8}, pages = {914--920}, doi = {10.1038/s41477-020-0733-0}, pmid = {32690893}, issn = {2055-0278}, abstract = {Recent years have seen a surge in plant genome sequencing projects and the comparison of multiple related individuals. 
The high degree of genomic variation observed led to the realization that single reference genomes do not represent the diversity within a species, and led to the expansion of the pan-genome concept. Pan-genomes represent the genomic diversity of a species and include core genes, found in all individuals, as well as variable genes, which are absent in some individuals. Variable gene annotations often show similarities across plant species, with genes for biotic and abiotic stress commonly enriched within variable gene groups. Here we review the growth of pan-genomics in plants, explore the origins of gene presence and absence variation, and show how pan-genomes can support plant breeding and evolution studies.}, } @article {pmid32687170, year = {2020}, author = {Yang, LL and Jiang, Z and Li, Y and Wang, ET and Zhi, XY}, title = {Plasmids Related to the Symbiotic Nitrogen Fixation Are Not Only Cooperated Functionally but Also May Have Evolved over a Time Span in Family Rhizobiaceae.}, journal = {Genome biology and evolution}, volume = {12}, number = {11}, pages = {2002--2014}, pmid = {32687170}, issn = {1759-6653}, abstract = {Rhizobia are soil bacteria capable of forming symbiotic nitrogen-fixing nodules associated with leguminous plants. In fast-growing legume-nodulating rhizobia, such as the species in the family Rhizobiaceae, the symbiotic plasmid is the main genetic basis for nitrogen-fixing symbiosis, and is susceptible to horizontal gene transfer. To further understand the symbioses evolution in Rhizobiaceae, we analyzed the pan-genome of this family based on 92 genomes of type/reference strains and reconstructed its phylogeny using a phylogenomics approach. Intriguingly, although the genetic expansion that occurred in chromosomal regions was the main reason for the high proportion of low-frequency flexible gene families in the pan-genome, gene gain events associated with accessory plasmids introduced more genes into the genomes of nitrogen-fixing species. 
For symbiotic plasmids, although horizontal gene transfer frequently occurred, transfer may be impeded by, such as, the host's physical isolation and soil conditions, even among phylogenetically close species. During coevolution with leguminous hosts, the plasmid system, including accessory and symbiotic plasmids, may have evolved over a time span, and provided rhizobial species with the ability to adapt to various environmental conditions and helped them achieve nitrogen fixation. These findings provide new insights into the phylogeny of Rhizobiaceae and advance our understanding of the evolution of symbiotic nitrogen fixation.}, } @article {pmid32677889, year = {2020}, author = {Coulton, A and Edwards, KJ}, title = {AutoCloner: automatic homologue-specific primer design for full-gene cloning in polyploids.}, journal = {BMC bioinformatics}, volume = {21}, number = {1}, pages = {311}, pmid = {32677889}, issn = {1471-2105}, support = {BB/M009122/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/T/000PR9814/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Amino Acid Substitution/genetics ; Base Sequence ; *Cloning, Molecular ; Computational Biology/*methods ; DNA Primers/genetics/*metabolism ; Polymerase Chain Reaction/methods ; Polymorphism, Single Nucleotide/genetics ; *Polyploidy ; *Sequence Homology ; *Software ; Triticum/*genetics ; }, abstract = {BACKGROUND: Polyploid organisms such as wheat complicate even the simplest of procedures in molecular biology. Whilst knowledge of genomic sequences in crops is increasing rapidly, the scientific community is still a long way from producing a full pan-genome for every species. Polymerase chain reaction and Sanger sequencing therefore remain widely used as methods for characterizing gene sequences in many varieties of crops. 
High sequence similarity between genomes in polyploids means that if primers are not homeologue-specific via the incorporation of a SNP at the 3' tail, sequences other than the target sequence will also be amplified. Current consensus for gene cloning in wheat is to manually perform many steps in a long bioinformatics pipeline.

RESULTS: Here we present AutoCloner (www.autocloner.com), a fully automated pipeline for crop gene cloning that includes a free-to-use web interface for users. AutoCloner takes a sequence of interest from the user and performs a basic local alignment search tool (BLAST) search against the genome assembly for their particular polyploid crop. Homologous sequences are then compiled with the input sequence into a multiple sequence alignment which is mined for single-nucleotide polymorphisms (SNPs). Various combinations of potential primers that cover the entire gene of interest are then created and evaluated by Primer3; the set of primers with the highest score, as well as all possible primers at every SNP location, are then returned to the user for polymerase chain reaction (PCR). We have successfully used AutoCloner to clone various genes of interest in the Apogee wheat variety, which has no current genome sequence. In addition, we have successfully run the pipeline on ~ 80,000 high-confidence gene models from a wheat genome assembly.

CONCLUSION: AutoCloner is the first tool to fully automate primer design for gene cloning in polyploids, where previously the consensus within the wheat community was to perform this process manually. The web interface for AutoCloner provides a simple and effective polyploid primer-design method for gene cloning, with no need for researchers to download software or input any details other than their sequence of interest.}, } @article {pmid32670207, year = {2020}, author = {Castro-Jaimes, S and Bello-López, E and Velázquez-Acosta, C and Volkow-Fernández, P and Lozano-Zarain, P and Castillo-Ramírez, S and Cevallos, MA}, title = {Chromosome Architecture and Gene Content of the Emergent Pathogen Acinetobacter haemolyticus.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {926}, pmid = {32670207}, issn = {1664-302X}, abstract = {Acinetobacter haemolyticus is a Gammaproteobacterium that has been involved in serious diseases frequently linked to the nosocomial environment. Most of the strains causing such infections are sensitive to a wide variety of antibiotics, but recent reports indicate that this pathogen is acquiring very efficiently carbapenem-resistance determinants like the blaNDM-1 gene, all over the world. With this work we contribute with a collection set of 31 newly sequenced nosocomial A. haemolyticus isolates. Genome analysis of these sequences and others collected from RefSeq indicates that their chromosomes are organized in 12 syntenic blocks that contain most of the core genome genes. These blocks are separated by hypervariable regions that are rich in unique gene families, but also have signals of horizontal gene transfer. Genes involved in virulence or encoding different secretion systems are located inside syntenic regions and have recombination signals. The relative order of the syntenic blocks along the A. haemolyticus chromosome can change, indicating that they have been subject to several kinds of inversions. 
Genomes of this microorganism show large differences in gene content even if they are in the same clade. Here we also show that A. haemolyticus has an open pan-genome.}, } @article {pmid32669327, year = {2020}, author = {Chandrasekar, SS and Kingstad-Bakke, B and Wu, CW and Suresh, M and Talaat, AM}, title = {A Novel Mucosal Adjuvant System for Immunization against Avian Coronavirus Causing Infectious Bronchitis.}, journal = {Journal of virology}, volume = {94}, number = {19}, pages = {}, pmid = {32669327}, issn = {1098-5514}, mesh = {Adjuvants, Immunologic/pharmacology ; Animals ; Bronchitis/*prevention & control/virology ; Chickens ; Coronavirus Infections/immunology/prevention & control/*veterinary ; Disease Models, Animal ; Gammacoronavirus/*immunology ; Immunity, Cellular ; Immunization ; Infectious bronchitis virus/immunology ; Mucous Membrane/*immunology ; Nucleocapsid/immunology ; Poultry Diseases/immunology/*prevention & control/virology ; Recombinant Proteins/immunology ; Vaccines, DNA/immunology ; Viral Load ; Viral Vaccines/*immunology ; }, abstract = {Infectious bronchitis (IB) caused by infectious bronchitis virus (IBV) is currently a major threat to chicken health, with multiple outbreaks being reported in the United States over the past decade. Modified live virus (MLV) vaccines used in the field can persist and provide the genetic material needed for recombination and emergence of novel IBV serotypes. Inactivated and subunit vaccines overcome some of the limitations of MLV with no risk of virulence reversion and emergence of new virulent serotypes. However, these vaccines are weakly immunogenic and poorly protective. There is an urgent need to develop more effective vaccines that can elicit a robust, long-lasting immune response. In this study, we evaluate a novel adjuvant system developed from Quil-A and chitosan (QAC) for the intranasal delivery of nucleic acid immunogens to improve protective efficacy. 
The QAC adjuvant system forms nanocarriers (<100 nm) that efficiently encapsulate nucleic acid cargo, exhibit sustained release of payload, and can stably transfect cells. Encapsulation of plasmid DNA vaccine expressing IBV nucleocapsid (N) protein by the QAC adjuvant system (pQAC-N) enhanced immunogenicity, as evidenced by robust induction of adaptive humoral and cellular immune responses postvaccination and postchallenge. Birds immunized with pQAC-N showed reduced clinical severity and viral shedding postchallenge on par with protection observed with current commercial vaccines without the associated safety concerns. Presented results indicate that the QAC adjuvant system can offer a safer alternative to the use of live vaccines against avian and other emerging coronaviruses. IMPORTANCE: According to 2017 U.S. agriculture statistics, the combined value of production and sales from broilers, eggs, turkeys, and chicks was $42.8 billion. Of this number, broiler sales comprised 67% of the industry value, with the production of >50 billion pounds of chicken meat. The economic success of the poultry industry in the United States hinges on the extensive use of vaccines to control infectious bronchitis virus (IBV) and other poultry pathogens. The majority of vaccines currently licensed for poultry health include both modified live vaccine and inactivated pathogens. Despite their proven efficacy, modified live vaccine constructs take time to produce and could revert to virulence, which limits their safety. 
The significance of our research stems from the development of a safer and potent alternative mucosal vaccine to replace live vaccines against IBV and other emerging coronaviruses.}, } @article {pmid32656099, year = {2020}, author = {Bohr, LL and Mortimer, TD and Pepperell, CS}, title = {Lateral Gene Transfer Shapes Diversity of Gardnerella spp.}, journal = {Frontiers in cellular and infection microbiology}, volume = {10}, number = {}, pages = {293}, pmid = {32656099}, issn = {2235-2988}, support = {R01 AI113287/AI/NIAID NIH HHS/United States ; T32 GM007215/GM/NIGMS NIH HHS/United States ; }, abstract = {Gardnerella spp. are pathognomonic for bacterial vaginosis, which increases the risk of preterm birth and the transmission of sexually transmitted infections. Gardnerella spp. are genetically diverse, comprising what have recently been defined as distinct species with differing functional capacities. Disease associations with Gardnerella spp. are not straightforward: patients with BV are usually infected with multiple species, and Gardnerella spp. are also found in the vaginal microbiome of healthy women. Genome comparisons of Gardnerella spp. show evidence of lateral gene transfer (LGT), but patterns of LGT have not been characterized in detail. Here we sought to define the role of LGT in shaping the genetic structure of Gardnerella spp. We analyzed whole genome sequencing data for 106 Gardnerella strains and used these data for pan genome analysis and to characterize LGT in the core and accessory genomes, over recent and remote timescales. In our diverse sample of Gardnerella strains, we found that both the core and accessory genomes are clearly differentiated in accordance with newly defined species designations. We identified putative competence and pilus assembly genes across most species; we also found them to be differentiated between species. 
Competence machinery has diverged in parallel with the core genome, with selection against deleterious mutations as a predominant influence on their evolution. By contrast, the virulence factor vaginolysin, which encodes a toxin, appears to be readily exchanged among species. We identified five distinct prophage clusters in Gardnerella genomes, two of which appear to be exchanged between Gardnerella species. Differences among species are apparent in their patterns of LGT, including their exchange with diverse gene pools. Despite frequent LGT and co-localization in the same niche, our results show that Gardnerella spp. are clearly genetically differentiated and yet capable of exchanging specific genetic material. This likely reflects complex interactions within bacterial communities associated with the vaginal microbiome. Our results provide insight into how such interactions evolve and are maintained, allowing these multi-species communities to colonize and invade human tissues and adapt to antibiotics and other stressors.}, } @article {pmid32655519, year = {2020}, author = {Han, M and Liu, G and Chen, Y and Wang, D and Zhang, Y}, title = {Comparative Genomics Uncovers the Genetic Diversity and Characters of Veillonella atypica and Provides Insights Into Its Potential Applications.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {1219}, pmid = {32655519}, issn = {1664-302X}, abstract = {Veillonella atypica is a bacterium that is present in the gut and the oral cavity of mammals and plays diverse roles in different niches. A recent study demonstrated that Veillonella is highly associated with marathon running and approved that V. atypica gavage improves treadmill run time in mice, revealing that V. atypica has a high biotechnological potential in improving athlete performance. However, a comprehensive analysis of the genetic diversity, function traits, and genome editing method of V. atypica remains elusive. 
In the present study, we conducted a systemically comparative analysis of the genetic datasets of nine V. atypica strains. The pan-genome of V. atypica consisted of 2,065 homologous clusters and exhibited an open pan-genome structure. A phylogenetic analysis of V. atypica with two different categories revealed that V. atypica OK5 was the most distant from the other eight V. atypica strains. A total of 43 orthologous genes were identified as CAZyme genes and grouped into 23 CAZyme families. The CAZyme components derived from accessory clusters contributed to the differences in the ability of the nine V. atypica strains to utilize carbohydrates. An integrated analysis of the metabolic pathways of V. atypica suggested that V. atypica strains harbored vancomycin resistance and were involved in several biosynthesis pathways of secondary metabolites. The V. atypica strains harbored four main Cas proteins, namely, CAS-Type IIIA, CAS-Type IIA, CAS-Type IIC, and CAS-Type IIID. This pilot study provides an in-depth understanding of and a fundamental knowledge about the biology of V. atypica that allow the possibility to increase the biotechnological potential of this bacterium.}, } @article {pmid32654396, year = {2020}, author = {Guo, G and Du, D and Yu, Y and Zhang, Y and Qian, Y and Zhang, W}, title = {Pan-genome analysis of Streptococcus suis serotype 2 revealed genomic diversity among strains of different virulence.}, journal = {Transboundary and emerging diseases}, volume = {}, number = {}, pages = {}, doi = {10.1111/tbed.13725}, pmid = {32654396}, issn = {1865-1682}, support = {2018YFC1602500//National Key R&D Program of China/ ; 2017E10010//Opening Fund of Key Laboratory of microorganism technology and bioinformatics research of Zhejiang Province/ ; 31772751//National Natural Science Foundation of China/ ; }, abstract = {Streptococcus suis (SS) is an emerging zoonotic pathogen that causes severe infections in swine and humans. 
Among the 33 known serotypes, serotype 2 is most frequently associated with infections in pigs and humans. To better understand the virulence characterization of S. suis serotype 2 (SS2) and discriminate the difference between virulent and avirulent strains in SS2, characterization of the genomic features of strains with different virulence is required. The result showed that Streptococcus suis have an open pan-genome. The pan-genome shared by the 19 S. suis serotype 2 strains was composed of 1,239 core genes and 2,436 accessory genes. COG analysis indicated that core genes are involved in the basic physiological function, but accessory genes related to tachytely evolution. Comparative analysis between core genomes of virulent strains and 9 avirulent strains suggested that srtBCD pilus cluster was a significant discrepancy between virulent and avirulent strains. Analysis between high virulent and group B low virulent strains showed 53 and 58 genes specific to each other. Moreover, genomes of avirulent strains tend to be larger than virulent strains; avirulent strains tend to possess more prophages sequences than virulent strains. Our findings could be contributed to a better understanding of the genomics of S. suis serotype 2.}, } @article {pmid32636251, year = {2020}, author = {Lees, JA and Mai, TT and Galardini, M and Wheeler, NE and Horsfield, ST and Parkhill, J and Corander, J}, title = {Improved Prediction of Bacterial Genotype-Phenotype Associations Using Interpretable Pangenome-Spanning Regressions.}, journal = {mBio}, volume = {11}, number = {4}, pages = {}, pmid = {32636251}, issn = {2150-7511}, support = {MR/R015600/1/MRC_/Medical Research Council/United Kingdom ; }, abstract = {Discovery of genetic variants underlying bacterial phenotypes and the prediction of phenotypes such as antibiotic resistance are fundamental tasks in bacterial genomics. 
Genome-wide association study (GWAS) methods have been applied to study these relations, but the plastic nature of bacterial genomes and the clonal structure of bacterial populations creates challenges. We introduce an alignment-free method which finds sets of loci associated with bacterial phenotypes, quantifies the total effect of genetics on the phenotype, and allows accurate phenotype prediction, all within a single computationally scalable joint modeling framework. Genetic variants covering the entire pangenome are compactly represented by extended DNA sequence words known as unitigs, and model fitting is achieved using elastic net penalization, an extension of standard multiple regression. Using an extensive set of state-of-the-art bacterial population genomic data sets, we demonstrate that our approach performs accurate phenotype prediction, comparable to popular machine learning methods, while retaining both interpretability and computational efficiency. Compared to those of previous approaches, which test each genotype-phenotype association separately for each variant and apply a significance threshold, the variants selected by our joint modeling approach overlap substantially. IMPORTANCE: Being able to identify the genetic variants responsible for specific bacterial phenotypes has been the goal of bacterial genetics since its inception and is fundamental to our current level of understanding of bacteria. This identification has been based primarily on painstaking experimentation, but the availability of large data sets of whole genomes with associated phenotype metadata promises to revolutionize this approach, not least for important clinical phenotypes that are not amenable to laboratory analysis. These models of phenotype-genotype association can in the future be used for rapid prediction of clinically important phenotypes such as antibiotic resistance and virulence by rapid-turnaround or point-of-care tests. 
However, despite much effort being put into adapting genome-wide association study (GWAS) approaches to cope with bacterium-specific problems, such as strong population structure and horizontal gene exchange, current approaches are not yet optimal. We describe a method that advances methodology for both association and generation of portable prediction models.}, } @article {pmid32634612, year = {2020}, author = {Shahi, N and Mallik, SK}, title = {Emerging bacterial fish pathogen Lactococcus garvieae RTCLI04, isolated from rainbow trout (Oncorhynchus mykiss): Genomic features and comparative genomics.}, journal = {Microbial pathogenesis}, volume = {147}, number = {}, pages = {104368}, doi = {10.1016/j.micpath.2020.104368}, pmid = {32634612}, issn = {1096-1208}, abstract = {Lactococcus garvieae is one of the emerging zoonotic bacterial pathogen, causes fatal hemorrhagic septicemia in cultured fish species, animals and humans, worldwide. Here, we report the genomic features of whole-genome sequence (WGS) of L. garvieae strain RTCLI04, recovered from lower intestine of farmed rainbow trout, Oncorhynchus mykiss in the northwest Himalayan region India. The genome of L. garvieae RTCLI04 is a single circular chromosome of 2,054,885 base pairs (bp), which encodes 1993 proteins and has G + C content of 39%. The bioinformatics analysis of WGS of RTCLI04, confirmed the presence of 51 tRNAs genes (including two pseudogenes), six rRNAs genes (four genes for 5S rRNA; one gene for 16S rRNA and one gene for 23S rRNA), five virulent domains, and twenty eight different genetic pathways. A Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR) finder tool indicates that three different CRISPR and one cas system with common spacer was present in the genome of L. garvieae RTCLI04. Pan-genome analysis of RTCLI04 and all the other reference L. 
garvieae strains shows that pan-genome of this bacterium consisted of 2239 putative protein-coding genes in which 1850 genes are core gene, 389 genes are dispensable gene, and 221 genes are unique to RTCLI04. L. garvieae RTCLI04 lacks genomic island of 16.5 Kb capsule gene cluster. In addition, 39 virulence-associated genes (VAGs) including hly1,-2,-3; PavA, PsaA; eno; LPxTG containing surface proteins 1, 2, 3 and 4; pgm, sod and 29 antimicrobial resistant genes (ARGs) including mefE (clindamycin), srmB (lincomycin), dfrA26 (trimethoprim), gyrB (nalidixic acid), arr-3 (rifampin), otrB (tetracycline), aac(6)-Ic (tobramycin), IrgB (penicillin), mecA (oxacillin), vanRB (vancomycin) and mfpA (fluoroquinolone) were also predicted in the genome of L. garvieae RTCLI04. Our study provides new insight into understanding the virulence mechanism, antimicrobial resistance, and development of effective therapeutic measures against L. garvieae during a disease outbreak in aquaculture.}, } @article {pmid32632274, year = {2020}, author = {Lyu, J}, title = {Pan-genome upgrade.}, journal = {Nature plants}, volume = {6}, number = {7}, pages = {732}, doi = {10.1038/s41477-020-0731-2}, pmid = {32632274}, issn = {2055-0278}, } @article {pmid32631215, year = {2020}, author = {Hurel, J and Schbath, S and Bougeard, S and Rolland, M and Petrillo, M and Touzain, F}, title = {DUGMO: tool for the detection of unknown genetically modified organisms with high-throughput sequencing data for pure bacterial samples.}, journal = {BMC bioinformatics}, volume = {21}, number = {1}, pages = {284}, pmid = {32631215}, issn = {1471-2105}, mesh = {Bacteria/*chemistry ; Computational Biology/*methods ; High-Throughput Nucleotide Sequencing/*methods ; Humans ; Organisms, Genetically Modified/*genetics ; Polymerase Chain Reaction/*methods ; }, abstract = {BACKGROUND: The European Community has adopted very restrictive policies regarding the dissemination and use of genetically modified organisms (GMOs). 
In fact, a maximum threshold of 0.9% of contaminating GMOs is tolerated for a "GMO-free" label. In recent years, imports of undescribed GMOs have been detected. Their sequences are not described and therefore not detectable by conventional approaches, such as PCR.

RESULTS: We developed DUGMO, a bioinformatics pipeline for the detection of genetically modified (GM) bacteria, including unknown GM bacteria, based on Illumina paired-end sequencing data. The method is currently focused on the detection of GM bacteria with - possibly partial - transgenes in pure bacterial samples. In the preliminary steps, coding sequences (CDSs) are aligned through two successive BLASTN against the host pangenome with relevant tuned parameters to discriminate CDSs belonging to the wild type genome (wgCDS) from potential GM coding sequences (pgmCDSs). Then, Bray-Curtis distances are calculated between the wgCDS and each pgmCDS, based on the difference of genomic vocabulary. Finally, two machine learning methods, namely the Random Forest and Generalized Linear Model, are carried out to target true GM CDS(s), based on six variables including Bray-Curtis distances and GC content. Tests carried out on a GM Bacillus subtilis showed 25 positive CDSs corresponding to the chloramphenicol resistance gene and CDSs of the inserted plasmids. On a wild type B. subtilis, no false positive sequences were detected.

CONCLUSION: DUGMO detects exogenous CDS, truncated, fused or highly mutated wild CDSs in high-throughput sequencing data, and was shown to be efficient at detecting GM sequences, but it might also be employed for the identification of recent horizontal gene transfers.}, } @article {pmid32619577, year = {2020}, author = {Kaushal, G and Singh, SP}, title = {Comparative genome analysis provides shreds of molecular evidence for reclassification of Leuconostoc mesenteroides MTCC 10508 as a strain of Leu. suionicum.}, journal = {Genomics}, volume = {112}, number = {6}, pages = {4023-4031}, doi = {10.1016/j.ygeno.2020.06.040}, pmid = {32619577}, issn = {1089-8646}, abstract = {This study presents the whole-genome comparative analysis of a Leuconostoc sp. strain, previously documented as Leu. mesenteroides MTCC 10508. The ANI, dDDH, dot plot, and MAUVE analyses suggested its reclassification as a strain of Leu. suionicum. Functional annotation identified a total of 1971 genes, out of which, 265 genes were mapped to CAZymes, evincing its carbohydrate transforming capability. The genome comparison with 59 Leu. mesenteroides and Leu. suionicum strains generated the core and pan-genome profiles, divulging the unique genes in Leuconostoc sp. MTCC 10508. For the first time, this study reports the genes encoding alpha-xylosidase and copper oxidase in a strain of Leu. suionicum. The genetic information for any possible allergenic molecule could not be detected in the genome, advocating the safety of the strain. The present investigation provides the genomic evidence for reclassification of the Leuconostoc sp. 
strain and also promulgates the molecular insights into its metabolic potential.}, } @article {pmid32615922, year = {2020}, author = {Duru, IC and Andreevskaya, M and Laine, P and Rode, TM and Ylinen, A and Løvdal, T and Bar, N and Crauwels, P and Riedel, CU and Bucur, FI and Nicolau, AI and Auvinen, P}, title = {Genomic characterization of the most barotolerant Listeria monocytogenes RO15 strain compared to reference strains used to evaluate food high pressure processing.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {455}, pmid = {32615922}, issn = {1471-2164}, support = {307856//Academy of Finland/ ; 311717//Academy of Finland/ ; ERA-IB-16-247 014//ERA-IB2/ ; 250 Subprogramme 3.2 - Horizon 2020 - Contract No. 15/2017//International and European Cooperation/ ; 031B0268//German Ministry for Education and Research/ ; }, abstract = {BACKGROUND: High pressure processing (HPP; i.e. 100-600 MPa pressure depending on product) is a non-thermal preservation technique adopted by the food industry to decrease significantly foodborne pathogens, including Listeria monocytogenes, from food. However, susceptibility towards pressure differs among diverse strains of L. monocytogenes and it is unclear if this is due to their intrinsic characteristics related to genomic content. Here, we tested the barotolerance of 10 different L. monocytogenes strains, from food and food processing environments and widely used reference strains including clinical isolate, to pressure treatments with 400 and 600 MPa. Genome sequencing and genome comparison of the tested L. monocytogenes strains were performed to investigate the relation between genomic profile and pressure tolerance.

RESULTS: None of the tested strains were tolerant to 600 MPa. A reduction of more than 5 log10 was observed for all strains after 1 min 600 MPa pressure treatment. L. monocytogenes strain RO15 showed no significant reduction in viable cell counts after 400 MPa for 1 min and was therefore defined as barotolerant. Genome analysis of so far unsequenced L. monocytogenes strain RO15, 2HF33, MB5, AB199, AB120, C7, and RO4 allowed us to compare the gene content of all strains tested. This revealed that the three most pressure tolerant strains had more than one CRISPR system with self-targeting spacers. Furthermore, several anti-CRISPR genes were detected in these strains. Pan-genome analysis showed that 10 prophage genes were significantly associated with the three most barotolerant strains.

CONCLUSIONS: L. monocytogenes strain RO15 was the most pressure tolerant among the selected strains. Genome comparison suggests that there might be a relationship between prophages and pressure tolerance in L. monocytogenes.}, } @article {pmid32615401, year = {2020}, author = {Steinbrenner, AD}, title = {The evolving landscape of cell surface pattern recognition across plant immune networks.}, journal = {Current opinion in plant biology}, volume = {56}, number = {}, pages = {135-146}, doi = {10.1016/j.pbi.2020.05.001}, pmid = {32615401}, issn = {1879-0356}, mesh = {Cell Membrane ; *Oomycetes ; Plant Immunity/genetics ; *Plants/genetics ; Receptors, Pattern Recognition/genetics ; Signal Transduction ; }, abstract = {To recognize diverse threats, plants monitor extracellular molecular patterns and transduce intracellular immune signaling through receptor complexes at the plasma membrane. Pattern recognition occurs through a prototypical network of interacting proteins, comprising A) receptors that recognize inputs associated with a growing number of pest and pathogen classes (bacteria, fungi, oomycetes, caterpillars), B) co-receptor kinases that participate in binding and signaling, and C) cytoplasmic kinases that mediate first stages of immune output. While this framework has been elucidated in reference accessions of model organisms, network components are part of gene families with widespread variation, potentially tuning immunocompetence for specific contexts. Most dramatically, variation in receptor repertoires determines the range of ligands acting as immunogenic inputs for a given plant. Diversification of receptor kinase (RK) and related receptor-like protein (RLP) repertoires may tune responses even within a species. 
Comparative genomics at pangenome scale will reveal patterns and features of immune network variation.}, } @article {pmid32614888, year = {2020}, author = {Chen, Z and Kuang, D and Xu, X and González-Escalona, N and Erickson, DL and Brown, E and Meng, J}, title = {Genomic analyses of multidrug-resistant Salmonella Indiana, Typhimurium, and Enteritidis isolates using MinION and MiSeq sequencing technologies.}, journal = {PloS one}, volume = {15}, number = {7}, pages = {e0235641}, pmid = {32614888}, issn = {1932-6203}, mesh = {Anti-Bacterial Agents/pharmacology ; Drug Resistance, Multiple, Bacterial/*genetics ; *Genome, Bacterial ; Genotype ; Microbial Sensitivity Tests ; Phenotype ; Phylogeny ; Plasmids/genetics/metabolism ; Point Mutation ; Salmonella enterica/classification/drug effects/*genetics/pathogenicity ; Salmonella enteritidis/classification/drug effects/genetics/pathogenicity ; Salmonella typhimurium/classification/drug effects/genetics/pathogenicity ; Virulence ; Whole Genome Sequencing/*methods ; }, abstract = {We sequenced 25 isolates of phenotypically multidrug-resistant Salmonella Indiana (n = 11), Typhimurium (n = 8), and Enteritidis (n = 6) using both MinION long-read [SQK-LSK109 and flow cell (R9.4.1)] and MiSeq short-read (Nextera XT and MiSeq Reagent Kit v2) sequencing technologies to determine the advantages of each approach in terms of the characteristics of genome structure, antimicrobial resistance (AMR), virulence potential, whole-genome phylogeny, and pan-genome. The MinION reads were base-called in real-time using MinKnow 3.4.8 integrated with Guppy 3.0.7. The long-read-only assembly, Illumina-only assembly, and hybrid assembly pipelines of Unicycler 0.4.8 were used to generate the MinION, MiSeq, and hybrid assemblies, respectively. 
The MinION assemblies were highly contiguous compared to the MiSeq assemblies but lacked accuracy, a deficiency that was mitigated by adding the MiSeq short reads through the Unicycler hybrid assembly which corrected erroneous single nucleotide polymorphisms (SNPs). The MinION assemblies provided similar predictions of AMR and virulence potential compared to the MiSeq and hybrid assemblies, although they produced more total false negatives of AMR genotypes, primarily due to failure in identifying tetracycline resistance genes in 11 of the 19 MinION assemblies of tetracycline-resistant isolates. The MinION assemblies displayed a large genetic distance from their corresponding MiSeq and hybrid assemblies on the whole-genome phylogenetic tree, indicating that the lower read accuracy of MinION sequencing caused incorrect clustering. The pan-genome of the MinION assemblies contained significantly more accessory genes and less core genes compared to the MiSeq and hybrid assemblies, suggesting that although these assemblies were more contiguous, their sequencing errors reduced accurate genome annotations. Our research demonstrates that MinION sequencing by itself provides an efficient assessment of the genome structure, antimicrobial resistance, and virulence potential of Salmonella; however, it is not sufficient for whole-genome phylogenetic and pan-genome analyses. MinION in combination with MiSeq facilitated the most accurate genomic analyses.}, } @article {pmid32611933, year = {2020}, author = {Wang, B and Cheng, H and Qian, W and Zhao, W and Liang, C and Liu, C and Cui, G and Liu, H and Zhang, L}, title = {Comparative genome analysis and mining of secondary metabolites of Paenibacillus polymyxa.}, journal = {Genes & genetic systems}, volume = {95}, number = {3}, pages = {141-150}, doi = {10.1266/ggs.19-00053}, pmid = {32611933}, issn = {1880-5779}, abstract = {Paenibacillus polymyxa is a well-known Gram-positive biocontrol bacterium. It has been reported that many P. 
polymyxa strains can inhibit bacteria, fungi and other plant pathogens. Paenibacillus polymyxa employs a variety of mechanisms to promote plant growth, so it is necessary to understand the biocontrol ability of bacteria at the genome level. In the present study, thanks to the widespread availability of Paenibacillus genome data and the development of bioinformatics tools, we were able to analyze and mine the genomes of 43 P. polymyxa strains. The strain NCTC4744 was determined not to be P. polymyxa according to digital DNA-DNA hybridization and average nucleotide identity. By analysis of the pan-genome and the core genome, we found that the pan-genome of P. polymyxa was open and that there were 3,192 core genes. In a gene cluster analysis of secondary metabolites, 797 secondary metabolite gene clusters were found, of which 343 are not similar to known clusters and are expected to reveal a large number of new secondary metabolites. We also analyzed the plant growth-promoting genes that were mined and found, surprisingly, that these genes are highly conserved. The results of the present study not only reveal a large number of unknown potential secondary metabolite gene clusters in P. polymyxa, but also suggest that plant growth promotion characteristics are evolutionary adaptations of P. 
polymyxa to plant-related habitats.}, } @article {pmid32610480, year = {2020}, author = {Fodor, A and Abate, BA and Deák, P and Fodor, L and Gyenge, E and Klein, MG and Koncz, Z and Muvevi, J and Ötvös, L and Székely, G and Vozik, D and Makrai, L}, title = {Multidrug Resistance (MDR) and Collateral Sensitivity in Bacteria, with Special Attention to Genetic and Evolutionary Aspects and to the Perspectives of Antimicrobial Peptides-A Review.}, journal = {Pathogens (Basel, Switzerland)}, volume = {9}, number = {7}, pages = {}, pmid = {32610480}, issn = {2076-0817}, support = {1214102//Hungarian Ministry of Human Capacities/ ; }, abstract = {Antibiotic poly-resistance (multidrug-, extreme-, and pan-drug resistance) is controlled by adaptive evolution. Darwinian and Lamarckian interpretations of resistance evolution are discussed. Arguments for, and against, pessimistic forecasts on a fatal "post-antibiotic era" are evaluated. In commensal niches, the appearance of a new antibiotic resistance often reduces fitness, but compensatory mutations may counteract this tendency. The appearance of new antibiotic resistance is frequently accompanied by a collateral sensitivity to other resistances. Organisms with an expanding open pan-genome, such as Acinetobacter baumannii, Pseudomonas aeruginosa, and Klebsiella pneumoniae, can withstand an increased number of resistances by exploiting their evolutionary plasticity and disseminating clonally or poly-clonally. Multidrug-resistant pathogen clones can become predominant under antibiotic stress conditions but, under the influence of negative frequency-dependent selection, are prevented from rising to dominance in a population in a commensal niche. Antimicrobial peptides have a great potential to combat multidrug resistance, since antibiotic-resistant bacteria have shown a high frequency of collateral sensitivity to antimicrobial peptides. 
In addition, the mobility patterns of antibiotic resistance, and antimicrobial peptide resistance, genes are completely different. The integron trade in commensal niches is fortunately limited by the species-specificity of resistance genes. Hence, we theorize that the suggested post-antibiotic era has not yet come, and indeed might never come.}, } @article {pmid32605102, year = {2020}, author = {Roder, T and Wüthrich, D and Bär, C and Sattari, Z and Ah, UV and Ronchi, F and Macpherson, AJ and Ganal-Vonarburg, SC and Bruggmann, R and Vergères, G}, title = {In Silico Comparison Shows that the Pan-Genome of a Dairy-Related Bacterial Culture Collection Covers Most Reactions Annotated to Human Microbiomes.}, journal = {Microorganisms}, volume = {8}, number = {7}, pages = {}, pmid = {32605102}, issn = {2076-2607}, support = {GRS-070/17//Gebert Rüf Stiftung/ ; }, abstract = {The diversity of the human microbiome is positively associated with human health. However, this diversity is endangered by Westernized dietary patterns that are characterized by a decreased nutrient variety. Diversity might potentially be improved by promoting dietary patterns rich in microbial strains. Various collections of bacterial cultures resulting from a century of dairy research are readily available worldwide, and could be exploited to contribute towards this end. We have conducted a functional in silico analysis of the metagenome of 24 strains, each representing one of the species in a bacterial culture collection composed of 626 sequenced strains, and compared the pathways potentially covered by this metagenome to the intestinal metagenome of four healthy, although overweight, humans. Remarkably, the pan-genome of the 24 strains covers 89% of the human gut microbiome's annotated enzymatic reactions. 
Furthermore, the dairy microbial collection covers biological pathways, such as methylglyoxal degradation, sulfate reduction, γ-aminobutyric (GABA) acid degradation and salicylate degradation, which are differently covered among the four subjects and are involved in a range of cardiometabolic, intestinal, and neurological disorders. We conclude that microbial culture collections derived from dairy research have the genomic potential to complement and restore functional redundancy in human microbiomes.}, } @article {pmid32600255, year = {2020}, author = {Motyka-Pomagruk, A and Zoledowska, S and Misztak, AE and Sledz, W and Mengoni, A and Lojkowska, E}, title = {Comparative genomics and pangenome-oriented studies reveal high homogeneity of the agronomically relevant enterobacterial plant pathogen Dickeya solani.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {449}, pmid = {32600255}, issn = {1471-2164}, support = {UMO-2014/14/M/NZ8/00501//Narodowe Centrum Nauki/ ; UMO-2016/21/N/NZ1/02783//Narodowe Centrum Nauki/ ; Polish-Italian Collaborative Program Canaletto//Ministerstwo Nauki i Szkolnictwa Wyższego/ ; }, abstract = {BACKGROUND: Dickeya solani is an important plant pathogenic bacterium causing severe losses in European potato production. This species draws a lot of attention due to its remarkable virulence, great devastating potential and easier spread in contrast to other Dickeya spp. In view of a high need for extensive studies on economically important soft rot Pectobacteriaceae, we performed a comparative genomics analysis on D. solani strains to search for genetic foundations that would explain the differences in the observed virulence levels within the D. solani population.

RESULTS: High quality assemblies of 8 de novo sequenced D. solani genomes have been obtained. Whole-sequence comparison, ANIb, ANIm, Tetra and pangenome-oriented analyses performed on these genomes and the sequences of 14 additional strains revealed an exceptionally high level of homogeneity among the studied genetic material of D. solani strains. With the use of 22 genomes, the pangenome of D. solani, comprising 84.7% core, 7.2% accessory and 8.1% unique genes, has been almost completely determined, suggesting the presence of a nearly closed pangenome structure. Attribution of the genes included in the D. solani pangenome fractions to functional COG categories showed that higher percentages of accessory and unique pangenome parts in contrast to the core section are encountered in phage/mobile elements- and transcription- associated groups with the genome of RNS 05.1.2A strain having the most significant impact. Also, the first D. solani large-scale genome-wide phylogeny computed on concatenated core gene alignments is herein reported.

CONCLUSIONS: The almost closed status of D. solani pangenome achieved in this work points to the fact that the unique gene pool of this species should no longer expand. Such a feature is characteristic of taxa whose representatives either occupy isolated ecological niches or lack efficient mechanisms for gene exchange and recombination, which seems rational concerning a strictly pathogenic species with clonal population structure. Finally, no obvious correlations between the geographical origin of D. solani strains and their phylogeny were found, which might reflect the specificity of the international seed potato market.}, } @article {pmid32596333, year = {2020}, author = {Yang, F and Feng, H and Massey, IY and Huang, F and Guo, J and Zhang, X}, title = {Genome-Wide Analysis Reveals Genetic Potential for Aromatic Compounds Biodegradation of Sphingopyxis.}, journal = {BioMed research international}, volume = {2020}, number = {}, pages = {5849123}, pmid = {32596333}, issn = {2314-6141}, abstract = {Members of genus Sphingopyxis are frequently found in diverse eco-environments worldwide and have been traditionally considered to play vital roles in the degradation of aromatic compounds. Over recent decades, many aromatic-degrading Sphingopyxis strains have been isolated and recorded, but little is known about their genetic nature related to aromatic compounds biodegradation. In this study, bacterial genomes of 19 Sphingopyxis strains were used for comparative analyses. Phylogeny showed an ambiguous relatedness between bacterial strains and their habitat specificity, while clustering based on Cluster of Orthologous Groups suggested the potential link of functional profile with substrate-specific traits. Pan-genome analysis revealed that 19 individuals were predicted to share 1,066 orthologous genes, indicating a high genetic homogeneity among Sphingopyxis strains. 
Notably, KEGG Automatic Annotation Server results suggested that most genes pertaining aromatic compounds biodegradation were predicted to be involved in benzoate, phenylalanine, and aminobenzoate metabolism. Among them, β-ketoadipate biodegradation might be the main pathway in Sphingopyxis strains. Further inspection showed that a number of mobile genetic elements varied in Sphingopyxis genomes, and plasmid-mediated gene transfer coupled with prophage- and transposon-mediated rearrangements might play prominent roles in the evolution of bacterial genomes. Collectively, our findings presented that Sphingopyxis isolates might be the promising candidates for biodegradation of aromatic compounds in pollution sites.}, } @article {pmid32596166, year = {2020}, author = {Sun, Z and Zhou, D and Zhang, X and Li, Q and Lin, H and Lu, W and Liu, H and Lu, J and Lin, X and Li, K and Xu, T and Bao, Q and Zhang, H}, title = {Determining the Genetic Characteristics of Resistance and Virulence of the "Epidermidis Cluster Group" Through Pan-Genome Analysis.}, journal = {Frontiers in cellular and infection microbiology}, volume = {10}, number = {}, pages = {274}, pmid = {32596166}, issn = {2235-2988}, abstract = {Staphylococcus caprae, Staphylococcus capitis, and Staphylococcus epidermidis belong to the "Epidermidis Cluster Group" (ECG) and are generally opportunistic pathogens. In this work, whole genome sequencing, molecular cloning and pan-genome analysis were performed to investigate the genetic characteristics of the resistance, virulence and genome structures of 69 ECG strains, including a clinical isolate (S. caprae SY333) obtained in this work. Two resistance genes (blaZ and aadD2) encoded on the plasmids pSY333-41 and pSY333-45 of S. caprae SY333 were confirmed to be functional. The bla region in ECG exhibited three distinct structures, and these chromosome- and plasmid-encoded bla operons seemed to follow two different evolutionary paths. 
Pan-genome analysis revealed their pan-genomes tend to be "open." For the virulence-related factors, the genes involved in primary attachment were observed almost exclusively in S. epidermidis, while the genes associated with intercellular aggregation were observed more frequently in S. caprae and S. capitis. The type VII secretion system was present in all strains of S. caprae and some of S. epidermidis but not in S. capitis. Moreover, the isd locus (iron regulated surface determinant) was first found to be encoded on the genomes of S. caprae and S. capitis. These findings suggested that the plasmid and chromosome encoded bla operons of ECG species underwent different evolution paths, as well as they differed in the abundance of virulence genes associated with adherence, invasion, secretion system and immune evasion. Identification of isd loci in S. caprae and S. capitis indicated their ability to acquire heme as nutrient iron during infection.}, } @article {pmid32586267, year = {2020}, author = {Nishitsuji, K and Arimoto, A and Yonashiro, Y and Hisata, K and Fujie, M and Kawamitsu, M and Shoguchi, E and Satoh, N}, title = {Comparative genomics of four strains of the edible brown alga, Cladosiphon okamuranus.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {422}, pmid = {32586267}, issn = {1471-2164}, abstract = {BACKGROUND: The brown alga, Cladosiphon okamuranus (Okinawa mozuku), is one of the most important edible seaweeds, and it is cultivated for market primarily in Okinawa, Japan. Four strains, denominated S, K, O, and C, with distinctively different morphologies, have been cultivated commercially since the early 2000s. We previously reported a draft genome of the S-strain. To facilitate studies of seaweed biology for future aquaculture, we here decoded and analyzed genomes of the other three strains (K, O, and C).

RESULTS: Here we improved the genome of the S-strain (ver. 2, 130 Mbp, 12,999 genes), and decoded the K-strain (135 Mbp, 12,511 genes), the O-strain (140 Mbp, 12,548 genes), and the C-strain (143 Mbp, 12,182 genes). Molecular phylogenies, using mitochondrial and nuclear genes, showed that the S-strain diverged first, followed by the K-strain, and most recently the C- and O-strains. Comparisons of genome architecture among the four strains document the frequent occurrence of inversions. In addition to gene acquisitions and losses, the S-, K-, O-, and C-strains possess 457, 344, 367, and 262 gene families unique to each strain, respectively. Comprehensive Blast searches showed that most genes have no sequence similarity to any entries in the non-redundant protein sequence database, although GO annotation suggested that they likely function in relation to molecular and biological processes and cellular components.

CONCLUSIONS: Our study compares the genomes of four strains of C. okamuranus and examines their phylogenetic relationships. Due to global environmental changes, including temperature increases, acidification, and pollution, brown algal aquaculture is facing critical challenges. Genomic and phylogenetic information reported by the present research provides useful tools for isolation of novel strains.}, } @article {pmid32584859, year = {2020}, author = {Collis, RM and Biggs, PJ and Midwinter, AC and Browne, AS and Wilkinson, DA and Irshad, H and French, NP and Brightwell, G and Cookson, AL}, title = {Genomic epidemiology and carbon metabolism of Escherichia coli serogroup O145 reflect contrasting phylogenies.}, journal = {PloS one}, volume = {15}, number = {6}, pages = {e0235066}, pmid = {32584859}, issn = {1932-6203}, mesh = {Animals ; Carbon/*metabolism ; *Escherichia coli Infections/epidemiology/genetics/metabolism ; *Genotype ; Humans ; Malates/metabolism ; New Zealand/epidemiology ; *Phylogeny ; Serine/genetics/metabolism ; *Serogroup ; *Shiga-Toxigenic Escherichia coli/genetics/isolation & purification/metabolism ; }, abstract = {Shiga toxin-producing Escherichia coli (STEC) are a leading cause of foodborne outbreaks of human disease, but they reside harmlessly as an asymptomatic commensal in the ruminant gut. STEC serogroup O145 are difficult to isolate as routine diagnostic methods are unable to distinguish non-O157 serogroups due to their heterogeneous metabolic characteristics, resulting in under-reporting which is likely to conceal their true prevalence. 
In light of these deficiencies, the purpose of this study was a twofold approach to investigate enhanced STEC O145 diagnostic culture-based methods: firstly, to use a genomic epidemiology approach to understand the genetic diversity and population structure of serogroup O145 at both a local (New Zealand) (n = 47) and global scale (n = 75) and, secondly, to identify metabolic characteristics that will help the development of a differential media for this serogroup. Analysis of a subset of E. coli serogroup O145 strains demonstrated considerable diversity in carbon utilisation, which varied in association with eae subtype and sequence type. Several carbon substrates, such as D-serine and D-malic acid, were utilised by the majority of serogroup O145 strains, which, when coupled with current molecular and culture-based methods, could aid in the identification of presumptive E. coli serogroup O145 isolates. These carbon substrates warrant subsequent testing with additional serogroup O145 strains and non-O145 strains. Serogroup O145 strains displayed extensive genetic heterogeneity that was correlated with sequence type and eae subtype, suggesting these genetic markers are good indicators for distinct E. coli phylogenetic lineages. Pangenome analysis identified a core of 3,036 genes and an open pangenome of >14,000 genes, which is consistent with the identification of distinct phylogenetic lineages. Overall, this study highlighted the phenotypic and genotypic heterogeneity within E. 
coli serogroup O145, suggesting that the development of a differential media targeting this serogroup will be challenging.}, } @article {pmid32571204, year = {2020}, author = {Vázquez-Rosas-Landa, M and Ponce-Soto, GY and Aguirre-Liguori, JA and Thakur, S and Scheinvar, E and Barrera-Redondo, J and Ibarra-Laclette, E and Guttman, DS and Eguiarte, LE and Souza, V}, title = {Population genomics of Vibrionaceae isolated from an endangered oasis reveals local adaptation after an environmental perturbation.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {418}, pmid = {32571204}, issn = {1471-2164}, support = {IG200215//PAPIIT/ ; 238245//CONACYT/ ; }, abstract = {BACKGROUND: In bacteria, pan-genomes are the result of an evolutionary "tug of war" between selection and horizontal gene transfer (HGT). High rates of HGT increase the genetic pool and the effective population size (Ne), resulting in open pan-genomes. In contrast, selective pressures can lead to local adaptation by purging the variation introduced by HGT and mutation, resulting in closed pan-genomes and clonal lineages. In this study, we explored both hypotheses, elucidating the pan-genome of Vibrionaceae isolates after a perturbation event in the endangered oasis of Cuatro Ciénegas Basin (CCB), Mexico, and looking for signals of adaptation to the environments in their genomes.

RESULTS: We obtained 42 genomes of Vibrionaceae distributed in six lineages, two of them did not show any close reference strain in databases. Five of the lineages showed closed pan-genomes and were associated to either water or sediment environment; their high Ne estimates suggest that these lineages are not from a recent origin. The only clade with an open pan-genome was found in both environments and was formed by ten genetic groups with low Ne, suggesting a recent origin. The recombination and mutation estimators (r/m) ranged from 0.005 to 2.725, which are similar to oceanic Vibrionaceae estimations. However, we identified 367 gene families with signals of positive selection, most of them found in the core genome; suggesting that despite recombination, natural selection moves the Vibrionaceae CCB lineages to local adaptation, purging the genomes and keeping closed pan-genome patterns. Moreover, we identify 598 SNPs associated with an unstructured environment; some of the genes associated with these SNPs were related to sodium transport.

CONCLUSIONS: Different lines of evidence suggest that the sampled Vibrionaceae are part of the rare biosphere usually living under famine conditions. Two of these lineages were reported for the first time. Most Vibrionaceae lineages of CCB are adapted to their micro-habitats rather than to the sampled environments. This pattern of adaptation is concordant with the association of closed pan-genomes and local adaptation.}, } @article {pmid32562810, year = {2020}, author = {Anani, H and Zgheib, R and Hasni, I and Raoult, D and Fournier, PE}, title = {Interest of bacterial pangenome analyses in clinical microbiology.}, journal = {Microbial pathogenesis}, volume = {149}, number = {}, pages = {104275}, doi = {10.1016/j.micpath.2020.104275}, pmid = {32562810}, issn = {1096-1208}, abstract = {Thanks to the progress and decreasing costs in genome sequencing technologies, more than 250,000 bacterial genomes are currently available in public databases, covering most, if not all, of the major human-associated phylogenetic groups of these microorganisms, pathogenic or not. In addition, for many of them, sequences from several strains of a given species are available, thus enabling to evaluate their genetic diversity and study their evolution. In addition, the significant cost reduction of bacterial whole genome sequencing as well as the rapid increase in the number of available bacterial genomes have prompted the development of pangenomic software tools. The study of bacterial pangenome has many applications in clinical microbiology. It can unveil the pathogenic potential and ability of bacteria to resist antimicrobials as well as identify specific sequences and predict antigenic epitopes that allow molecular or serologic assays and vaccines to be designed. 
Bacterial pangenome constitutes a powerful method for understanding the history of human bacteria and relating these findings to diagnosis in clinical microbiology laboratories in order to optimize patient management.}, } @article {pmid32553274, year = {2020}, author = {Liu, Y and Du, H and Li, P and Shen, Y and Peng, H and Liu, S and Zhou, GA and Zhang, H and Liu, Z and Shi, M and Huang, X and Li, Y and Zhang, M and Wang, Z and Zhu, B and Han, B and Liang, C and Tian, Z}, title = {Pan-Genome of Wild and Cultivated Soybeans.}, journal = {Cell}, volume = {182}, number = {1}, pages = {162-176.e13}, doi = {10.1016/j.cell.2020.05.023}, pmid = {32553274}, issn = {1097-4172}, abstract = {Soybean is one of the most important vegetable oil and protein feed crops. To capture the entire genomic diversity, it is needed to construct a complete high-quality pan-genome from diverse soybean accessions. In this study, we performed individual de novo genome assemblies for 26 representative soybeans that were selected from 2,898 deeply sequenced accessions. Using these assembled genomes together with three previously reported genomes, we constructed a graph-based genome and performed pan-genome analysis, which identified numerous genetic variations that cannot be detected by direct mapping of short sequence reads onto a single reference genome. The structural variations from the 2,898 accessions that were genotyped based on the graph-based genome and the RNA sequencing (RNA-seq) data from the representative 26 accessions helped to link genetic variations to candidate genes that are responsible for important traits. 
This pan-genome resource will promote evolutionary and functional genomics studies in soybean.}, } @article {pmid32531278, year = {2020}, author = {Ellegaard, KM and Suenami, S and Miyazaki, R and Engel, P}, title = {Vast Differences in Strain-Level Diversity in the Gut Microbiota of Two Closely Related Honey Bee Species.}, journal = {Current biology : CB}, volume = {30}, number = {13}, pages = {2520-2531.e7}, doi = {10.1016/j.cub.2020.04.070}, pmid = {32531278}, issn = {1879-0445}, abstract = {Most bacterial species encompass strains with vastly different gene content. Strain diversity in microbial communities is therefore considered to be of functional importance. Yet little is known about the extent to which related microbial communities differ in diversity at this level and which underlying mechanisms may constrain and maintain strain-level diversity. Here, we used shotgun metagenomics to characterize and compare the gut microbiota of two honey bee species, Apis mellifera and Apis cerana, which diverged about 6 mya. Although the host species are colonized largely by the same bacterial 16S rRNA phylotypes, we find that their communities are host specific when analyzed with genomic resolution. Moreover, despite their similar ecology, A. mellifera displayed a much higher diversity of strains and functional gene content in the microbiota compared to A. cerana, both per colony and per individual bee. In particular, the gene repertoire for polysaccharide degradation was massively expanded in the microbiota of A. mellifera relative to A. cerana. Bee management practices, divergent ecological adaptation, or habitat size may have contributed to the observed differences in microbiota genomic diversity of these key pollinator species. Our results illustrate that the gut microbiota of closely related animal hosts can differ vastly in genomic diversity while displaying similar levels of diversity based on the 16S rRNA gene. 
Such differences are likely to have consequences for gut microbiota functioning and host-symbiont interactions, highlighting the need for metagenomic studies to understand the ecology and evolution of microbial communities.}, } @article {pmid32522778, year = {2020}, author = {Crouse, A and Schramm, C and Emond-Rheault, JG and Herod, A and Kerhoas, M and Rohde, J and Gruenheid, S and Kukavica-Ibrulj, I and Boyle, B and Greenwood, CMT and Goodridge, LD and Garduno, R and Levesque, RC and Malo, D and Daigle, F}, title = {Combining Whole-Genome Sequencing and Multimodel Phenotyping To Identify Genetic Predictors of Salmonella Virulence.}, journal = {mSphere}, volume = {5}, number = {3}, pages = {}, pmid = {32522778}, issn = {2379-5042}, abstract = {Salmonella comprises more than 2,600 serovars. Very few environmental and uncommon serovars have been characterized for their potential role in virulence and human infections. A complementary in vitro and in vivo systematic high-throughput analysis of virulence was used to elucidate the association between genetic and phenotypic variations across Salmonella isolates. The goal was to develop a strategy for the classification of isolates as a benchmark and predict virulence levels of isolates. Thirty-five phylogenetically distant strains of unknown virulence were selected from the Salmonella Foodborne Syst-OMICS (SalFoS) collection, representing 34 different serovars isolated from various sources. Isolates were evaluated for virulence in 4 complementary models of infection to compare virulence traits with the genomics data, including interactions with human intestinal epithelial cells, human macrophages, and amoeba. In vivo testing was conducted using the mouse model of Salmonella systemic infection. Significant correlations were identified between the different models. We identified a collection of novel hypothetical and conserved proteins associated with isolates that generate a high burden. 
We also showed that blind prediction of virulence of 33 additional strains based on the pan-genome was high in the mouse model of systemic infection (82% agreement) and in the human epithelial cell model (74% agreement). These complementary approaches enabled us to define virulence potential in different isolates and present a novel strategy for risk assessment of specific strains and for better monitoring and source tracking during outbreaks. IMPORTANCE: Salmonella species are bacteria that are a major source of foodborne disease through contamination of a diversity of foods, including meat, eggs, fruits, nuts, and vegetables. More than 2,600 different Salmonella enterica serovars have been identified, and only a few of them are associated with illness in humans. Despite the fact that they are genetically closely related, there is enormous variation in the virulence of different isolates of Salmonella enterica. Identification of foodborne pathogens is a lengthy process based on microbiological, biochemical, and immunological methods. Here, we worked toward new ways of integrating whole-genome sequencing (WGS) approaches into food safety practices. We used WGS to build associations between virulence and genetic diversity within 83 Salmonella isolates representing 77 different Salmonella serovars. 
Our work demonstrates the potential of combining a genomics approach and virulence tests to improve the diagnostics and assess risk of human illness associated with specific Salmonella isolates.}, } @article {pmid32518186, year = {2020}, author = {Gori, A and Harrison, OB and Mlia, E and Nishihara, Y and Chan, JM and Msefula, J and Mallewa, M and Dube, Q and Swarthout, TD and Nobbs, AH and Maiden, MCJ and French, N and Heyderman, RS}, title = {Pan-GWAS of Streptococcus agalactiae Highlights Lineage-Specific Genes Associated with Virulence and Niche Adaptation.}, journal = {mBio}, volume = {11}, number = {3}, pages = {}, pmid = {32518186}, issn = {2150-7511}, support = {MR/N023129/1/MRC_/Medical Research Council/United Kingdom ; 106846/Z/15/Z/WT_/Wellcome Trust/United Kingdom ; /DH_/Department of Health/United Kingdom ; }, abstract = {Streptococcus agalactiae (group B streptococcus; GBS) is a colonizer of the gastrointestinal and urogenital tracts, and an opportunistic pathogen of infants and adults. The worldwide population of GBS is characterized by clonal complexes (CCs) with different invasive potentials. CC17, for example, is a hypervirulent lineage commonly associated with neonatal sepsis and meningitis, while CC1 is less invasive in neonates and more commonly causes invasive disease in adults with comorbidities. The genetic basis of GBS virulence and the extent to which different CCs have adapted to different host environments remain uncertain. We have therefore applied a pan-genome-wide association study (GWAS) approach to 1,988 GBS strains isolated from different hosts and countries. Our analysis identified 279 CC-specific genes associated with virulence, disease, metabolism, and regulation of cellular mechanisms that may explain the differential virulence potential of particular CCs. 
In CC17 and CC23, for example, we have identified genes encoding pilus, quorum-sensing proteins, and proteins for the uptake of ions and micronutrients which are absent in less invasive lineages. Moreover, in CC17, carriage and disease strains were distinguished by the allelic variants of 21 of these CC-specific genes. Together our data highlight the lineage-specific basis of GBS niche adaptation and virulence. IMPORTANCE: GBS is a leading cause of mortality in newborn babies in high- and low-income countries worldwide. Different strains of GBS are characterized by different degrees of virulence, where some are harmlessly carried by humans or animals and others are much more likely to cause disease. The genome sequences of almost 2,000 GBS samples isolated from both animals and humans in high- and low-income countries were analyzed using a pan-genome-wide association study approach. This allowed us to identify 279 genes which are associated with different lineages of GBS, characterized by a different virulence and preferred host. 
Additionally, we propose that the GBS now carried in humans may have first evolved in animals before expanding clonally once adapted to the human host. These findings are essential to help understand what is causing GBS disease and how the bacteria have evolved and are transmitted.}, } @article {pmid32509595, year = {2020}, author = {Gonzales-Siles, L and Karlsson, R and Schmidt, P and Salvà-Serra, F and Jaén-Luchoro, D and Skovbjerg, S and Moore, ERB and Gomila, M}, title = {A Pangenome Approach for Discerning Species-Unique Gene Markers for Identifications of Streptococcus pneumoniae and Streptococcus pseudopneumoniae.}, journal = {Frontiers in cellular and infection microbiology}, volume = {10}, number = {}, pages = {222}, pmid = {32509595}, issn = {2235-2988}, abstract = {Correct identifications of isolates and strains of the Mitis-Group of the genus Streptococcus are particularly difficult, due to high genetic similarity, resulting from horizontal gene transfer and homologous recombination, and unreliable phenotypic and genotypic biomarkers for differentiating the species. Streptococcus pneumoniae and Streptococcus pseudopneumoniae are the most closely related species of the clade. In this study, publicly-available genome sequences for Streptococcus pneumoniae and S. pseudopneumoniae were analyzed, using a pangenomic approach, to find candidates for species-unique gene markers; ten species-unique genes for S. pneumoniae and nine for S. pseudopneumoniae were identified. These species-unique gene marker candidates were verified by PCR assays for identifying S. pneumoniae and S. pseudopneumoniae strains isolated from clinical samples. All determined species-level unique gene markers for S. pneumoniae were detected in all S. pneumoniae clinical isolates, whereas fewer of the unique S. pseudopneumoniae gene markers were present in more than 95% of the clinical isolates. 
In parallel, taxonomic identifications of the clinical isolates were confirmed, using conventional optochin sensitivity testing, targeted PCR-detection for the "Xisco" gene, as well as genomic ANIb similarity analyses for the genome sequences of selected strains. Using mass spectrometry-proteomics, species-specific peptide matches were observed for four of the S. pneumoniae gene markers and for three of the S. pseudopneumoniae gene markers. Application of multiple species-level unique biomarkers of S. pneumoniae and S. pseudopneumoniae, is proposed as a protocol for the routine clinical laboratory for improved, reliable differentiation, and identification of these pathogenic and commensal species.}, } @article {pmid32509458, year = {2020}, author = {Nasr Azadani, D and Zhang, D and Hatherill, JR and Silva, D and Turner, JW}, title = {Isolation, characterization, and comparative genomic analysis of a phage infecting high-level aminoglycoside-resistant (HLAR) Enterococcus faecalis.}, journal = {PeerJ}, volume = {8}, number = {}, pages = {e9171}, pmid = {32509458}, issn = {2167-8359}, abstract = {Enterococcus is a genus of Gram-positive bacteria that are commensal to the gastrointestinal tracts of humans but some species have been increasingly implicated as agents of nosocomial infections. The increase in infections and the spread of antibiotic-resistant strains have contributed to renewed interest in the discovery of Enterococcus phages. The aims of this study were (1) the isolation, characterization, and genome sequencing of a phage capable of infecting an antibiotic-resistant E. faecalis strain, and (2) the comparative genomic analysis of publicly-available Enterococcus phages. For this purpose, multiple phages were isolated from wastewater treatment plant (WWTP) influent using a high-level aminoglycoside-resistant (HLAR) E. faecalis strain as the host. One phage, phiNASRA1, demonstrated a high lytic efficiency (∼97.52%). 
Transmission electron microscopy (TEM) and whole-genome sequencing (WGS) showed that phiNASRA1 belongs to the Siphoviridae family of double-stranded DNA viruses. The phage was approximately 250 nm in length and its complete genome (40,139 bp, 34.7% GC) contained 62 open reading frames (ORFs). Phylogenetic comparisons of phiNASRA1 and 31 publicly-available Enterococcus phages, based on the large subunit terminase and portal proteins, grouped phage by provenance, size, and GC content. In particular, both phylogenies grouped phages larger than 100 kbp into distinct clades. A phylogeny based on a pangenome analysis of the same 32 phages also grouped phages by provenance, size, and GC content although agreement between the two single-locus phylogenies was higher. Per the pangenome phylogeny, phiNASRA1 was most closely related to phage LY0322 that was similar in size, GC content, and number of ORFs (40,139 and 40,934 bp, 34.77 and 34.80%, and 60 and 64 ORFs, respectively). The pangenome analysis did illustrate the high degree of sequence diversity and genome plasticity as no coding sequence was homologous across all 32 phages, and even 'conserved' structural proteins (e.g., the large subunit terminase and portal proteins) were homologous in no more than half of the 32 phage genomes. These findings contribute to a growing body of literature devoted to understanding phage biology and diversity. We propose that this high degree of diversity limited the value of the single-locus and pangenome phylogenies. By contrast, the high degree of homology between phages larger than 100 kbp suggests that pangenome analyses of more similar phages is a viable method for assessing subclade diversity. Future work is focused on validating phiNASRA1 as a potential therapeutic agent to eradicate antibiotic-resistant E. 
faecalis infections in an animal model.}, } @article {pmid32496181, year = {2020}, author = {Wang, LYR and Jokinen, CC and Laing, CR and Johnson, RP and Ziebell, K and Gannon, VPJ}, title = {Assessing the genomic relatedness and evolutionary rates of persistent verotoxigenic Escherichia coli serotypes within a closed beef herd in Canada.}, journal = {Microbial genomics}, volume = {6}, number = {6}, pages = {}, pmid = {32496181}, issn = {2057-5858}, abstract = {Verotoxigenic Escherichia coli (VTEC) are food- and water-borne pathogens associated with both sporadic illness and outbreaks of enteric disease. While it is known that cattle are reservoirs of VTEC, little is known about the genomic variation of VTEC in cattle, and whether the variation in genomes reported for human outbreak strains is consistent with individual animal or group/herd sources of infection. A previous study of VTEC prevalence identified serotypes carried persistently by three consecutive cohorts of heifers within a closed herd of cattle. This present study aimed to: (i) determine whether the genomic relatedness of bovine isolates is similar to that reported for human strains associated with single source outbreaks, (ii) estimate the rates of genome change among dominant serotypes over time within a cattle herd, and (iii) identify genomic features of serotypes associated with persistence in cattle. Illumina MiSeq genome sequencing and genotyping based on allelic and single nucleotide variations were completed, while genome change over time was measured using Bayesian evolutionary analysis sampling trees. The accessory genome, including the non-protein-encoding intergenic regions (IGRs), virulence factors, antimicrobial-resistance genes and plasmid gene content of representative persistent and sporadic cattle strains were compared using Fisher's exact test corrected for multiple comparisons. 
Herd strains from serotypes O6:H34 (n=22), O22:H8 (n=30), O108:H8 (n=39), O139:H19 (n=44) and O157:H7 (n=106) were readily distinguishable from epidemiologically unrelated strains of the same serotype using a similarity threshold of 10 or fewer allele differences between adjacent nodes. Temporal-cohort clustering within each serotype was supported by date randomization analysis. Substitutions per site per year were consistent with previously reported values for E. coli; however, there was low branch support for these values. Acquisition of the phage-encoded Shiga toxin 2 gene in serotype O22:H8 was observed. Pan-genome analyses identified accessory regions that were more prevalent in persistent serotypes (P≤0.05) than in sporadic serotypes. These results suggest that VTEC serotypes from a specific cattle population are highly clonal with a similar level of relatedness as human single-source outbreak-associated strains, but changes in the genome occur gradually over time. Additionally, elements in the accessory genomes may provide a selective advantage for persistence of VTEC within cattle herds.}, } @article {pmid32494685, year = {2020}, author = {Fan, X and Qiu, H and Han, W and Wang, Y and Xu, D and Zhang, X and Bhattacharya, D and Ye, N}, title = {Phytoplankton pangenome reveals extensive prokaryotic horizontal gene transfer of diverse functions.}, journal = {Science advances}, volume = {6}, number = {18}, pages = {eaba0111}, pmid = {32494685}, issn = {2375-2548}, abstract = {The extent and role of horizontal gene transfer (HGT) in phytoplankton and, more broadly, eukaryotic evolution remain controversial topics. Recent studies substantiate the importance of HGT in modifying or expanding functions such as metal or reactive species detoxification and buttressing halotolerance. Yet, the potential of HGT to significantly alter the fate of species in a major eukaryotic assemblage remains to be established. 
We provide such an example for the ecologically important lineages encompassed by cryptophytes, rhizarians, alveolates, stramenopiles, and haptophytes ("CRASH" taxa). We describe robust evidence of prokaryotic HGTs in these taxa affecting functions such as polysaccharide biosynthesis. Numbers of HGTs range from 0.16 to 1.44% of CRASH species gene inventories, comparable to the ca. 1% prokaryote-derived HGTs found in the genomes of extremophilic red algae. Our results substantially expand the impact of HGT in eukaryotes and define a set of general principles for prokaryotic gene fixation in phytoplankton genomes.}, } @article {pmid32493786, year = {2020}, author = {Wesevich, A and Sutton, G and Ruffin, F and Park, LP and Fouts, DE and Fowler, VG and Thaden, JT}, title = {Newly Named Klebsiella aerogenes (formerly Enterobacter aerogenes) Is Associated with Poor Clinical Outcomes Relative to Other Enterobacter Species in Patients with Bloodstream Infection.}, journal = {Journal of clinical microbiology}, volume = {58}, number = {9}, pages = {}, pmid = {32493786}, issn = {1098-660X}, support = {U19 AI110819/AI/NIAID NIH HHS/United States ; }, abstract = {Enterobacter aerogenes was recently renamed Klebsiella aerogenes. This study aimed to identify differences in clinical characteristics, outcomes, and bacterial genetics among patients with K. aerogenes versus Enterobacter species bloodstream infections (BSI). We prospectively enrolled patients with K. aerogenes or Enterobacter cloacae complex (Ecc) BSI from 2002 to 2015. We performed whole-genome sequencing (WGS) and pan-genome analysis on all bacteria. Overall, 150 patients with K. aerogenes (46/150 [31%]) or Ecc (104/150 [69%]) BSI were enrolled. The two groups had similar baseline characteristics. Neither total in-hospital mortality (13/46 [28%] versus 22/104 [21%]; P = 0.3) nor attributable in-hospital mortality (9/46 [20%] versus 13/104 [12%]; P = 0.3) differed between patients with K. 
aerogenes versus Ecc BSI, respectively. However, poor clinical outcome (death before discharge, recurrent BSI, and/or BSI complication) was higher for K. aerogenes than Ecc BSI (32/46 [70%] versus 42/104 [40%]; P = 0.001). In a multivariable regression model, K. aerogenes BSI, relative to Ecc BSI, was predictive of poor clinical outcome (odds ratio 3.3; 95% confidence interval 1.4 to 8.1; P = 0.008). Pan-genome analysis revealed 983 genes in 323 genomic islands unique to K. aerogenes isolates, including putative virulence genes involved in iron acquisition (n = 67), fimbriae/pili/flagella production (n = 117), and metal homeostasis (n = 34). Antibiotic resistance was largely found in Ecc lineage 1, which had a higher rate of multidrug resistant phenotype (23/54 [43%]) relative to all other bacterial isolates (23/96 [24%]; P = 0.03). K. aerogenes BSI was associated with poor clinical outcomes relative to Ecc BSI. Putative virulence factors in K. aerogenes may account for these differences.}, } @article {pmid32480355, year = {2020}, author = {Badet, T and Croll, D}, title = {The rise and fall of genes: origins and functions of plant pathogen pangenomes.}, journal = {Current opinion in plant biology}, volume = {56}, number = {}, pages = {65-73}, doi = {10.1016/j.pbi.2020.04.009}, pmid = {32480355}, issn = {1879-0356}, mesh = {*Plant Diseases/genetics ; *Plants/genetics ; Virulence ; }, abstract = {Plant pathogens can rapidly overcome resistance of their hosts by mutating key pathogenicity genes encoding for effectors. Pathogen adaptation is fuelled by extensive genetic variability in populations and different strains may not share the same set of genes. Recently, such an intra-specific variation in gene content became formalized as pangenomes distinguishing core genes (i.e. shared) and accessory genes (i.e. lineage or strain-specific). Across pathogens species, key effectors tend to be part of the rapidly evolving accessory genome. 
Here, we show how the construction and analysis of pathogen pangenomes provide deep insights into the dynamic host adaptation process. We also discuss how pangenomes should ideally be built and how geography, niche and lifestyle likely determine pangenome sizes.}, } @article {pmid32471418, year = {2020}, author = {Pilar, AVC and Petronella, N and Dussault, FM and Verster, AJ and Bekal, S and Levesque, RC and Goodridge, L and Tamber, S}, title = {Similar yet different: phylogenomic analysis to delineate Salmonella and Citrobacter species boundaries.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {377}, pmid = {32471418}, issn = {1471-2164}, support = {8505//Genome Canada/ ; }, abstract = {BACKGROUND: Salmonella enterica is a leading cause of foodborne illness worldwide resulting in considerable public health and economic costs. Testing for the presence of this pathogen in food is often hampered by the presence of background microflora that may present as Salmonella (false positives). False positive isolates belonging to the genus Citrobacter can be difficult to distinguish from Salmonella due to similarities in their genetics, cell surface antigens, and other phenotypes. In order to understand the genetic basis of these similarities, a comparative genomic approach was used to define the pan-, core, accessory, and unique coding sequences of a representative population of Salmonella and Citrobacter strains.

RESULTS: Analysis of the genomic content of 58 S. enterica strains and 37 Citrobacter strains revealed the presence of 31,130 and 1540 coding sequences within the pan- and core genome of this population. Amino acid sequences unique to either Salmonella (n = 1112) or Citrobacter (n = 195) were identified and revealed potential niche-specific adaptations. Phylogenetic network analysis of the protein families encoded by the pan-genome indicated that genetic exchange between Salmonella and Citrobacter may have led to the acquisition of similar traits and also diversification within the genera.

CONCLUSIONS: Core genome analysis suggests that the Salmonella enterica and Citrobacter populations investigated here share a common evolutionary history. Comparative analysis of the core and pan-genomes was able to define the genetic features that distinguish Salmonella from Citrobacter and highlight niche-specific adaptations.}, } @article {pmid32469363, year = {2020}, author = {Li, M and Aye, SM and Ahmed, MU and Han, ML and Li, C and Song, J and Boyce, JD and Powell, DR and Azad, MAK and Velkov, T and Zhu, Y and Li, J}, title = {Pan-transcriptomic analysis identified common differentially expressed genes of Acinetobacter baumannii in response to polymyxin treatments.}, journal = {Molecular omics}, volume = {16}, number = {4}, pages = {327-338}, pmid = {32469363}, issn = {2515-4184}, support = {R01 AI132154/AI/NIAID NIH HHS/United States ; }, abstract = {Multidrug-resistant Acinetobacter baumannii is a top-priority Gram-negative pathogen and polymyxins are a last-line therapeutic option. Previous systems pharmacological studies examining polymyxin killing and resistance usually focused on individual strains, and the derived knowledge could be limited by strain-specific genomic context. In this study, we examined the gene expression of five A. baumannii strains (34654, 1207552, 1428368, 1457504 and ATCC 19606) to determine the common differentially expressed genes in response to polymyxin treatments. A pan-genome containing 6061 genes was identified for 89 A. baumannii genomes from RefSeq database which included the five strains examined in this study; 2822 of the 6061 genes constituted the core genome. After 2 mg/L or 0.75 × MIC polymyxin treatments for 15 min, 41 genes were commonly up-regulated, including those involved in membrane biogenesis and homeostasis, lipoprotein and phospholipid trafficking, efflux pump and poly-N-acetylglucosamine biosynthesis; six genes were commonly down-regulated, three of which were related to fatty acid biosynthesis. 
Additionally, comparison of the gene expression at 15 and 60 min in ATCC 19606 revealed that polymyxin treatment resulted in a rapid change in amino acid metabolism at 15 min and perturbations on envelope biogenesis at both time points. This is the first pan-transcriptomic study for polymyxin-treated A. baumannii and our results identified that the remodelled outer membrane, up-regulated efflux pumps and down-regulated fatty acid biosynthesis might be essential for early responses to polymyxins in A. baumannii. Our findings provide important mechanistic insights into bacterial responses to polymyxin killing and may facilitate the optimisation of polymyxin therapy against this problematic 'superbug'.}, } @article {pmid32468160, year = {2020}, author = {Tschoeke, D and Salazar, VW and Vidal, L and Campeão, M and Swings, J and Thompson, F and Thompson, C}, title = {Unlocking the Genomic Taxonomy of the Prochlorococcus Collective.}, journal = {Microbial ecology}, volume = {80}, number = {3}, pages = {546-558}, doi = {10.1007/s00248-020-01526-5}, pmid = {32468160}, issn = {1432-184X}, abstract = {Prochlorococcus is the most abundant photosynthetic prokaryote on our planet. The extensive ecological literature on the Prochlorococcus collective (PC) is based on the assumption that it comprises one single genus comprising the species Prochlorococcus marinus, containing itself a collective of ecotypes. Ecologists adopt the distributed genome hypothesis of an open pan-genome to explain the observed genomic diversity and evolution patterns of the ecotypes within PC. Novel genomic data for the PC prompted us to revisit this group, applying the current methods used in genomic taxonomy. As a result, we were able to distinguish the five genera: Prochlorococcus, Eurycolium, Prolificoccus, Thaumococcus, and Riococcus. 
The novel genera have distinct genomic and ecological attributes.}, } @article {pmid32466367, year = {2020}, author = {Sharma, P and Gupta, SK and Barrett, JB and Hiott, LM and Woodley, TA and Kariyawasam, S and Frye, JG and Jackson, CR}, title = {Comparison of Antimicrobial Resistance and Pan-Genome of Clinical and Non-Clinical Enterococcus cecorum from Poultry Using Whole-Genome Sequencing.}, journal = {Foods (Basel, Switzerland)}, volume = {9}, number = {6}, pages = {}, pmid = {32466367}, issn = {2304-8158}, support = {6040-32000-009-00D//Agricultural Research Service/ ; }, abstract = {Enterococcus cecorum is an emerging avian pathogen, particularly in chickens, but can be found in both diseased (clinical) and healthy (non-clinical) poultry. To better define differences between E. cecorum from the two groups, whole-genome sequencing (WGS) was used to identify and compare antimicrobial resistance genes as well as the pan-genome among the isolates. Eighteen strains selected from our previous study were subjected to WGS using Illumina MiSeq and comparatively analyzed. Assembled contigs were analyzed for resistance genes using ARG-ANNOT. Resistance to erythromycin was mediated by ermB, ermG, and mefA, in clinical isolates and ermB and mefA, in non-clinical isolates. Lincomycin resistance genes were identified as linB, lnuB, lnuC, and lnuD with lnuD found only in non-clinical E. cecorum; however, lnuB and linB were found in only one clinical isolate. For both groups of isolates, kanamycin resistance was mediated by aph3-III, while tetracycline resistance was conferred by tetM, tetO, and tetL. No mutations or known resistance genes were found for isolates resistant to either linezolid or chloramphenicol, suggesting possible new mechanisms of resistance to these drugs. A comparison of WGS results confirmed that non-clinical isolates contained more resistance genes than clinical isolates. 
The pan-genome of clinical and non-clinical isolates resulted in 3651 and 4950 gene families, respectively, whereas the core gene sets were comprised of 1559 and 1534 gene families in clinical and non-clinical isolates, respectively. Unique genes were found more frequently in non-clinical isolates than clinical. Phylogenetic analysis of the isolates and all the available complete and draft genomes showed no correlation between healthy and diseased poultry. Additional genomic comparison is required to elucidate genetic factors in E. cecorum that contribute to disease in poultry.}, } @article {pmid32463784, year = {2020}, author = {Liu, YH and Xie, YG and Li, L and Jiang, HC and Mohamad, OAA and Hozzein, W and Fang, BZ and Li, WJ}, title = {Cyclobacterium salsum sp. nov. and Cyclobacterium roseum sp. nov., isolated from a saline lake.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {70}, number = {6}, pages = {3785-3793}, doi = {10.1099/ijsem.0.004237}, pmid = {32463784}, issn = {1466-5034}, mesh = {Bacterial Typing Techniques ; Bacteroidetes/*classification/isolation & purification ; Base Composition ; China ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; Geologic Sediments/*microbiology ; Lakes/*microbiology ; Nucleic Acid Hybridization ; *Phylogeny ; Pigmentation ; RNA, Ribosomal, 16S/genetics ; *Saline Waters ; Sequence Analysis, DNA ; }, abstract = {Two novel strains, designated SYSU L10167T and SYSU L10180T, were isolated from sediment sampled at Dabancheng saline lake in Xinjiang, PR China. A polyphasic approach was used to clarify the taxonomic positions of the two strains. Cells of the isolates were curved ring-like, horseshoe-shaped or rod-shaped, non-motile and non-spore-forming. Cells were Gram-stain-negative, aerobic, heterotrophic and rose-pigmented. 
The phylogenetic trees based on 16S rRNA gene sequences showed that strains SYSU L10167T and SYSU L10180T formed a distinct lineage within the genus Cyclobacterium. Strains SYSU L10167T and SYSU L10180T showed highest similarities to Cyclobacterium jeungdonense KCTC 23150T (98.0 and 97.4%, respectively). Results of genomic analyses (including average nucleotide identity, digital DNA-DNA hybridization and the marker gene tree) and pan-genome analysis further confirmed that strains SYSU L10167T and SYSU L10180T were separate from each other and other species of the genus Cyclobacterium. The draft genomes of the isolates had sizes of 5.5-5.7 Mb and reflected their major physiological capabilities. Based on phenotypic, physiological, chemotaxonomic and genotypic characterization, we propose that the isolates represent two novel species, for which the names Cyclobacterium salsum sp. nov. and Cyclobacterium roseum sp. nov. are proposed. The type strains of the species are SYSU L10167T (=KCTC 72390T=CGMCC 1.17521T) and SYSU L10180T (=KCTC 72391T=CGMCC 1.17278T).}, } @article {pmid32455698, year = {2020}, author = {Garrido-Sanz, D and Redondo-Nieto, M and Martín, M and Rivilla, R}, title = {Comparative Genomics of the Rhodococcus Genus Shows Wide Distribution of Biodegradation Traits.}, journal = {Microorganisms}, volume = {8}, number = {5}, pages = {}, pmid = {32455698}, issn = {2076-2607}, support = {Greener Grant Agreement 826312//Horizon 2020/ ; RTI2018-0933991-B-I00//Ministerio de Ciencia e Innovación/ ; FPU14/03965//Ministerio de Educación, Cultura y Deporte/ ; }, abstract = {The genus Rhodococcus exhibits great potential for bioremediation applications due to its huge metabolic diversity, including biotransformation of aromatic and aliphatic compounds. Comparative genomic studies of this genus are limited to a small number of genomes, while the high number of sequenced strains to date could provide more information about the Rhodococcus diversity. 
Phylogenomic analysis of 327 Rhodococcus genomes and clustering of intergenomic distances identified 42 phylogenomic groups and 83 species-level clusters. Rarefaction models show that these numbers are likely to increase as new Rhodococcus strains are sequenced. The Rhodococcus genus possesses a small "hard" core genome consisting of 381 orthologous groups (OGs), while a "soft" core genome of 1253 OGs is reached with 99.16% of the genomes. Models of sequentially randomly added genomes show that a small number of genomes are enough to explain most of the shared diversity of the Rhodococcus strains, while the "open" pangenome and strain-specific genome evidence that the diversity of the genus will increase, as new genomes still add more OGs to the whole genomic set. Most rhodococci possess genes involved in the degradation of aliphatic and aromatic compounds, while short-chain alkane degradation is restricted to a certain number of groups, among which a specific particulate methane monooxygenase (pMMO) is only found in Rhodococcus sp. WAY2. 
The analysis of Rieske 2Fe-2S dioxygenases among rhodococci genomes revealed that most of these enzymes remain uncharacterized.}, } @article {pmid32453966, year = {2020}, author = {Eizenga, JM and Novak, AM and Sibbesen, JA and Heumos, S and Ghaffaari, A and Hickey, G and Chang, X and Seaman, JD and Rounthwaite, R and Ebler, J and Rautiainen, M and Garg, S and Paten, B and Marschall, T and Sirén, J and Garrison, E}, title = {Pangenome Graphs.}, journal = {Annual review of genomics and human genetics}, volume = {21}, number = {}, pages = {139--162}, doi = {10.1146/annurev-genom-120219-080406}, pmid = {32453966}, issn = {1545-293X}, support = {U01 HL137183/HL/NHLBI NIH HHS/United States ; U41 HG007234/HG/NHGRI NIH HHS/United States ; BB/S004661/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {Low-cost whole-genome assembly has enabled the collection of haplotype-resolved pangenomes for numerous organisms. In turn, this technological change is encouraging the development of methods that can precisely address the sequence and variation described in large collections of related genomes. These approaches often use graphical models of the pangenome to support algorithms for sequence alignment, visualization, functional genomics, and association studies. The additional information provided to these methods by the pangenome allows them to achieve superior performance on a variety of bioinformatic tasks, including read alignment, variant calling, and genotyping. Pangenome graphs stand to become a ubiquitous tool in genomics. 
Although it is unclear whether they will replace linear reference genomes, their ability to harmoniously relate multiple sequence and coordinate systems will make them useful irrespective of which pangenomic models become most common in the future.}, } @article {pmid32451426, year = {2020}, author = {Kelly, LJ and Plumb, WJ and Carey, DW and Mason, ME and Cooper, ED and Crowther, W and Whittemore, AT and Rossiter, SJ and Koch, JL and Buggs, RJA}, title = {Convergent molecular evolution among ash species resistant to the emerald ash borer.}, journal = {Nature ecology \& evolution}, volume = {4}, number = {8}, pages = {1116--1128}, doi = {10.1038/s41559-020-1209-3}, pmid = {32451426}, issn = {2397-334X}, support = {BB/L012162/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Animals ; *Coleoptera/genetics ; Evolution, Molecular ; *Fraxinus/genetics ; Larva ; Phylogeny ; }, abstract = {Recent studies show that molecular convergence plays an unexpectedly common role in the evolution of convergent phenotypes. We exploited this phenomenon to find candidate loci underlying resistance to the emerald ash borer (EAB, Agrilus planipennis), the United States' most costly invasive forest insect to date, within the pan-genome of ash trees (the genus Fraxinus). We show that EAB-resistant taxa occur within three independent phylogenetic lineages. In genomes from these resistant lineages, we detect 53 genes with evidence of convergent amino acid evolution. Gene-tree reconstruction indicates that, for 48 of these candidates, the convergent amino acids are more likely to have arisen via independent evolution than by another process such as hybridization or incomplete lineage sorting. Seven of the candidate genes have putative roles connected to the phenylpropanoid biosynthesis pathway and 17 relate to herbivore recognition, defence signalling or programmed cell death. 
Evidence for loss-of-function mutations among these candidates is more frequent in susceptible species than in resistant ones. Our results on evolutionary relationships, variability in resistance, and candidate genes for defence response within the ash genus could inform breeding for EAB resistance, facilitating ecological restoration in areas invaded by this beetle.}, } @article {pmid32448920, year = {2020}, author = {Gao, S and Wu, J and Stiller, J and Zheng, Z and Zhou, M and Wang, YG and Liu, C}, title = {Identifying barley pan-genome sequence anchors using genetic mapping and machine learning.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {133}, number = {9}, pages = {2535--2544}, doi = {10.1007/s00122-020-03615-y}, pmid = {32448920}, issn = {1432-2242}, support = {R-10191-01//Commonwealth Scientific and Industrial Research Organisation/ ; DP160104292//Centre of Excellence for Mathematical and Statistical Frontiers, Australian Research Council/ ; }, abstract = {KEY MESSAGE: We identified 1.844 million barley pan-genome sequence anchors from 12,306 genotypes using genetic mapping and machine learning. There is increasing evidence that genes from a given crop genotype are far to cover all genes in that species; thus, building more comprehensive pan-genomes is of great importance in genetic research and breeding. Obtaining a thousand-genotype scale pan-genome using deep-sequencing data is currently impractical for species like barley which has a huge and highly repetitive genome. To this end, we attempted to identify barley pan-genome sequence anchors from a large quantity of genotype-by-sequencing (GBS) datasets by combining genetic mapping and machine learning algorithms. Based on the GBS sequences from 11,166 domesticated and 1140 wild barley genotypes, we identified 1.844 million pan-genome sequence anchors. Of them, 532,253 were identified as presence/absence variation (PAV) tags. 
Through aligning these PAV tags to the genome of hulless barley genotype Zangqing320, our analysis resulted in a validation of 83.6% of them from the domesticated genotypes and 88.6% from the wild barley genotypes. Association analyses against flowering time, plant height and kernel size showed that the relative importance of the PAV and non-PAV tags varied for different traits. The pan-genome sequence anchors based on GBS tags can facilitate the construction of a comprehensive pan-genome and greatly assist various genetic studies including identification of structural variation, genetic mapping and breeding in barley.}, } @article {pmid32443820, year = {2020}, author = {Oshkin, IY and Miroshnikov, KK and Grouzdev, DS and Dedysh, SN}, title = {Pan-Genome-Based Analysis as a Framework for Demarcating Two Closely Related Methanotroph Genera Methylocystis and Methylosinus.}, journal = {Microorganisms}, volume = {8}, number = {5}, pages = {}, pmid = {32443820}, issn = {2076-2607}, support = {18-74-00058//Russian Science Foundation/ ; 18-34-00363//Russian Foundation for Basic Research/ ; }, abstract = {The Methylocystis and Methylosinus are two of the five genera that were included in the first taxonomic framework of methanotrophic bacteria created half a century ago. Members of both genera are widely distributed in various environments and play a key role in reducing methane fluxes from soils and wetlands. The original separation of these methanotrophs in two distinct genera was based mainly on their differences in cell morphology. Further comparative studies that explored various single-gene-based phylogenies suggested the monophyletic nature of each of these genera. Current availability of genome sequences from members of the Methylocystis/ Methylosinus clade opens the possibility for in-depth comparison of the genomic potentials of these methanotrophs. 
Here, we report the finished genome sequence of Methylocystis heyeri H2T and compare it to 23 currently available genomes of Methylocystis and Methylosinus species. The phylogenomic analysis confirmed that members of these genera form two separate clades. The Methylocystis/Methylosinus pan-genome core comprised 1,173 genes, with the accessory genome containing 4,941 and 11,192 genes in the shell and the cloud, respectively. Major differences between the genome-encoded environmental traits of these methanotrophs include a variety of enzymes for methane oxidation and dinitrogen fixation as well as genomic determinants for cell motility and photosynthesis.}, } @article {pmid32434538, year = {2020}, author = {Castillo, AI and Chacón-Díaz, C and Rodríguez-Murillo, N and Coletta-Filho, HD and Almeida, RPP}, title = {Impacts of local population history and ecology on the evolution of a globally dispersed pathogen.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {369}, pmid = {32434538}, issn = {1471-2164}, support = {European Union's Horizon 2020 research and innovation programme under grant agreement No 635646: POnTE (Pest Organisms Threatening Europe), the California Department of Food and Agriculture Pierce's Disease Research Program, and grant agreement No 727987: XF-ACTORS//California Department of Food and Agriculture (US)/ ; }, abstract = {BACKGROUND: Pathogens with a global distribution face diverse biotic and abiotic conditions across populations. Moreover, the ecological and evolutionary history of each population is unique. Xylella fastidiosa is a xylem-dwelling bacterium infecting multiple plant hosts, often with detrimental effects. As a group, X. fastidiosa is divided into distinct subspecies with allopatric historical distributions and patterns of multiple introductions from numerous source populations. The capacity of X. 
fastidiosa to successfully colonize and cause disease in naïve plant hosts varies among subspecies, and potentially, among populations. Within Central America (i.e. Costa Rica) two X. fastidiosa subspecies coexist: the native subsp. fastidiosa and the introduced subsp. pauca. Using whole genome sequences, the patterns of gene gain/loss, genomic introgression, and genetic diversity were characterized within Costa Rica and contrasted to other X. fastidiosa populations.

RESULTS: Within Costa Rica, accessory and core genome analyses showed a highly malleable genome with numerous intra- and inter-subspecific gain/loss events. Likewise, variable levels of inter-subspecific introgression were found within and between both coexisting subspecies; nonetheless, the direction of donor/recipient subspecies to the recombinant segments varied. Some strains appeared to recombine more frequently than others; however, no group of genes or gene functions were overrepresented within recombinant segments. Finally, the patterns of genetic diversity of subsp. fastidiosa in Costa Rica were consistent with those of other native populations (i.e. subsp. pauca in Brazil).

CONCLUSIONS: Overall, this study shows the importance of characterizing local evolutionary and ecological history in the context of world-wide pathogen distribution.}, } @article {pmid32431712, year = {2020}, author = {Fiuza, TS and Lima, JPMS and de Souza, GA}, title = {EpitoCore: Mining Conserved Epitope Vaccine Candidates in the Core Proteome of Multiple Bacteria Strains.}, journal = {Frontiers in immunology}, volume = {11}, number = {}, pages = {816}, pmid = {32431712}, issn = {1664-3224}, abstract = {In reverse vaccinology approaches, complete proteomes of bacteria are submitted to multiple computational prediction steps in order to filter proteins that are possible vaccine candidates. Most available tools perform such analysis only in a single strain, or a very limited number of strains. But the vast amount of genomic data had shown that most bacteria contain pangenomes, i.e., their genomic information contains core, conserved genes, and random accessory genes specific to each strain. Therefore, in reverse vaccinology methods it is of the utmost importance to define core proteins and core epitopes. EpitoCore is a decision-tree pipeline developed to fulfill that need. It provides surfaceome prediction of proteins from related strains, defines core proteins within those, calculate their immunogenicity, predicts epitopes for a given set of MHC alleles defined by the user, and then reports if epitopes are located extracellularly and if they are conserved among the core homologs. Pipeline performance is illustrated by mining peptide vaccine candidates in Mycobacterium avium hominissuis strains. From a total proteome of ~4,800 proteins per strain, EpitoCore predicted 103 highly immunogenic core homologs located at cell surface, many of those related to virulence and drug resistance. 
Conserved epitopes identified among these homologs allows the users to define sets of peptides with potential to immunize the largest coverage of tested HLA alleles using peptide-based vaccines. Therefore, EpitoCore is able to provide automated identification of conserved epitopes in bacterial pangenomic datasets.}, } @article {pmid32428556, year = {2020}, author = {Gohil, K and Rajput, V and Dharne, M}, title = {Pan-genomics of Ochrobactrum species from clinical and environmental origins reveals distinct populations and possible links.}, journal = {Genomics}, volume = {112}, number = {5}, pages = {3003--3012}, doi = {10.1016/j.ygeno.2020.04.030}, pmid = {32428556}, issn = {1089-8646}, abstract = {Ochrobactrum genus is comprised of soil-dwelling Gram-negative bacteria mainly reported for bioremediation of toxic compounds. Since last few years, mainly two species of this genus, O. intermedium and O. anthropi were documented for causing infections mostly in the immunocompromised patients. Despite such ubiquitous presence, study of adaptation in various niches is still lacking. Thus, to gain insights into the niche adaptation strategies, pan-genome analysis was carried out by comparing 67 genome sequences belonging to Ochrobactrum species. Pan-genome analysis revealed it is an open pan-genome indicative of the continuously evolving nature of the genus. The presence/absence of gene clusters also illustrated the unique presence of antibiotic efflux transporter genes and type IV secretion system genes in the clinical strains while the genes of solvent resistance and exporter pumps in the environmental strains. A phylogenomic investigation based on 75 core genes depicted better and robust phylogenetic resolution and topology than the 16S rRNA gene. To support the pan-genome analysis, individual genomes were also investigated for the mobile genetic elements (MGE), antibiotic resistance genes (ARG), metal resistance genes (MRG) and virulence factors (VF). 
The analysis revealed the presence of MGE, ARG, and MRG in all the strains which play an important role in the species evolution which is in agreement with the pan-genome analysis. The average nucleotide identity (ANI) based on the genetic relatedness between the Ochrobactrum species indicated a distinction between individual species. Interestingly, the ANI tool was able to classify the Ochrobactrum genomes to the species level which were assigned till the genus level on the NCBI database.}, } @article {pmid32427945, year = {2020}, author = {Katiyar, A and Sharma, P and Dahiya, S and Singh, H and Kapil, A and Kaur, P}, title = {Genomic profiling of antimicrobial resistance genes in clinical isolates of Salmonella Typhi from patients infected with Typhoid fever in India.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {8299}, pmid = {32427945}, issn = {2045-2322}, mesh = {Anti-Bacterial Agents/*pharmacology ; Bacterial Proteins/*genetics ; Cefixime/pharmacology/therapeutic use ; Ceftriaxone/pharmacology/therapeutic use ; DNA Gyrase/genetics ; DNA Topoisomerase IV/genetics ; *Drug Resistance, Multiple, Bacterial ; Fluoroquinolones/pharmacology/therapeutic use ; Gene Expression Regulation, Bacterial/drug effects ; Humans ; India ; Microbial Sensitivity Tests ; Phenotype ; Salmonella typhi/drug effects/*genetics/isolation & purification ; Typhoid Fever/drug therapy/*microbiology ; Whole Genome Sequencing/*methods ; }, abstract = {The development of multidrug resistance in Salmonella enterica serovar Typhi currently forms a major roadblock for the treatment of enteric fever. This poses a major health problem in endemic regions and extends to travellers returning from developing countries. The appearance of fluoroquinolone non-susceptible strains has resulted in use of ceftriaxone as drug of choice with azithromycin being recommended for uncomplicated cases of typhoid fever. 
A recent sporadic instance of decreased susceptibility to the latest drug regime has necessitated a detailed analysis of antimicrobial resistance genes and possible relationships with their phenotypes to facilitate selection of future treatment regimes. Whole genome sequencing (WGS) was conducted for 133 clinical isolates from typhoid patients. Sequence output files were processed for pan-genome analysis and prediction of antimicrobial resistance genes. The WGS analyses disclosed the existence of fluoroquinolone resistance conferring mutations in gyrA, gyrB, parC and parE genes of all strains. Acquired resistance determining mechanisms observed included catA1 genes for chloramphenicol resistance, dfrA7, dfrA15, sul1 and sul2 for trimethoprim-sulfamethoxazole and blaTEM-116/blaTEM-1B genes for amoxicillin. No resistance determinants were found for ceftriaxone and cefixime. The genotypes were further correlated with their respective phenotypes for chloramphenicol, ampicillin, co-trimoxazole, ciprofloxacin and ceftriaxone. A high correlation was observed between genotypes and phenotypes in isolates of S. Typhi. The pan-genome analysis revealed that core genes were enriched in metabolic functions and accessory genes were majorly implicated in pathogenesis and antimicrobial resistance. The pan-genome of S. Typhi appears to be closed (Bpan = 0.09) as analysed by Heap's law. Simpson's diversity index of 0.51 showed a lower level of genetic diversity among isolates of S. Typhi. 
Overall, this study augments the present knowledge that WGS can help predict resistance genotypes and eventual correlation with phenotypes, enabling the chance to spot AMR determinants for fast diagnosis and prioritize antibiotic use directly from sequence.}, } @article {pmid32424209, year = {2020}, author = {Datta, S and Saha, D and Chattopadhyay, L and Majumdar, B}, title = {Genome Comparison Identifies Different Bacillus Species in a Bast Fibre-Retting Bacterial Consortium and Provides Insights into Pectin Degrading Genes.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {8169}, pmid = {32424209}, issn = {2045-2322}, mesh = {Bacillus/classification/enzymology/*genetics/metabolism ; Bacterial Proteins/genetics/*metabolism ; Biodegradation, Environmental ; Genome Size ; *Genome, Bacterial ; Genomics ; Microbial Consortia ; Pectins/*metabolism ; Phylogeny ; Polysaccharide-Lyases/genetics/metabolism ; }, abstract = {Retting of bast fibres requires removal of pectin, hemicellulose and other non-cellulosic materials from plant stem tissues by a complex microbial community. A microbial retting consortium with high-efficiency pectinolytic bacterial strains is effective in reducing retting-time and enhancing fibre quality. We report comprehensive genomic analyses of three bacterial strains (PJRB 1, 2 and 3) of the consortium and resolve their taxonomic status, genomic features, variations, and pan-genome dynamics. The genome sizes of the strains are ~3.8 Mb with 3729 to 4002 protein-coding genes. Detailed annotations of the protein-coding genes revealed different carbohydrate-degrading CAZy classes viz. PL1, PL9, GH28, CE8, and CE12. Phylogeny and structural features of pectate lyase proteins of PJRB strains divulge their functional uniqueness and evolutionary convergence with closely related Bacillus strains. 
Genome-wide prediction of genomic variations revealed 12461 to 67381 SNPs, and notably many unique SNPs were localized within the important pectin metabolism genes. The variations in the pectate lyase genes possibly contribute to their specialized pectinolytic function during the retting process. These findings encompass a strong foundation for fundamental and evolutionary studies on this unique microbial degradation of decaying plant material with immense industrial significance. These have preponderant implications in plant biomass research and food industry, and also posit application in the reclamation of water pollution from plant materials.}, } @article {pmid32421490, year = {2020}, author = {Huang, CH and Chen, CC and Liou, JS and Lee, AY and Blom, J and Lin, YC and Huang, L and Watanabe, K}, title = {Genome-based reclassification of Lactobacillus casei: emended classification and description of the species Lactobacillus zeae.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {70}, number = {6}, pages = {3755--3762}, doi = {10.1099/ijsem.0.003969}, pmid = {32421490}, issn = {1466-5034}, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; Genes, Bacterial ; Lactobacillus/*classification ; Lactobacillus casei/*classification ; Multilocus Sequence Typing ; Nucleic Acid Hybridization ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {Taxonomic relationships between Lactobacillus casei, Lactobacillus paracasei and Lactobacillus zeae have long been debated. Results of previous analyses have shown that overall genome relatedness indices (such as average nucleotide identity and core nucleotide identity) between the type strains L. casei ATCC 393T and L. zeae ATCC 15820T were 94.6 and 95.3 %, respectively, which are borderline for species definition. 
However, the digital DNA‒DNA hybridization value was 57.3 %, which was clearly lower than the species delineation threshold of 70 %, and hence raised the possibility that L. casei could be reclassified into two species. To re-evaluate the taxonomic relationship of these taxa, multilocus sequence analysis (MLSA) based on the concatenated five housekeeping gene (dnaJ, dnaK, mutL, pheS and yycH) sequences, phylogenomic and core genome multilocus sequence typing analyses, gene presence and absence profiles using pan-genome analysis, matrix-assisted laser desorption/ionization time-of-flight mass spectrometry (MALDI-TOF MS) profiling analysis, cellular fatty acid compositions, and phenotype analysis were carried out. The results of phenotypic characterization, MLSA, whole-genome sequence-based analyses and MALDI-TOF MS profiling justified an independent species designation for the L. zeae strains, and supported an emended the description of the name of Lactobacillus zeae (ex Kuznetsov 1956) Dicks et al. 1996, with ATCC 15820T (=DSM 20178T=BCRC 17942T) as the type strain.}, } @article {pmid32407252, year = {2020}, author = {Bakhshi Ganje, M and Mackay, J and Nicolaisen, M and Shams-Bakhsh, M}, title = {Comparative Genomics, Pangenome, and Phylogenomic Analyses of Brenneria spp., and Delineation of Brenneria izadpanahii sp. nov.}, journal = {Phytopathology}, volume = {}, number = {}, pages = {PHYTO04200129FI}, doi = {10.1094/PHYTO-04-20-0129-FI}, pmid = {32407252}, issn = {0031-949X}, abstract = {Brenneria species are bacterial plant pathogens mainly affecting woody plants. Association of all members with devastating disorders (e.g., acute oak decline in Iran and United Kingdom) are due to adaptation and pathogenic behavior in response to host and environmental factors. Some species, including B. goodwinii, B. salicis, and B. nigrifluens, also show endophytic residence. Here we show that all species including novel Brenneria sp. are closely related. 
Gene-based and genome/pangenome-based phylogeny divide the genus into two distinct lineages, Brenneria clades A and B. The two clades were functionally distinct and were consistent with their common and special potential activities as determined via annotation of functional domains. Pangenome analysis demonstrated that the core pathogenicity factors were highly conserved, an hrp gene cluster encoding a type III secretion system was found in all species except B. corticis. An extensive repertoire of candidate virulence factors was identified. Comparative genomics indicated a repertoire of plant cell wall degrading enzymes, metabolites/antibiotics, and numerous prophages providing new insights into Brenneria-host interactions and appropriate targets for further characterization. This work not only documented the genetic differentiation of Brenneria species but also delineates a more functionally driven understanding of Brenneria by comparison with relevant Pectobacteriaceae thereby substantially enriching the extent of information available for functional genomic investigations.}, } @article {pmid32403359, year = {2020}, author = {Wang, M and Zhu, H and Kong, Z and Li, T and Ma, L and Liu, D and Shen, Q}, title = {Pan-Genome Analyses of Geobacillus spp. Reveal Genetic Characteristics and Composting Potential.}, journal = {International journal of molecular sciences}, volume = {21}, number = {9}, pages = {}, pmid = {32403359}, issn = {1422-0067}, support = {2018YFD0500201//National Key R&D Program of China/ ; 31972513//National Natural Science Foundation of China/ ; BK20150059//Jiangsu Province Natural Sciences Fund Subsidization Project/ ; KYZ201716//Fundamental Research Funds for the Central Universities/ ; }, abstract = {The genus Geobacillus is abundant in ecological diversity and is also well-known as an authoritative source for producing various thermostable enzymes. 
Although it is clear now that Geobacillus evolved from Bacillus, relatively little knowledge has been obtained regarding its evolutionary mechanism, which might also contribute to its ecological diversity and biotechnology potential. Here, a statistical comparison of thirty-two Geobacillus genomes was performed with a specific focus on pan- and core genomes. The pan-genome of this set of Geobacillus strains contained 14,913 genes, and the core genome contained 940 genes. The Clusters of Orthologous Groups (COG) and Carbohydrate-Active Enzymes (CAZymes) analysis revealed that the Geobacillus strains had huge potential industrial application in composting for agricultural waste management. Detailed comparative analyses showed that basic functional classes and housekeeping genes were conserved in the core genome, while genes associated with environmental interaction or energy metabolism were more enriched in the pan-genome. Therefore, the evolution of Geobacillus seems to be guided by environmental parameters. In addition, horizontal gene transfer (HGT) events among different Geobacillus species were detected. Altogether, pan-genome analysis was a useful method for detecting the evolutionary mechanism, and Geobacillus' evolution was directed by the environment and HGT events.}, } @article {pmid32393168, year = {2020}, author = {Chibani, CM and Roth, O and Liesegang, H and Wendling, CC}, title = {Genomic variation among closely related Vibrio alginolyticus strains is located on mobile genetic elements.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {354}, pmid = {32393168}, issn = {1471-2164}, support = {WE 5822/ 1-1//Deutsche Forschungsgemeinschaft/ ; WE5822/1-2//Deutsche Forschungsgemeinschaft/ ; RO462/4-2//Deutsche Forschungsgemeinschaft/ ; na//KAAD/ ; }, abstract = {BACKGROUND: Species of the genus Vibrio, one of the most diverse bacteria genera, have undergone niche adaptation followed by clonal expansion. 
Niche adaptation and ultimately the formation of ecotypes and speciation in this genus has been suggested to be mainly driven by horizontal gene transfer (HGT) through mobile genetic elements (MGEs). Our knowledge about the diversity and distribution of Vibrio MGEs is heavily biased towards human pathogens and our understanding of the distribution of core genomic signatures and accessory genes encoded on MGEs within specific Vibrio clades is still incomplete. We used nine different strains of the marine bacterium Vibrio alginolyticus isolated from pipefish in the Kiel-Fjord to perform a multiscale-comparative genomic approach that allowed us to investigate [1] those genomic signatures that characterize a habitat-specific ecotype and [2] the source of genomic variation within this ecotype.

RESULTS: We found that the nine isolates from the Kiel-Fjord have a closed-pangenome and did not differ based on core-genomic signatures. Unique genomic regions and a unique repertoire of MGEs within the Kiel-Fjord isolates suggest that the acquisition of gene-blocks by HGT played an important role in the evolution of this ecotype. Additionally, we found that ~ 90% of the genomic variation among the nine isolates is encoded on MGEs, which supports ongoing theory that accessory genes are predominately located on MGEs and shared by HGT. Lastly, we could show that these nine isolates share a unique virulence and resistance profile which clearly separates them from all other investigated V. alginolyticus strains and suggests that these are habitat-specific genes, required for a successful colonization of the pipefish, the niche of this ecotype.

CONCLUSION: We conclude that all nine V. alginolyticus strains from the Kiel-Fjord belong to a unique ecotype, which we named the Kiel-alginolyticus ecotype. The low sequence variation of the core-genome in combination with the presence of MGE encoded relevant traits, as well as the presence of a suitable niche (here the pipefish), suggest, that this ecotype might have evolved from a clonal expansion following HGT driven niche-adaptation.}, } @article {pmid32386604, year = {2020}, author = {Molina, L and Segura, A and Duque, E and Ramos, JL}, title = {The versatility of Pseudomonas putida in the rhizosphere environment.}, journal = {Advances in applied microbiology}, volume = {110}, number = {}, pages = {149--180}, doi = {10.1016/bs.aambs.2019.12.002}, pmid = {32386604}, issn = {0065-2164}, abstract = {This article addresses the lifestyle of Pseudomonas and focuses on how Pseudomonas putida can be used as a model system for biotechnological processes in agriculture, and in the removal of pollutants from soils. In this chapter we aim to show how a deep analysis using genetic information and experimental tests has helped to reveal insights into the lifestyle of Pseudomonads. Pseudomonas putida is a Plant Growth Promoting Rhizobacteria (PGPR) that establishes commensal relationships with plants. The interaction involves a series of functions encoded by core genes which favor nutrient mobilization, prevention of pathogen development and efficient niche colonization. 
Certain Pseudomonas putida strains harbor accessory genes that confer specific biodegradative properties and because these microorganisms can thrive on the roots of plants they can be exploited to remove pollutants via rhizoremediation, making the consortium plant/Pseudomonas a useful tool to combat pollution.}, } @article {pmid32381322, year = {2020}, author = {Kim, YB and Kim, JY and Song, HS and Lee, SH and Shin, NR and Bae, JW and Myoung, J and Lee, KE and Cha, IT and Rhee, JK and Roh, SW}, title = {Haloplanus rubicundus sp. nov., an extremely halophilic archaeon isolated from solar salt.}, journal = {Systematic and applied microbiology}, volume = {43}, number = {3}, pages = {126085}, doi = {10.1016/j.syapm.2020.126085}, pmid = {32381322}, issn = {1618-0984}, mesh = {Bacterial Typing Techniques ; Base Composition ; Gene Library ; Genome, Archaeal ; Genomics/methods ; Halobacteriaceae/*classification/*genetics/isolation & purification ; Phenotype ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; }, abstract = {Two extremely halophilic archaea strains, CBA1112T and CBA1113, were isolated from solar salt in Korea. The genome sizes and G+C content of CBA1112T and CBA1113 were 3.77 and 3.53Mb, and 66.0 and 66.5mol%, respectively. Phylogenetic analysis based on closely related taxa and environmental Haloplanus sequences indicated that both CBA1112T and CBA1113 strains are grouped within the genus Haloplanus. OrthoANI and in silico DNA-DNA hybridization values were below the species delineation threshold. Pan-genomic analysis showed that the two novel strains and four reference strains had 6203 pan-orthologous groups in total. Six Haloplanus strains shared 1728 core pan-genome orthologous groups, which were mainly associated with amino acid transport and metabolism and translation, ribosomal structure and biogenesis categories, and amino acid metabolism and carbohydrate metabolism related categories. 
The novel strain-specific pan-genome orthologous groups were mainly involved with replication, recombination and repair category and replication and repair pathway or amino acid metabolism pathway. Cells of both strains were Gram-negative and pleomorphic, and colonies were red-pigmented. The major polar lipids of both strains were phosphatidylglycerol, phosphatidylglycerol phosphate methyl ester, phosphatidylglycerol sulfate, and one glycolipid, sulfated mannosyl glucosyl diether. Based on genomic, phylogenetic, phenotypic, and chemotaxonomic features, strains CBA1112T and CBA1113 are described as novel species of the genus Haloplanus. Thus, we propose the name Haloplanus rubicundus sp. nov. The type strain is CBA1112T (=KCCM 43224T=JCM 30475T).}, } @article {pmid32375991, year = {2020}, author = {Gladstone, RA and Lo, SW and Goater, R and Yeats, C and Taylor, B and Hadfield, J and Lees, JA and Croucher, NJ and van Tonder, AJ and Bentley, LJ and Quah, FX and Blaschke, AJ and Pershing, NL and Byington, CL and Balaji, V and Hryniewicz, W and Sigauque, B and Ravikumar, KL and Almeida, SCG and Ochoa, TJ and Ho, PL and du Plessis, M and Ndlangisa, KM and Cornick, JE and Kwambana-Adams, B and Benisty, R and Nzenze, SA and Madhi, SA and Hawkins, PA and Pollard, AJ and Everett, DB and Antonio, M and Dagan, R and Klugman, KP and von Gottberg, A and Metcalf, BJ and Li, Y and Beall, BW and McGee, L and Breiman, RF and Aanensen, DM and Bentley, SD}, title = {Visualizing variation within Global Pneumococcal Sequence Clusters (GPSCs) and country population snapshots to contextualize pneumococcal isolates.}, journal = {Microbial genomics}, volume = {6}, number = {5}, pages = {}, pmid = {32375991}, issn = {2057-5858}, support = {098051/WT_/Wellcome Trust/United Kingdom ; 206194/WT_/Wellcome Trust/United Kingdom ; T32 AI055434/AI/NIAID NIH HHS/United States ; UL1 TR002538/TR/NCATS NIH HHS/United States ; }, abstract = {Knowledge of pneumococcal lineages, their geographic 
distribution and antibiotic resistance patterns, can give insights into global pneumococcal disease. We provide interactive bioinformatic outputs to explore such topics, aiming to increase dissemination of genomic insights to the wider community, without the need for specialist training. We prepared 12 country-specific phylogenetic snapshots, and international phylogenetic snapshots of 73 common Global Pneumococcal Sequence Clusters (GPSCs) previously defined using PopPUNK, and present them in Microreact. Gene presence and absence defined using Roary, and recombination profiles derived from Gubbins are presented in Phandango for each GPSC. Temporal phylogenetic signal was assessed for each GPSC using BactDating. We provide examples of how such resources can be used. In our example use of a country-specific phylogenetic snapshot we determined that serotype 14 was observed in nine unrelated genetic backgrounds in South Africa. The international phylogenetic snapshot of GPSC9, in which most serotype 14 isolates from South Africa were observed, highlights that there were three independent sub-clusters represented by South African serotype 14 isolates. We estimated from the GPSC9-dated tree that the sub-clusters were each established in South Africa during the 1980s. We show how recombination plots allowed the identification of a 20 kb recombination spanning the capsular polysaccharide locus within GPSC97. This was consistent with a switch from serotype 6A to 19A estimated to have occurred in the 1990s from the GPSC97-dated tree. Plots of gene presence/absence of resistance genes (tet, erm, cat) across the GPSC23 phylogeny were consistent with acquisition of a composite transposon. We estimated from the GPSC23-dated tree that the acquisition occurred between 1953 and 1975. Finally, we demonstrate the assignment of GPSC31 to 17 externally generated pneumococcal serotype 1 assemblies from Utah via Pathogenwatch. 
Most of the Utah isolates clustered within GPSC31 in a USA-specific clade with the most recent common ancestor estimated between 1958 and 1981. The resources we have provided can be used to explore the data, test hypotheses and generate new hypotheses. The accessible assignment of GPSCs allows others to contextualize their own collections beyond the data presented here.}, } @article {pmid32375781, year = {2020}, author = {Bu, QT and Li, YP and Xie, H and Wang, J and Li, ZY and Chen, XA and Mao, XM and Li, YQ}, title = {Comprehensive dissection of dispensable genomic regions in Streptomyces based on comparative analysis approach.}, journal = {Microbial cell factories}, volume = {19}, number = {1}, pages = {99}, pmid = {32375781}, issn = {1475-2859}, support = {2019YFA09005400//National Key Research and Development Program/ ; 31520103901//National Natural Science Foundation of China/ ; 3173002//National Natural Science Foundation of China/ ; }, mesh = {Bacterial Proteins/genetics ; *Genome, Bacterial ; Genomics/*methods ; Multigene Family ; Phylogeny ; Sequence Deletion ; Streptomyces/*genetics ; }, abstract = {BACKGROUND: Large-scale genome reduction has been performed to significantly improve the performance of microbial chassis. Identification of the essential or dispensable genes is pivotal for genome reduction to avoid synthetic lethality. Here, taking Streptomyces as an example, we developed a combinatorial strategy for systematic identification of large and dispensable genomic regions in Streptomyces based on multi-omics approaches.

RESULTS: Phylogenetic tree analysis revealed that the model strains including S. coelicolor A3(2), S. albus J1074 and S. avermitilis MA-4680 were the preferred references for comparative analysis of candidate genomes. Multiple genome alignment suggested that the Streptomyces genomes embodied a highly conserved core region and variable sub-telomeric regions, and may present a symmetric or asymmetric structure. Pan-genome and functional genome analyses showed that most conserved genes responsible for the fundamental functions of cell viability were concentrated in the core region and the vast majority of abundant genes were dispersed in the sub-telomeric regions. These results suggested that large-scale deletion can be performed in sub-telomeric regions to greatly streamline the Streptomyces genomes for developing versatile chassis.

CONCLUSIONS: The integrative approach of comparative genomics, functional genomics and pan-genomics can not only be applied to perform a multi-tiered dissection for Streptomyces genomes, but also work as a universal method for systematic analysis of removable regions in other microbial hosts in order to generate more miscellaneous and versatile chassis with minimized genome for drug discovery.}, } @article {pmid32373862, year = {2020}, author = {Zwarycz, AS and Livingstone, PG and Whitworth, DE}, title = {Within-species variation in OMV cargo proteins: the Myxococcus xanthus OMV pan-proteome.}, journal = {Molecular omics}, volume = {16}, number = {4}, pages = {387-397}, doi = {10.1039/d0mo00027b}, pmid = {32373862}, issn = {2515-4184}, abstract = {Extracellular membrane vesicles are produced by all domains of life (bacteria, archaea and eukaryotes). Bacterial extracellular vesicles (outer membrane vesicles or OMVs) are produced by outer membrane blebbing, and contain proteins, nucleic acids, virulence factors, lipids and metabolites. OMV functions depend on their internal composition, therefore understanding the proteome of OMVs, and how it varies between organisms, is imperative. Here, we report a comparative proteomic profiling of OMVs from strains of Myxococcus xanthus, a predatory species of Gram-negative myxobacteria whose secretions include secondary metabolites and hydrolytic enzymes, thought to be involved in prey lysis. Ten strains were chosen for study, of which seven had genome sequences available. The remaining three strains were genome sequenced allowing definition of the core and accessory genes and genome-derived proteins found within the pan-genome and pan-proteome respectively. OMVs were isolated from each strain and proteins identified using mass spectrometry. The M. xanthus OMV pan-proteome was found to contain tens of 'core' and hundreds of 'accessory' proteins. 
Properties of the OMV pan-proteome were compared with those of the pan-proteome deduced from the M. xanthus pan-genome. On average, 80% of 'core' OMV proteins are encoded by genes of the core genome, yet the OMV proteomes of individual strains contain subsets of core genome-derived proteins which only partially overlap. In addition, the distribution of characteristics of vesicle proteins does not correlate with the genome-derived proteome characteristic distribution. We hypothesize that M. xanthus cells package a personalized subset of proteins whose availability is only partially dictated by the presence/absence of encoding genes within the genome.}, } @article {pmid32373098, year = {2020}, author = {Garcia-Gutierrez, E and Walsh, CJ and Sayavedra, L and Diaz-Calvo, T and Thapa, D and Ruas-Madiedo, P and Mayer, MJ and Cotter, PD and Narbad, A}, title = {Genotypic and Phenotypic Characterization of Fecal Staphylococcus epidermidis Isolates Suggests Plasticity to Adapt to Different Human Body Sites.}, journal = {Frontiers in microbiology}, volume = {11}, number = {}, pages = {688}, pmid = {32373098}, issn = {1664-302X}, abstract = {Staphylococcus epidermidis is a commensal species that has been increasingly identified as a nosocomial agent. Despite the interest, little is known about the ability of S. epidermidis isolates to adapt to different ecological niches through comparisons at genotype or phenotype levels. One niche where S. epidermidis has been reported is the human gut. Here, we present three S. epidermidis strains isolated from feces and show that they are not phylogenetically distinct from S. epidermidis isolated from other human body sites. Both gut and skin strains harbored multiple genes associated with biofilm formation and showed similar levels of biofilm formation on abiotic surfaces. 
High-throughput physiological tests using the BIOLOG technology showed no major metabolic differences between isolates from stool, skin, or cheese, while an isolate from bovine mastitis showed more phenotypic variation. Gut and skin isolates showed the ability to metabolize glycine-conjugated bile acids and to grow in the presence of bile, but the gut isolates exhibited faster anaerobic growth compared to isolates of skin origin.}, } @article {pmid32363038, year = {2020}, author = {Zhang, Y and Wang, J and Yajun, C and Zhou, M and Wang, W and Geng, M and Xu, D and Xu, Z}, title = {Comparative Genomics Uncovers the Genetic Diversity and Synthetic Biology of Secondary Metabolite Production of Trametes.}, journal = {Mycobiology}, volume = {48}, number = {2}, pages = {104-114}, pmid = {32363038}, issn = {1229-8093}, abstract = {The carbohydrate-active enzyme (CAZyme) genes of Trametes contribute to polysaccharide degradation. However, the comprehensive analysis of the composition of CAZymes and the biosynthetic gene clusters (BGCs) of Trametes remain unclear. Here, we conducted comparative analysis, detected the CAZyme genes, and predicted the BGCs for nine Trametes strains. Among the 82,053 homologous clusters obtained for Trametes, we identified 8518 core genes, 60,441 accessory genes, and 13,094 specific genes. A large proportion of CAZyme genes were cataloged into glycoside hydrolases, glycosyltransferases, and carbohydrate esterases. The predicted BGCs of Trametes were divided into six strategies, and the nine Trametes strains harbored 47.78 BGCs on average. Our study revealed that Trametes exhibits an open pan-genome structure. 
These findings provide insights into the genetic diversity and explored the synthetic biology of secondary metabolite production for Trametes.}, } @article {pmid32360114, year = {2020}, author = {Farin, W and Oñate, FP and Plassais, J and Bonny, C and Beglinger, C and Woelnerhanssen, B and Nocca, D and Magoules, F and Le Chatelier, E and Pons, N and Cervino, ACL and Ehrlich, SD}, title = {Impact of laparoscopic Roux-en-Y gastric bypass and sleeve gastrectomy on gut microbiota: a metagenomic comparative analysis.}, journal = {Surgery for obesity and related diseases : official journal of the American Society for Bariatric Surgery}, volume = {16}, number = {7}, pages = {852-862}, doi = {10.1016/j.soard.2020.03.014}, pmid = {32360114}, issn = {1878-7533}, abstract = {BACKGROUND: Bariatric surgery is an effective therapeutic procedure for morbidly obese patients. The 2 most common interventions are sleeve gastrectomy (SG) and laparoscopic Roux-en-Y gastric bypass (LRYGB).

OBJECTIVES: The aim of this study was to compare the long-term microbiome after SG and LRYGB surgery in obese patients.

SETTING: University Hospital, France; University Hospital, United States; and University Hospital, Switzerland.

METHODS: Eighty-nine and 108 patients who underwent SG and LRYGB, respectively, were recruited. Stools were collected before and 6 months after surgery. Microbial DNA was analyzed with shotgun metagenomic sequencing (SOLiD 5500 xl Wildfire). MSPminer, a novel innovative tool to characterize new in silico biological entities, was used to identify 715 Metagenomic Species Pan-genomes. One hundred forty-eight functional modules were analyzed using GOmixer and the KEGG database.

RESULTS: Both interventions resulted in a similar increase of Shannon's diversity index and gene richness of gut microbiota, in parallel with weight loss, but the changes of microbial composition were different. LRYGB led to higher relative abundance of aero-tolerant bacteria, such as Escherichia coli and buccal species, such as Streptococcus and Veillonella spp. In contrast, anaerobes, such as Clostridium, were more abundant after SG, suggesting better conservation of anaerobic conditions in the gut. Enrichment of Akkermansia muciniphila was also observed after both surgeries. Function-level changes included higher potential for bacterial use of supplements, such as vitamin B12, B1, and iron upon LRYGB.

CONCLUSION: Microbiota changes after bariatric surgery depend on the nature of the intervention. LRYGB induces greater taxonomic and functional changes in gut microbiota than SG. Possible long-term health consequences of these alterations remain to be established.}, } @article {pmid32354325, year = {2020}, author = {Li, J and Gu, T and Li, L and Wu, X and Shen, L and Yu, R and Liu, Y and Qiu, G and Zeng, W}, title = {Complete genome sequencing and comparative genomic analyses of Bacillus sp. S3, a novel hyper Sb(III)-oxidizing bacterium.}, journal = {BMC microbiology}, volume = {20}, number = {1}, pages = {106}, pmid = {32354325}, issn = {1471-2180}, support = {2019JJ40361//Natural Science Foundation of Hunan Province of China/International ; 31470230, 51320105006, 51604308//National Natural Science Foundation of China/International ; 2018WK2012//Key Research and Development Projects in Hunan Province/International ; 2019zzts687//Fundamental Research Funds for the Central University of Central South University/International ; 2017RS3003//Youth Talent Foundation of Hunan Province of China/International ; }, abstract = {BACKGROUND: Antimonite [Sb(III)]-oxidizing bacterium has great potential in the environmental bioremediation of Sb-polluted sites. Bacillus sp. S3 that was previously isolated from antimony-contaminated soil displayed high Sb(III) resistance and Sb(III) oxidation efficiency. However, the genomic information and evolutionary feature of Bacillus sp. S3 are very scarce.

RESULTS: Here, we identified a 5,436,472 bp chromosome with 40.30% GC content and a 241,339 bp plasmid with 36.74% GC content in the complete genome of Bacillus sp. S3. Genomic annotation showed that Bacillus sp. S3 contained a key aioB gene potentially encoding As(III)/Sb(III) oxidase, which was not shared with other Bacillus strains. Furthermore, a wide variety of genes associated with Sb(III) and other heavy metal(loid)s were also ascertained in Bacillus sp. S3, reflecting its adaptive advantage for growth in the harsh eco-environment. Based on the analysis of phylogenetic relationship and the average nucleotide identities (ANI), Bacillus sp. S3 was proved to be a novel species within the Bacillus genus. The majority of mobile genetic elements (MGEs) were mainly distributed on chromosomes within the Bacillus genus. Pan-genome analysis showed that the 45 genomes contained 554 core genes and many unique genes were dissected in analyzed genomes. Whole genomic alignment showed that the Bacillus genus frequently underwent large-scale evolutionary events. In addition, the origin and evolution analysis of Sb(III)-resistance genes revealed the evolutionary relationships and horizontal gene transfer (HGT) events among the Bacillus genus. The assessment of functionality of heavy metal(loid)s resistance genes emphasized its indispensable role in the harsh eco-environment of the Bacillus genus. Real-time quantitative PCR (RT-qPCR) analysis indicated that Sb(III)-related genes were all induced under the Sb(III) stress, while the arsC gene was down-regulated.

CONCLUSIONS: The results in this study shed light on the molecular mechanisms of Bacillus sp. S3 coping with Sb(III), extended our understanding on the evolutionary relationships between Bacillus sp. S3 and other closely related species, and further enriched the Sb(III) resistance genetic data sources.}, } @article {pmid32336352, year = {2020}, author = {Kim, E and Yang, SM and Cho, EJ and Kim, HY}, title = {Novel real-time PCR assay for Lactobacillus casei group species using comparative genomics.}, journal = {Food microbiology}, volume = {90}, number = {}, pages = {103485}, doi = {10.1016/j.fm.2020.103485}, pmid = {32336352}, issn = {1095-9998}, mesh = {DNA Primers/genetics ; DNA, Bacterial/*genetics ; Genomics/*methods ; Lactobacillus casei/classification/*genetics ; Probiotics ; Real-Time Polymerase Chain Reaction/*methods ; Sequence Analysis, DNA ; }, abstract = {The Lactobacillus casei group, which includes the closely related species L. casei, L. paracasei, L. rhamnosus, and L. chiayiensis, has been under debate regarding its taxonomy because of the difficulty in distinguishing the species from each other. In the present study, we developed a novel real-time PCR assay for distinguishing the L. casei group species. The pan-genome, as determined by the genomes of 44 strains, comprised 6789 genes, comparative genomic analysis showed that L. casei group strains were classified by species. Based on these results, species-specific genes were identified, and primers were designed from those genes. Real-time PCR clearly distinguished each species of the L. casei group and specifically amplified only to the target species. The method was applied to 29 probiotic products, and the detected results and label claims were compared. Total 23 products were in accordance with the label claims, and the remaining products contained species different from those stated in the label claims. Our method can rapidly and accurately distinguish the L. 
casei group species in a single reaction. Hence, our assay can be applied to identify L. casei group species from food or environmental samples and to accurately determine the nomenclature of the species.}, } @article {pmid32331872, year = {2020}, author = {Bickhart, DM and McClure, JC and Schnabel, RD and Rosen, BD and Medrano, JF and Smith, TPL}, title = {Symposium review: Advances in sequencing technology herald a new frontier in cattle genomics and genome-enabled selection.}, journal = {Journal of dairy science}, volume = {103}, number = {6}, pages = {5278-5290}, doi = {10.3168/jds.2019-17693}, pmid = {32331872}, issn = {1525-3198}, mesh = {Animals ; Cattle/*genetics ; *Genome ; Genomics/*instrumentation ; *Selection, Genetic ; Sequence Analysis, DNA/*veterinary ; }, abstract = {The cattle reference genome assembly has underpinned major innovations in beef and dairy genetics through genome-enabled selection, including removal of deleterious recessive variants and selection for favorable alleles affecting quantitative production traits. The initial reference assemblies, up to and including UMD3.1 and Btau4.1, were based on a combination of clone-by-clone sequencing of bacterial artificial chromosome clones generated from blood DNA of a Hereford bull and whole-genome shotgun sequencing of blood DNA from his inbred daughter/granddaughter named L1 Dominette 01449 (Dominette). The approach introduced assembly gaps, misassemblies, and errors, and it limited the ability to assemble regions that undergo rearrangement in blood cells, such as immune gene clusters. Nonetheless, the reference supported the creation of genotyping tools and provided a basis for many studies of gene expression. Recently, long-read sequencing technologies have emerged that facilitated a re-assembly of the reference genome, using lung tissue from Dominette to resolve many of the problems and providing a bridge to place historical studies in common context. 
The new reference, ARS-UCD1.2, successfully assembled germline immune gene clusters and improved overall continuity (i.e., reduction of gaps and inversions) by over 250-fold. This reference properly places nearly all of the legacy genetic markers used for over a decade in the industry. In this review, we discuss the improvements made to the cattle reference; remaining issues present in the assembly; tools developed to support genome-based studies in beef and dairy cattle; and the emergence of newer genome assembly methods that are producing even higher-quality assemblies for other breeds of cattle at a fraction of the cost. The new frontier for cattle genomics research will likely include a transition from the individual Hereford reference genome, to a "pan-genome" reference, representing all the DNA segments existing in commonly used cattle breeds, bringing the cattle reference into line with the current direction of human genome research.}, } @article {pmid32320376, year = {2020}, author = {Guillier, L and Gourmelon, M and Lozach, S and Cadel-Six, S and Vignaud, ML and Munck, N and Hald, T and Palma, F}, title = {AB_SA: Accessory genes-Based Source Attribution - tracing the source of Salmonella enterica Typhimurium environmental strains.}, journal = {Microbial genomics}, volume = {6}, number = {7}, pages = {}, pmid = {32320376}, issn = {2057-5858}, abstract = {The partitioning of pathogenic strains isolated in environmental or human cases to their sources is challenging. The pathogens usually colonize multiple animal hosts, including livestock, which contaminate the food-production chain and the environment (e.g. soil and water), posing an additional public-health burden and major challenges in the identification of the source. Genomic data opens up new opportunities for the development of statistical models aiming to indicate the likely source of pathogen contamination. 
Here, we propose a computationally fast and efficient multinomial logistic regression source-attribution classifier to predict the animal source of bacterial isolates based on 'source-enriched' loci extracted from the accessory-genome profiles of a pangenomic dataset. Depending on the accuracy of the model's self-attribution step, the modeller selects the number of candidate accessory genes that best fit the model for calculating the likelihood of (source) category membership. The Accessory genes-Based Source Attribution (AB_SA) method was applied to a dataset of strains of Salmonella enterica Typhimurium and its monophasic variant (S. enterica 1,4,[5],12:i:-). The model was trained on 69 strains with known animal-source categories (i.e. poultry, ruminant and pig). The AB_SA method helped to identify 8 genes as predictors among the 2802 accessory genes. The self-attribution accuracy was 80 %. The AB_SA model was then able to classify 25 of the 29 S. enterica Typhimurium and S. enterica 1,4,[5],12:i:- isolates collected from the environment (considered to be of unknown source) into a specific category (i.e. animal source), with more than 85 % of probability. The AB_SA method herein described provides a user-friendly and valuable tool for performing source-attribution studies in only a few steps. 
AB_SA is written in R and freely available at https://github.com/lguillier/AB_SA.}, } @article {pmid32307574, year = {2020}, author = {Teixeira, P and Tacão, M and Baraúna, RA and Silva, A and Henriques, I}, title = {Genomic analysis of Chromobacterium haemolyticum: insights into the species resistome, virulence determinants and genome plasticity.}, journal = {Molecular genetics and genomics : MGG}, volume = {295}, number = {4}, pages = {1001-1012}, doi = {10.1007/s00438-020-01676-8}, pmid = {32307574}, issn = {1617-4623}, support = {CEECIND/01304/2017//Fundação para a Ciência e a Tecnologia/ ; SFRH/BD/132046/2017//Fundação para a Ciência e a Tecnologia/ ; UID/AMB/50017/2019//Fundação para a Ciência e a Tecnologia/ ; }, mesh = {Anti-Bacterial Agents/adverse effects/therapeutic use ; Bacterial Infections/drug therapy/*genetics/microbiology ; Chromobacterium/classification/drug effects/*genetics/pathogenicity ; Drug Resistance, Multiple, Bacterial/*genetics ; Genome, Bacterial/drug effects/genetics ; Genomics ; Humans ; Microbial Sensitivity Tests ; *Phylogeny ; Virulence/genetics ; }, abstract = {The increasing number of Chromobacterium haemolyticum human infection reports, especially in tropical regions and connected with environmental sources, resulted in an urge to better describe this species. This study aimed to characterize the C. haemolyticum resistome, virulence determinants and genetic platforms related with genome plasticity. A comparative genomic analysis was conducted between clinical C. haemolyticum genomes publicly available and the genome of an environmental isolate obtained in this study. The pangenome of C. haemolyticum was calculated and a total of 3378 core genes were predicted in its core genome, corresponding to 51.7% of the pangenome. Genetic determinants putatively encoding resistance to beta-lactams, fosfomycin, aminoglycosides and trimethoprim were predicted in all genomes, possibly constituting the intrinsic resistome of this species. 
In terms of resistance to beta-lactams, 4 genes were predicted encoding beta-lactamases of classes A, C and D. Moreover, the analysis of Chromobacterium genomes and C. haemolyticum environmental isolates reinforced the role of this genus as progenitor of the blaKPC gene. Putative virulence factors (VFs) were predicted in all genomes, related to adherence, toxins production, colonization and cell invasion. Secretion systems, including type III, were detected. A significant number of transposases and genomic islands were predicted in C. haemolyticum, in some cases above the average reported for Gram-negative bacterial genomes. We conclude that C. haemolyticum strains, including those of environmental origin, present a noteworthy collection of antibiotic resistance genes and VFs. Furthermore, sequences related to gene mobility and genome plasticity suggest high adaptability potential and a possible role as disseminator of antibiotic resistance.}, } @article {pmid32302403, year = {2020}, author = {Gounot, JS and Neuvéglise, C and Freel, KC and Devillers, H and Piškur, J and Friedrich, A and Schacherer, J}, title = {High Complexity and Degree of Genetic Variation in Brettanomyces bruxellensis Population.}, journal = {Genome biology and evolution}, volume = {12}, number = {6}, pages = {795-807}, pmid = {32302403}, issn = {1759-6653}, abstract = {Genome-wide characterization of genetic variants of a large population of individuals within the same species is essential to have a deeper insight into its evolutionary history as well as the genotype-phenotype relationship. Population genomic surveys have been performed in multiple yeast species, including the two model organisms, Saccharomyces cerevisiae and Schizosaccharomyces pombe. In this context, we sought to characterize at the population level the Brettanomyces bruxellensis yeast species, which is a major cause of wine spoilage and can contribute to the specific flavor profile of some Belgium beers. 
We have completely sequenced the genome of 53 B. bruxellensis strains isolated worldwide. The annotation of the reference genome allowed us to define the gene content of this species. As previously suggested, our genomic data clearly highlighted that genetic diversity variation is related to ploidy level, which is variable in the B. bruxellensis species. Genomes are punctuated by multiple loss-of-heterozygosity regions, whereas aneuploidies as well as segmental duplications are uncommon. Interestingly, triploid genomes are more prone to gene copy number variation than diploids. Finally, the pangenome of the species was reconstructed and was found to be small with few accessory genes compared with S. cerevisiae. The pangenome is composed of 5,409 ORFs (open reading frames) among which 5,106 core ORFs and 303 ORFs that are variable within the population. All these results highlight the different trajectories of species evolution and consequently the interest of establishing population genomic surveys in more populations.}, } @article {pmid32299360, year = {2020}, author = {Dziadkiewicz, P and Dojer, N}, title = {Getting insight into the pan-genome structure with PangTree.}, journal = {BMC genomics}, volume = {21}, number = {Suppl 2}, pages = {274}, pmid = {32299360}, issn = {1471-2164}, abstract = {BACKGROUND: The term pan-genome was proposed to denominate collections of genomic sequences jointly analyzed or used as a reference. The constant growth of genomic data intensifies development of data structures and algorithms to investigate pan-genomes efficiently.

RESULTS: This work focuses on providing a tool for discovering and visualizing the relationships between the sequences constituting a pan-genome. A new structure to represent such relationships - called an affinity tree - is proposed. Each node of this tree is assigned a subset of genomes, as well as their homogeneity level and averaged consensus sequence. Moreover, subsets assigned to sibling nodes form a partition of the genomes assigned to their parent.

CONCLUSIONS: Functionality of affinity tree is demonstrated on simulated data and on the Ebola virus pan-genome. Furthermore, two software packages are provided: PangTreeBuild constructs affinity tree, while PangTreeVis presents its result.}, } @article {pmid32296571, year = {2020}, author = {Yu, Y and Wei, C}, title = {A powerful HUPAN on a pan-genome study: significance and perspectives.}, journal = {Cancer biology & medicine}, volume = {17}, number = {1}, pages = {1-5}, pmid = {32296571}, issn = {2095-3941}, } @article {pmid32291353, year = {2020}, author = {Moulana, A and Anderson, RE and Fortunato, CS and Huber, JA}, title = {Selection Is a Significant Driver of Gene Gain and Loss in the Pangenome of the Bacterial Genus Sulfurovum in Geographically Distinct Deep-Sea Hydrothermal Vents.}, journal = {mSystems}, volume = {5}, number = {2}, pages = {}, pmid = {32291353}, issn = {2379-5077}, abstract = {Microbial genomes have highly variable gene content, and the evolutionary history of microbial populations is shaped by gene gain and loss mediated by horizontal gene transfer and selection. To evaluate the influence of selection on gene content variation in hydrothermal vent microbial populations, we examined 22 metagenome-assembled genomes (MAGs) (70 to 97% complete) from the ubiquitous vent Epsilonbacteraeota genus Sulfurovum that were recovered from two deep-sea hydrothermal vent regions, Axial Seamount in the northeastern Pacific Ocean (13 MAGs) and the Mid-Cayman Rise in the Caribbean Sea (9 MAGs). Genes involved in housekeeping functions were highly conserved across Sulfurovum lineages. However, genes involved in environment-specific functions, and in particular phosphate regulation, were found mostly in Sulfurovum genomes from the Mid-Cayman Rise in the low-phosphate Atlantic Ocean environment, suggesting that nutrient limitation is an important selective pressure for these bacteria. 
Furthermore, genes that were rare within the pangenome were more likely to undergo positive selection than genes that were highly conserved in the pangenome, and they also appeared to have experienced gene-specific sweeps. Our results suggest that selection is a significant driver of gene gain and loss for dominant microbial lineages in hydrothermal vents and highlight the importance of factors like nutrient limitation in driving microbial adaptation and evolution. IMPORTANCE: Microbes can alter their gene content through the gain and loss of genes. However, there is some debate as to whether natural selection or neutral processes play a stronger role in molding the gene content of microbial genomes. In this study, we examined variation in gene content for the Epsilonbacteraeota genus Sulfurovum from deep-sea hydrothermal vents, which are dynamic habitats known for extensive horizontal gene transfer within microbial populations. Our results show that natural selection is a strong driver of Sulfurovum gene content and that nutrient limitation in particular has shaped the Sulfurovum genome, leading to differences in gene content between ocean basins. Our results also suggest that recently acquired genes undergo stronger selection than genes that were acquired in the more distant past. Overall, our results highlight the importance of natural selection in driving the evolution of microbial populations in these dynamic habitats.}, } @article {pmid32279278, year = {2020}, author = {Oh, YJ and Kim, JY and Jo, HE and Park, HK and Lim, SK and Kwon, MS and Choi, HJ}, title = {Lentibacillus cibarius sp. nov., isolated from kimchi, a Korean fermented food.}, journal = {Journal of microbiology (Seoul, Korea)}, volume = {58}, number = {5}, pages = {387-394}, pmid = {32279278}, issn = {1976-3794}, abstract = {Two bacterial strains designated NKC220-2T and NKC851-2 were isolated from commercial kimchi from different areas in Korea. 
The strains were Gram-positive, aerobic, oxidase- and catalase-positive, rod-shaped, spore-forming, non-motile, and halophilic bacteria. Both strains grew without NaCl, unlike type species in the genus Lentibacillus. The optimal pH for growth was 8.0, higher than that of the type species in the genus Lentibacillus, although growth was observed at pH 5.5-9.0. 16S rRNA gene sequence-based phylogenetic analysis indicated that the two strains (99.3-99.9% similarity) are grouped within the genus Lentibacillus and most closely related to Lentibacillus juripiscarius IS40-3T (97.4-97.6% similarity) isolated from fish sauce in Thailand. OrthoANI value between two novel strains and Lentibacillus lipolyticus SSKP1-9T (79.5-79.6% similarity) was far lower than the species demarcation threshold. Comparative genomic analysis displayed differences between the two strains as well as among other strains belonging to Lentibacillus. Furthermore, each isolate had strain-specific groups of orthologous genes based on pangenome analysis. Genomic G + C contents of strains NKC220-2T and NKC851-2 were 41.9 and 42.2 mol%, respectively. The strains contained meso-diaminopimelic acid in their cell walls, and the major menaquinone was menaquinone-7. Phosphatidylglycerol, diphosphatidylglycerol, and an unidentified glycolipid, aminophospholipid, and phospholipid were the major polar lipid components of both strains. The major cellular fatty acids of the strains were anteiso-C15:0 and anteiso-C17:0. Based on phenotypic, genomic, phylogenetic, and chemotaxonomic features, strains NKC220-2T and NKC851-2 represent a novel species of the genus Lentibacillus, for which the name Lentibacillus cibarius sp. nov. is proposed. 
The type strain is NKC220-2T (= KACC 21232T = JCM 33390T).}, } @article {pmid32278144, year = {2020}, author = {Zeb, S and Gulfam, SM and Bokhari, H}, title = {Comparative core/pan genome analysis of Vibrio cholerae isolates from Pakistan.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {82}, number = {}, pages = {104316}, doi = {10.1016/j.meegid.2020.104316}, pmid = {32278144}, issn = {1567-7257}, abstract = {Cholera is an endemic disease in many regions of Asia including, Pakistan. Vibrio cholerae, the causative agent of cholera, is considered as one of the best adapted bacteria due to its ability to withstand severe environmental stresses. The V. cholerae genome is very plastic with many gene additions and deletions. In this study, we sought to understand the diversity of V. cholerae genes in two Pakistani subclades [e.g. Pakistani subclade I (PSC I) and Pakistani subclade II (PSC II)]. We have analyzed 44 PSC I and 56 PSC II strains, respectively. By analyzing our data, it was concluded that subclade group 2 (PSC II) has 2967 core genes repositories, while the PSC 1 group has just 1062 core genes. It was observed that the pangenome in the PSC II group is open while the pan-genome in PSC I are closed. It was also noted that the number of accessory genes (n = 2500) is higher in the PSC I group compared to the PSC II group (n = 550). Furthermore, analysis extended to the study of unique gene profiles suggested that all strains of the PSC II group have unique genes. One strain among the PSC II group had a high number of unique genes (n = 2612). However, in the PSC I group, only a few strains had unique genes with a maximum of 86 unique genes being found in a single strain. Core phylogeny of PSC I indicated that just three groups initially arose from a single common ancestor. 
At the same time, a complex pattern of evolution was found in the PSC II phylogenetic tree based on core gene information. This comparative genomic analysis has revealed 'waves' of V. cholerae evolution and information on its transmission and ability to modify its genetic content to survive in different environmental conditions. Here, we have investigated how the versatility of V. cholerae, a bacterium that persists across different habitats, is reflected in its genome. The data generated during the study should be extremely beneficial in defining the evolutionary relationship as well as diversity between V. cholerae subclades. It will also benefit epidemiological studies and the design of better treatment strategies for controlling epidemics.}, } @article {pmid32278068, year = {2020}, author = {Zhao, J and Liu, C and Liu, Y and Zhang, Y and Xiong, Z and Fan, Y and Zou, X and Lu, B and Cao, B}, title = {Genomic characteristics of clinically important ST11 Klebsiella pneumoniae strains worldwide.}, journal = {Journal of global antimicrobial resistance}, volume = {22}, number = {}, pages = {519-526}, doi = {10.1016/j.jgar.2020.03.023}, pmid = {32278068}, issn = {2213-7173}, abstract = {OBJECTIVES: ST11 Klebsiella pneumoniae is among the most important clinical pathogens in China, and KL47 and KL64 are the dominant K types of these strains. Understanding the genomic characteristics of these strains would be critical to their anti-infection treatment.

METHODS: There were 364 genome sequences of ST11 K. pneumoniae strains isolated and collected from 13 countries from 2003 to 2018. These genome sequences included 338 downloaded from the National Center for Biotechnology Information (NCBI) database and 26 newly sequenced. Phylogenetic analyses of pan-genome and unique genes, and resistance and virulence gene analyses, were carried out to elucidate the molecular characteristics of these strains.

RESULTS: A total of 19 732 genes were identified from the 364 ST11 strains, and the pan-genome was open, indicating the genetic diversity of ST11 K. pneumoniae. These strains were clustered into three clades. Clade 1 contained the greatest diversity of K types (14/15, 93.3%) and the most unique genes. KL47 and KL64 were the dominant K types of clades 2 and 3, accounting for 100% and 99.4% of strains in each clade, respectively. KL64 strains contained the most virulence genes, including iucA and rmpA, and the two genes tend to coexist. In addition, strains in clade 1 were isolated from all 13 countries; the strains in clades 2 and 3 were isolated mainly from China.

CONCLUSIONS: The ST11 K. pneumoniae strain of KL64 is a newly emerging superbug, with more resistance and virulence genes in China; this was significantly different from other countries, and we should be alert to the dissemination of this subclone.}, } @article {pmid32265447, year = {2020}, author = {Zhou, Y and Chebotarov, D and Kudrna, D and Llaca, V and Lee, S and Rajasekar, S and Mohammed, N and Al-Bader, N and Sobel-Sorenson, C and Parakkal, P and Arbelaez, LJ and Franco, N and Alexandrov, N and Hamilton, NRS and Leung, H and Mauleon, R and Lorieux, M and Zuccolo, A and McNally, K and Zhang, J and Wing, RA}, title = {A platinum standard pan-genome resource that represents the population structure of Asian rice.}, journal = {Scientific data}, volume = {7}, number = {1}, pages = {113}, pmid = {32265447}, issn = {2052-4463}, mesh = {Crops, Agricultural/genetics ; Genetic Variation ; *Genome, Plant ; Genomics ; Oryza/*genetics ; }, abstract = {As the human population grows from 7.8 billion to 10 billion over the next 30 years, breeders must do everything possible to create crops that are highly productive and nutritious, while simultaneously having less of an environmental footprint. Rice will play a critical role in meeting this demand and thus, knowledge of the full repertoire of genetic diversity that exists in germplasm banks across the globe is required. To meet this demand, we describe the generation, validation and preliminary analyses of transposable element and long-range structural variation content of 12 near-gap-free reference genome sequences (RefSeqs) from representatives of 12 of 15 subpopulations of cultivated Asian rice. 
When combined with 4 existing RefSeqs, that represent the 3 remaining rice subpopulations and the largest admixed population, this collection of 16 Platinum Standard RefSeqs (PSRefSeq) can be used as a template to map resequencing data to detect virtually all standing natural variation that exists in the pan-genome of cultivated Asian rice.}, } @article {pmid32245763, year = {2020}, author = {Smith, EA and Miller, EA and Weber, BP and Munoz Aguayo, J and Flores Figueroa, C and Huisinga, J and Nezworski, J and Kromm, M and Wileman, B and Johnson, TJ}, title = {Genomic Landscape of Ornithobacterium rhinotracheale in Commercial Turkey Production in the United States.}, journal = {Applied and environmental microbiology}, volume = {86}, number = {11}, pages = {}, pmid = {32245763}, issn = {1098-5336}, mesh = {Animal Husbandry ; Animals ; Cross-Sectional Studies ; Flavobacteriaceae Infections/microbiology/veterinary ; *Genome, Bacterial ; Midwestern United States ; Ornithobacterium/*genetics ; Poultry Diseases/microbiology ; Retrospective Studies ; Southeastern United States ; Turkeys/*microbiology ; }, abstract = {Ornithobacterium rhinotracheale is a causative agent of respiratory tract infections in avian hosts worldwide but is a particular problem for commercial turkey production. Little is known about the ecologic and evolutionary dynamics of O. rhinotracheale, which makes prevention and control of this pathogen a challenge. The purpose of this study was to gain insight into the genetic relationships between O. rhinotracheale populations through comparative genomics of clinical isolates from different U.S. turkey producers. O. rhinotracheale clinical isolates were collected from four major U.S. turkey producers and several independent turkey growers from the upper Midwest and Southeast, and whole-genome sequencing was performed. 
Genomes were compared phylogenetically using single nucleotide polymorphism (SNP)-based analysis, and then assembly and annotations were performed to identify genes encoding putative virulence factors and antimicrobial resistance determinants. A pangenome approach was also used to establish a core set of genes consistently present in O. rhinotracheale and to highlight differences in gene content between phylogenetic clades. A total of 1,457 nonrecombinant SNPs were identified from 157 O. rhinotracheale genomes, and four distinct phylogenetic clades were identified. Isolates clustered by company on the phylogenetic tree, however, and each company had isolates in multiple clades with similar collection dates, indicating that there are multiple O. rhinotracheale strains circulating within each of the companies examined. Additionally, several antimicrobial resistance proteins, putative virulence factors, and the pOR1 plasmid were associated with particular clades and multilocus sequence types, which may explain why the same strains seem to have persisted in the same turkey operations for decades. IMPORTANCE: The whole-genome approach enhances our understanding of evolutionary relationships between clinical Ornithobacterium rhinotracheale isolates from different commercial turkey producers and allows for identification of genes associated with virulence, antimicrobial resistance, or mobile genetic elements that are often excluded using traditional typing methods. Additionally, differentiating O. 
rhinotracheale isolates at the whole-genome level may provide insight into selection of the most appropriate autogenous vaccine strain, or groups of strains, for a given population of clinical isolates.}, } @article {pmid32239329, year = {2020}, author = {Zhu, L and Zhao, M and Chen, M and Li, L and Jiang, Y and Liu, S and Jiang, Y and Wang, K and Wang, Y and Sun, C and Chen, J and Chen, P and Lei, J and Su, Y and Wang, Y and Zhang, M}, title = {The bHLH gene family and its response to saline stress in Jilin ginseng, Panax ginseng C.A. Meyer.}, journal = {Molecular genetics and genomics : MGG}, volume = {295}, number = {4}, pages = {877-890}, doi = {10.1007/s00438-020-01658-w}, pmid = {32239329}, issn = {1617-4623}, support = {2013AA102604-3//China 863 Project/ ; 20170101010JC//the Bureau of Science and Technology of Jilin Province/ ; 20180414077GH//the Bureau of Science and Technology of Jilin Province/ ; 20180101027JC//the Bureau of Science and Technology of Jilin Province/ ; 20190201264JC//the Bureau of Science and Technology of Jilin Province/ ; 20190103104JH//the Bureau of Science and Technology of Jilin Province/ ; 2016C064//the Development and Reform Commission of Jilin Province/ ; 2018C047-3//the Development and Reform Commission of Jilin Province/ ; }, mesh = {Alternative Splicing/genetics ; Basic Helix-Loop-Helix Transcription Factors/*genetics ; China ; *Evolution, Molecular ; Gene Expression Regulation, Plant/genetics ; Gene Ontology ; Multigene Family/genetics ; Panax/drug effects/*genetics/growth & development ; Phylogeny ; Saline Solution/toxicity ; Salt Stress/*genetics ; Transcription Factors ; }, abstract = {Basic helix-loop-helix (bHLH) gene family is a gene family of transcription factors that plays essential roles in plant growth and development, secondary metabolism and response to biotic and abiotic stresses. 
Therefore, a comprehensive knowledge of the bHLH gene family is paramount to understand the molecular mechanisms underlying these processes and develop advanced technologies to manipulate the processes efficiently. Ginseng, Panax ginseng C.A. Meyer, is a well-known medicinal herb; however, little is known about the bHLH genes (PgbHLH) in the species. Here, we identified 137 PgbHLH genes from Jilin ginseng cultivar, Damaya, widely cultivated in Jilin, China, of which 50 are newly identified by pan-genome analysis. These 137 PgbHLH genes were phylogenetically classified into 26 subfamilies, suggesting their sequence diversification. They are alternatively spliced into 366 transcripts in a 4-year-old plant and involved in 11 functional subcategories of the gene ontology, indicating their functional differentiation in ginseng. The expressions of the PgbHLH genes dramatically vary spatio-temporally and across 42 genotypes, but they are still somehow functionally correlated. Moreover, the PgbHLH gene family, at least some of its genes, is shown to have roles in plant response to the abiotic stress of saline. These results provide a new insight into the evolution and functional differentiation of the bHLH gene family in plants, new bHLH genes to the PgbHLH gene family, and saline stress-responsive genes for genetic improvement in ginseng and other plant species.}, } @article {pmid32228746, year = {2020}, author = {Niu, XK and Narsing Rao, MP and Dong, ZY and Kan, Y and Li, QR and Huang, J and Zhao, L and Wang, MZ and Shen, ZP and Kang, YQ and Li, WJ}, title = {Vulcaniibacterium gelatinicum sp. 
nov., a moderately thermophilic bacterium isolated from a hot spring.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {70}, number = {3}, pages = {1571-1577}, doi = {10.1099/ijsem.0.003934}, pmid = {32228746}, issn = {1466-5034}, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; Genes, Bacterial ; Hot Springs/*microbiology ; Phospholipids/chemistry ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Ubiquinone/chemistry ; Water Microbiology ; Xanthomonadaceae/*classification/isolation & purification ; }, abstract = {The present study aimed to determine the taxonomic positions of strains designated R-5-52-3T, R-5-33-5-1-2, R-5-48-2 and R-5-51-4 isolated from hot spring water samples. Cells of these strains were Gram-stain-negative, non-motile and rod-shaped. The strains shared highest 16S rRNA gene sequence similarity with Vulcaniibacterium thermophilum KCTC 32020T (95.1%). Growth occurred at 28-55 °C, at pH 6-8 and with up to 3 % (w/v) NaCl. DNA fingerprinting, biochemical, phylogenetic and 16S rRNA gene sequence analyses suggested that R-5-52-3T, R-5-33-5-1-2, R-5-48-2 and R-5-51-4 were different strains but belonged to the same species. Hence, R-5-52-3T was chosen for further analysis and R-5-33-5-1-2, R-5-48-2 and R-5-51-4 were considered as additional strains of this species. R-5-52-3T possessed Q-8 as the only quinone and iso-C15:0, iso-C11:0, C16 : 0 and iso-C17 : 0 as major fatty acids. The polar lipids were diphosphatidylglycerol, phosphatidylglycerol, phosphatidylethanolamine, unidentified polar lipids and two unidentified phospholipids. The genomic G+C content was 71.6 mol%. Heat shock proteins (e.g. Hsp20, GroEL, DnaK and Clp ATPases) were noted in the R-5-52-3T genome, which could suggest its protection in the hot spring environment. Pan-genome analysis showed the number of singleton gene clusters among Vulcaniibacterium members varied. 
Average nucleotide identity (ANI) values between R-5-52-3T, Vulcaniibacterium tengchongense YIM 77520T and V. thermophilum KCTC 32020T were 80.1-85.8 %, which were below the cut-off level (95-96 %) recommended as the ANI criterion for interspecies identity. Thus, based on the above results, strain R-5-52-3T represents a novel species of the genus Vulcaniibacterium, for which the name Vulcaniibacterium gelatinicum sp. nov. is proposed. The type strain is R-5-52-3T (=KCTC 72061T=CGMCC 1.16678T).}, } @article {pmid32198762, year = {2020}, author = {Dunning, LT and Christin, PA}, title = {Reticulate evolution, lateral gene transfer, and innovation in plants.}, journal = {American journal of botany}, volume = {107}, number = {4}, pages = {541-544}, doi = {10.1002/ajb2.1452}, pmid = {32198762}, issn = {1537-2197}, mesh = {Evolution, Molecular ; *Gene Transfer, Horizontal ; Phylogeny ; *Plants ; }, } @article {pmid32196089, year = {2020}, author = {Muthukumarasamy, U and Preusse, M and Kordes, A and Koska, M and Schniederjans, M and Khaledi, A and Häussler, S}, title = {Single-Nucleotide Polymorphism-Based Genetic Diversity Analysis of Clinical Pseudomonas aeruginosa Isolates.}, journal = {Genome biology and evolution}, volume = {12}, number = {4}, pages = {396-406}, pmid = {32196089}, issn = {1759-6653}, abstract = {Extensive use of next-generation sequencing has the potential to transform our knowledge on how genomic variation within bacterial species impacts phenotypic versatility. Because different environments have unique selection pressures, they drive divergent evolution. However, there is also parallel or convergent evolution of traits in independent bacterial isolates inhabiting similar environments. The application of tools to describe population-wide genomic diversity provides an opportunity to measure the predictability of genetic changes underlying adaptation. 
Here, we describe patterns of sequence variations in the core genome among 99 individual Pseudomonas aeruginosa clinical isolates and identified single-nucleotide polymorphisms that are the basis for branching of the phylogenetic tree. We also identified single-nucleotide polymorphisms that were acquired independently, in separate lineages, and not through inheritance from a common ancestor. Although our results demonstrate that the Pseudomonas aeruginosa core genome is highly conserved and in general, not subject to adaptive evolution, instances of parallel evolution will provide an opportunity to uncover genetic changes that underlie phenotypic diversity.}, } @article {pmid32191703, year = {2020}, author = {Gautreau, G and Bazin, A and Gachet, M and Planel, R and Burlot, L and Dubois, M and Perrin, A and Médigue, C and Calteau, A and Cruveiller, S and Matias, C and Ambroise, C and Rocha, EPC and Vallenet, D}, title = {PPanGGOLiN: Depicting microbial diversity via a partitioned pangenome graph.}, journal = {PLoS computational biology}, volume = {16}, number = {3}, pages = {e1007732}, pmid = {32191703}, issn = {1553-7358}, mesh = {Algorithms ; Bacteria/classification/genetics ; Genome, Bacterial/*genetics ; Genomics/*methods ; Multivariate Analysis ; *Software ; }, abstract = {The use of comparative genomics for functional, evolutionary, and epidemiological studies requires methods to classify gene families in terms of occurrence in a given species. These methods usually lack multivariate statistical models to infer the partitions and the optimal number of classes and don't account for genome organization. We introduce a graph structure to model pangenomes in which nodes represent gene families and edges represent genomic neighborhood. Our method, named PPanGGOLiN, partitions nodes using an Expectation-Maximization algorithm based on multivariate Bernoulli Mixture Model coupled with a Markov Random Field. 
This approach takes into account the topology of the graph and the presence/absence of genes in pangenomes to classify gene families into persistent, cloud, and one or several shell partitions. By analyzing the partitioned pangenome graphs of isolate genomes from 439 species and metagenome-assembled genomes from 78 species, we demonstrate that our method is effective in estimating the persistent genome. Interestingly, it shows that the shell genome is a key element to understand genome dynamics, presumably because it reflects how genes present at intermediate frequencies drive adaptation of species, and its proportion in genomes is independent of genome size. The graph-based approach proposed by PPanGGOLiN is useful to depict the overall genomic diversity of thousands of strains in a compact structure and provides an effective basis for very large scale comparative genomics. The software is freely available at https://github.com/labgem/PPanGGOLiN.}, } @article {pmid32188120, year = {2020}, author = {Hasni, I and Andréani, J and Colson, P and La Scola, B}, title = {Description of Virulent Factors and Horizontal Gene Transfers of Keratitis-Associated Amoeba Acanthamoeba Triangularis by Genome Analysis.}, journal = {Pathogens (Basel, Switzerland)}, volume = {9}, number = {3}, pages = {}, pmid = {32188120}, issn = {2076-0817}, support = {10-IAHU-03//Agence Nationale de la Recherche/ ; }, abstract = {Acanthamoeba triangularis strain SH 621 is a free-living amoeba belonging to Acanthamoeba ribo-genotype T4. This ubiquitous protist is among the free-living amoebas responsible for Acanthamoeba keratitis, a severe infection of human cornea. Genome sequencing and genomic comparison were carried out to explore the biological functions and to better understand the virulence mechanism related to the pathogenicity of Acanthamoeba keratitis. The genome assembly harbored a length of 66.43 Mb encompassing 13,849 scaffolds. 
The analysis of predicted proteins reported the presence of 37,062 ORFs. A complete annotation revealed 33,168 and 16,605 genes that matched with NCBI non-redundant protein sequence (nr) and Cluster of Orthologous Group of proteins (COG) databases, respectively. The Kyoto Encyclopedia of Genes and Genomes Pathway (KEGG) annotation reported a great number of genes related to carbohydrate, amino acid and lipid metabolic pathways. The pangenome performed with 8 available amoeba genomes belonging to genus Acanthamoeba revealed a core genome containing 843 clusters of orthologous genes with a ratio core genome/pangenome of less than 0.02. We detected 48 genes related to virulent factors of Acanthamoeba keratitis. Best hit analyses in nr database identified 99 homologous genes shared with amoeba-resisting microorganisms. This study allows the deciphering the genome of a free-living amoeba with medical interest and provides genomic data to better understand virulence-related Acanthamoeba keratitis.}, } @article {pmid32188055, year = {2020}, author = {Kim, YJ and Park, JY and Balusamy, SR and Huo, Y and Nong, LK and Thi Le, H and Yang, DC and Kim, D}, title = {Comprehensive Genome Analysis on the Novel Species Sphingomonas panacis DCY99T Reveals Insights into Iron Tolerance of Ginseng.}, journal = {International journal of molecular sciences}, volume = {21}, number = {6}, pages = {}, pmid = {32188055}, issn = {1422-0067}, support = {2019R1A2C1010428//National Research Foundation of Korea/ ; PJ0128132017//Rural Development Administration/ ; }, mesh = {DNA, Bacterial ; Drug Tolerance/*genetics ; Genes, Bacterial/genetics ; Genome Size ; *Genome, Bacterial ; Hydroxybenzoates ; Iron/*metabolism/toxicity ; Metals, Heavy ; Panax/*microbiology ; Plant Development ; Plant Roots/microbiology ; Soil Microbiology ; Sphingomonas/drug effects/*genetics/isolation & purification/*physiology ; Stress, Physiological ; }, abstract = {Plant growth-promoting rhizobacteria play vital roles not 
only in plant growth, but also in reducing biotic/abiotic stress. Sphingomonas panacis DCY99T was isolated from the soil and root of Panax ginseng with rusty root disease, which is characterized by raised reddish-brown root and seriously affects ginseng cultivation. To investigate the relationship between 159 sequenced Sphingomonas strains, pan-genome analysis was carried out, which suggested genomic diversity of the Sphingomonas genus. Comparative analysis of S. panacis DCY99T with Sphingomonas sp. LK11 revealed plant growth-promoting potential of S. panacis DCY99T through indole acetic acid production, phosphate solubilizing, and antifungal abilities. Detailed genomic analysis has shown that S. panacis DCY99T contains various heavy metal resistance genes in its genome and the plasmid. Functional analysis with Sphingomonas paucimobilis EPA505 predicted that S. panacis DCY99T possesses genes for degradation of polyaromatic hydrocarbon and phenolic compounds in rusty-ginseng root. Interestingly, when ginseng was primed with S. panacis DCY99T during exposure to a high concentration of iron, iron stress of ginseng was suppressed. In order to detect S. panacis DCY99T in soil, a biomarker was designed using the spt gene. This study brings new insights into the role of S. 
panacis DCY99T as a microbial inoculant to protect ginseng plants against rusty root disease.}, } @article {pmid32182882, year = {2020}, author = {Kang, SM and Asaf, S and Khan, AL and Lubna, and Khan, A and Mun, BG and Khan, MA and Gul, H and Lee, IJ}, title = {Complete Genome Sequence of Pseudomonas psychrotolerans CS51, a Plant Growth-Promoting Bacterium, Under Heavy Metal Stress Conditions.}, journal = {Microorganisms}, volume = {8}, number = {3}, pages = {}, pmid = {32182882}, issn = {2076-2607}, abstract = {In the current study, we aimed to elucidate the plant growth-promoting characteristics of Pseudomonas psychrotolerans CS51 under heavy metal stress conditions (Zn, Cu, and Cd) and determine the genetic makeup of the CS51 genome using the single-molecule real-time (SMRT) sequencing technology of Pacific Biosciences. The results revealed that inoculation with CS51 induced endogenous indole-3-acetic acid (IAA) and gibberellins (GAs), which significantly enhanced cucumber growth (root shoot length) and increased the heavy metal tolerance of cucumber plants. Moreover, genomic analysis revealed that the CS51 genome consisted of a circular chromosome of 5,364,174 base pairs with an average G+C content of 64.71%. There were around 4774 predicted protein-coding sequences (CDSs) in 4859 genes, 15 rRNA genes, and 67 tRNA genes. Around 3950 protein-coding genes with function prediction and 733 genes without function prediction were identified. Furthermore, functional analyses predicted that the CS51 genome could encode genes required for auxin biosynthesis, nitrate and nitrite ammonification, the phosphate-specific transport system, and the sulfate transport system, which are beneficial for plant growth promotion. The heavy metal resistance of CS51 was confirmed by the presence of genes responsible for cobalt-zinc-cadmium resistance, nickel transport, and copper homeostasis in the CS51 genome. 
The extrapolation of the curve showed that the core genome contained a minimum of 2122 genes (95% confidence interval = 2034.24 to 2080.215). Our findings indicated that the genome sequence of CS51 may be used as an eco-friendly bioresource to promote plant growth in heavy metal-contaminated areas.}, } @article {pmid32169520, year = {2020}, author = {Satyam, R and Bhardwaj, T and Jha, NK and Jha, SK and Nand, P}, title = {Toward a chimeric vaccine against multiple isolates of Mycobacteroides - An integrative approach.}, journal = {Life sciences}, volume = {250}, number = {}, pages = {117541}, doi = {10.1016/j.lfs.2020.117541}, pmid = {32169520}, issn = {1879-0631}, mesh = {Alleles ; B-Lymphocytes/immunology ; Bacterial Vaccines/*chemistry ; Bacteriophages ; CRISPR-Cas Systems ; Computational Biology ; Drug Resistance, Bacterial ; Epitopes ; Epitopes, T-Lymphocyte/genetics ; Gastrointestinal Microbiome ; Genome, Bacterial ; Genomics ; Gram-Positive Bacterial Infections/*prevention & control ; Histocompatibility Antigens Class I/metabolism ; Histocompatibility Antigens Class II/metabolism ; Humans ; Immunotherapy ; Molecular Docking Simulation ; Molecular Dynamics Simulation ; Mycobacteriaceae/*genetics/pathogenicity ; Proteome ; *Vaccinology ; Virulence ; }, abstract = {AIM: Nontuberculous mycobacterial (NTM) infection such as endophthalmitis, dacryocystitis, and canaliculitis are pervasive across the globe and are currently managed by antibiotics. However, the recent cases of Mycobacteroides developing drug resistance reported along with the improper practice of medicine intrigued us to explore its genomic and proteomic canvas at a global scale and develop a chimeric vaccine against Mycobacteroides.

MAIN METHODS: We carried out a vivid genomic study on five recently sequenced strains of Mycobacteroides and explored their Pan-core genome/proteome in three different phases. The promiscuous antigenic proteins were identified via a subtractive proteomics approach that qualified for virulence causation, resistance and essentiality factors for this notorious bacterium. An integrated pipeline was developed for the identification of B-Cell, MHC (Major histocompatibility complex) class I and II epitopes.

KEY FINDINGS: Phase I identified the shreds of evidence of reductive evolution and propensity of the Pan-genome of Mycobacteroides getting closed soon. Phase II and Phase III produced 8 vaccine constructs. Our final vaccine construct, V6 qualified for all tests such as absence for allergenicity, presence of antigenicity, etc. V6 contains β-defensin as an adjuvant, linkers, Lysosomal-associated membrane protein 1 (LAMP1) signal peptide, and PADRE (Pan HLA-DR epitopes) amino acid sequence. Besides, V6 also interacts with a maximum number of MHC molecules and the TLR4/MD2 (Toll-like receptor 4/Myeloid differentiation factor 2) complex confirmed by docking and molecular dynamics simulation studies.

SIGNIFICANCE: The knowledge harnessed from the current study can help improve the current treatment regimens or in an event of an outbreak and propel further related studies.}, } @article {pmid32151246, year = {2020}, author = {Chen, M and Xu, CY and Wang, X and Ren, CY and Ding, J and Li, L}, title = {Comparative genomics analysis of c-di-GMP metabolism and regulation in Microcystis aeruginosa.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {217}, pmid = {32151246}, issn = {1471-2164}, support = {21577081//Natural Science Foundation of China/ ; }, mesh = {Computational Biology ; Cyclic GMP/*metabolism ; Escherichia coli Proteins/genetics/metabolism ; Gene Expression Regulation, Bacterial/*genetics ; Genomics ; Microcystis/classification/genetics/*metabolism ; Phosphorus-Oxygen Lyases/genetics/metabolism ; Phylogeny ; Protein Domains ; Signal Transduction ; }, abstract = {BACKGROUND: Cyanobacteria are of special concern because they proliferate in eutrophic water bodies worldwide and affect water quality. As an ancient photosynthetic microorganism, cyanobacteria can survive in ecologically diverse habitats because of their capacity to rapidly respond to environmental changes through a web of complex signaling networks, including using second messengers to regulate physiology or metabolism. A ubiquitous second messenger, bis-(3',5')-cyclic-dimeric-guanosine monophosphate (c-di-GMP), has been found to regulate essential behaviors in a few cyanobacteria but not Microcystis, which are the most dominant species in cyanobacterial blooms. In this study, comparative genomics analysis was performed to explore the genomic basis of c-di-GMP signaling in Microcystis aeruginosa.

RESULTS: Proteins involved in c-di-GMP metabolism and regulation, such as diguanylate cyclases, phosphodiesterases, and PilZ-containing proteins, were encoded in M. aeruginosa genomes. However, the number of identified protein domains involved in c-di-GMP signaling was not proportional to the size of M. aeruginosa genomes (4.97 Mb in average). Pan-genome analysis showed that genes involved in c-di-GMP metabolism and regulation are conservative in M. aeruginosa strains. Phylogenetic analysis showed good congruence between the two types of phylogenetic trees based on 31 highly conserved protein-coding genes and sensor domain-coding genes. Propensity for gene loss analysis revealed that most of genes involved in c-di-GMP signaling are stable in M. aeruginosa strains. Moreover, bioinformatics and structure analysis of c-di-GMP signal-related GGDEF and EAL domains revealed that they all possess essential conserved amino acid residues that bind the substrate. In addition, it was also found that all selected M. aeruginosa genomes encode PilZ domain containing proteins.

CONCLUSIONS: Comparative genomics analysis of c-di-GMP metabolism and regulation in M. aeruginosa strains helped elucidating the genetic basis of c-di-GMP signaling pathways in M. aeruginosa. Knowledge of c-di-GMP metabolism and relevant signal regulatory processes in cyanobacteria can enhance our understanding of their adaptability to various environments and bloom-forming mechanism.}, } @article {pmid32149071, year = {2020}, author = {Aaltonen, K and Kant, R and Eklund, M and Raunio-Saarnisto, M and Paulin, L and Vapalahti, O and Grönthal, T and Rantala, M and Sironen, T}, title = {Streptococcus halichoeri: Comparative Genomics of an Emerging Pathogen.}, journal = {International journal of genomics}, volume = {2020}, number = {}, pages = {8708305}, pmid = {32149071}, issn = {2314-4378}, abstract = {Streptococcus halichoeri is an emerging pathogen with a variety of host species and zoonotic potential. It has been isolated from grey seals and other marine mammals as well as from human infections. Beginning in 2010, two concurrent epidemics were identified in Finland, in fur animals and domestic dogs, respectively. The fur animals suffered from a new disease fur animal epidemic necrotic pyoderma (FENP) and the dogs presented with ear infections with poor treatment response. S. halichoeri was isolated in both studies, albeit among other pathogens, indicating a possible role in the disease etiologies. The aim was to find a possible common origin of the fur animal and dog isolates and study the virulence factors to assess pathogenic potential. Isolates from seal, human, dogs, and fur animals were obtained for comparison. The whole genomes were sequenced from 20 different strains using the Illumina MiSeq platform and annotated using an automatic annotation pipeline RAST. The core and pangenomes were formed by comparing the genomes against each other in an all-against-all comparison. A phylogenetic tree was constructed using the genes of the core genome. 
Virulence factors were assessed using the Virulence Factor Database (VFDB) concentrating on the previously confirmed streptococcal factors. A core genome was formed which encompassed approximately half of the genes in Streptococcus halichoeri. The resulting core was nearly saturated and would not change significantly by adding more genomes. The remaining genes formed the pangenome which was highly variable and would still evolve after additional genomes. The results highlight the great adaptability of this bacterium possibly explaining the ease at which it switches hosts and environments. Virulence factors were also analyzed and were found primarily in the core genome. They represented many classes and functions, but the largest single category was adhesins which again supports the marine origin of this species.}, } @article {pmid32138767, year = {2020}, author = {Moustafa, AM and Planet, PJ}, title = {WhatsGNU: a tool for identifying proteomic novelty.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {58}, pmid = {32138767}, issn = {1474-760X}, support = {1R01AI137526-01/NH/NIH HHS/United States ; 1K08AI101005/NH/NIH HHS/United States ; }, abstract = {To understand diversity in enormous collections of genome sequences, we need computationally scalable tools that can quickly contextualize individual genomes based on their similarities and identify features of each genome that make them unique. We present WhatsGNU, a tool based on exact match proteomic compression that, in seconds, classifies any new genome and provides a detailed report of protein alleles that may have novel functional differences. We use this technique to characterize the total allelic diversity (panallelome) of Salmonella enterica, Mycobacterium tuberculosis, Pseudomonas aeruginosa, and Staphylococcus aureus. It could be extended to others. 
WhatsGNU is available from https://github.com/ahmedmagds/WhatsGNU.}, } @article {pmid32132208, year = {2020}, author = {Seif, Y and Choudhary, KS and Hefner, Y and Anand, A and Yang, L and Palsson, BO}, title = {Metabolic and genetic basis for auxotrophies in Gram-negative species.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {117}, number = {11}, pages = {6264--6273}, pmid = {32132208}, issn = {1091-6490}, support = {U01 AI124316/AI/NIAID NIH HHS/United States ; }, mesh = {Algorithms ; Computer Simulation ; Energy Metabolism/*genetics ; Genome, Bacterial/*physiology ; Genomics ; Gram-Negative Bacteria/*physiology ; Host Microbial Interactions/*physiology ; Interspersed Repetitive Sequences/genetics ; Metabolic Networks and Pathways/genetics ; Metabolomics ; *Models, Biological ; Nutrients/metabolism ; }, abstract = {Auxotrophies constrain the interactions of bacteria with their environment, but are often difficult to identify. Here, we develop an algorithm (AuxoFind) using genome-scale metabolic reconstruction to predict auxotrophies and apply it to a series of available genome sequences of over 1,300 Gram-negative strains. We identify 54 auxotrophs, along with the corresponding metabolic and genetic basis, using a pangenome approach, and highlight auxotrophies conferring a fitness advantage in vivo. We show that the metabolic basis of auxotrophy is species-dependent and varies with 1) pathway structure, 2) enzyme promiscuity, and 3) network redundancy. Various levels of complexity constitute the genetic basis, including 1) deleterious single-nucleotide polymorphisms (SNPs), in-frame indels, and deletions; 2) single/multigene deletion; and 3) movement of mobile genetic elements (including prophages) combined with genomic rearrangements. 
Fourteen out of 19 predictions agree with experimental evidence, with the remaining cases highlighting shortcomings of sequencing, assembly, annotation, and reconstruction that prevent predictions of auxotrophies. We thus develop a framework to identify the metabolic and genetic basis for auxotrophies in Gram-negatives.}, } @article {pmid32131884, year = {2020}, author = {Jin, Y and Zhou, J and Zhou, J and Hu, M and Zhang, Q and Kong, N and Ren, H and Liang, L and Yue, J}, title = {Genome-based classification of Burkholderia cepacia complex provides new insight into its taxonomic status.}, journal = {Biology direct}, volume = {15}, number = {1}, pages = {6}, pmid = {32131884}, issn = {1745-6150}, mesh = {Bacterial Proteins/analysis ; Burkholderia cepacia complex/*classification/genetics ; *Genome, Bacterial ; Multilocus Sequence Typing ; *Phylogeny ; RNA, Bacterial/analysis ; RNA, Ribosomal, 16S/analysis ; Rec A Recombinases/analysis ; }, abstract = {BACKGROUND: Accurate classification of different Burkholderia cepacia complex (BCC) species is essential for therapy, prognosis assessment and research. The taxonomic status of BCC remains problematic and an improved knowledge about the classification of BCC is in particular needed.

METHODS: We compared phylogenetic trees of BCC based on 16S rRNA, recA, hisA and MLSA (multilocus sequence analysis). Using the available whole genome sequences of BCC, we inferred a species tree based on estimated single-copy orthologous genes and demarcated species of BCC using dDDH/ANI clustering.

RESULTS: We showed that 16S rRNA, recA, hisA and MLSA have limited resolutions in the taxonomic study of closely related bacteria such as BCC. Our estimated species tree and dDDH/ANI clustering clearly separated 116 BCC strains into 36 clusters. With the appropriate reclassification of misidentified strains, these clusters corresponded to 22 known species as well as 14 putative novel species.

CONCLUSIONS: This is the first large-scale and systematic study of the taxonomic status of the BCC and could contribute to further insights into BCC taxonomy. Our study suggested that conjunctive use of core phylogeny based on single-copy orthologous genes, as well as pangenome-based dDDH/ANI clustering would provide a preferable framework for demarcating closely related species.

REVIEWER: This article was reviewed by Dr. Xianwen Ren.}, } @article {pmid32128256, year = {2020}, author = {Thukral, A and Ross, K and Hansen, C and Phanse, Y and Narasimhan, B and Steinberg, H and Talaat, AM}, title = {A single dose polyanhydride-based nanovaccine against paratuberculosis infection.}, journal = {NPJ vaccines}, volume = {5}, number = {}, pages = {15}, pmid = {32128256}, issn = {2059-0105}, abstract = {Mycobacterium avium subsp. paratuberculosis (M. paratuberculosis) causes Johne's disease in ruminants and is characterized by chronic gastroenteritis leading to heavy economic losses to the dairy industry worldwide. The currently available vaccine (inactivated bacterin in oil base) is not effective in preventing pathogen shedding and is rarely used to control Johne's disease in dairy herds. To develop a better vaccine that can prevent the spread of Johne's disease, we utilized polyanhydride nanoparticles (PAN) to encapsulate mycobacterial antigens composed of whole cell lysate (PAN-Lysate) and culture filtrate (PAN-Cf) of M. paratuberculosis. These nanoparticle-based vaccines (i.e., nanovaccines) were well tolerated in mice causing no inflammatory lesions at the site of injection. Immunological assays demonstrated a substantial increase in the levels of antigen-specific T cell responses post-vaccination in the PAN-Cf vaccinated group as indicated by high percentages of triple cytokine (IFN-γ, IL-2, TNF-α) producing CD8+ T cells. Following challenge, animals vaccinated with PAN-Cf continued to produce significant levels of double (IFN-γ, TNF-α) and single cytokine (IFN-γ) secreting CD8+ T cells compared with animals vaccinated with an inactivated vaccine. A significant reduction in bacterial load was observed in multiple organs of animals vaccinated with PAN-Cf, which is a clear indication of protection. 
Overall, the use of polyanhydride nanovaccines resulted in development of protective and sustained immunity against Johne's disease, an approach that could be applied to counter other intracellular pathogens.}, } @article {pmid32108566, year = {2020}, author = {Tekedar, HC and Blom, J and Kalindamar, S and Nho, S and Karsi, A and Lawrence, ML}, title = {Comparative genomics of the fish pathogens Edwardsiella ictaluri 93-146 and Edwardsiella piscicida C07-087.}, journal = {Microbial genomics}, volume = {6}, number = {2}, pages = {}, pmid = {32108566}, issn = {2057-5858}, mesh = {Animals ; Catfishes/microbiology ; Edwardsiella/*genetics/isolation & purification/metabolism ; Edwardsiella ictaluri/*genetics/isolation & purification/metabolism ; Enterobacteriaceae Infections/microbiology/*veterinary ; Fish Diseases/*microbiology ; *Genome, Bacterial ; Genomics ; Phylogeny ; }, abstract = {Edwardsiella ictaluri and Edwardsiella piscicida are important fish pathogens affecting cultured and wild fish worldwide. To investigate the genome-level differences and similarities between catfish-adapted strains in these two species, the complete E. ictaluri 93-146 and E. piscicida C07-087 genomes were evaluated by applying comparative genomics analysis. All available complete (10) and non-complete (19) genomes from five Edwardsiella species were also included in a systematic analysis. Average nucleotide identity and core-genome phylogenetic tree analyses indicated that the five Edwardsiella species were separated from each other. Pan-/core-genome analyses for the 29 strains from the five species showed that genus Edwardsiella members have 9474 genes in their pan genome, while the core genome consists of 1421 genes. Orthology cluster analysis showed that E. ictaluri and E. piscicida genomes have the greatest number of shared clusters. However, E. ictaluri and E. piscicida also have unique features; for example, the E. 
ictaluri genome encodes urease enzymes and cytochrome o ubiquinol oxidase subunits, whereas E. piscicida genomes encode tetrathionate reductase operons, capsular polysaccharide synthesis enzymes and vibrioferrin-related genes. Additionally, we report for what is believed to be the first time that E. ictaluri 93-146 and three other E. ictaluri genomes encode a type IV secretion system (T4SS), whereas none of the E. piscicida genomes encode this system. Additionally, the E. piscicida C07-087 genome encodes two different type VI secretion systems. E. ictaluri genomes tend to encode more insertion elements, phage regions and genomic islands than E. piscicida. We speculate that the T4SS could contribute to the increased number of mobilome elements in E. ictaluri compared to E. piscicida. Two of the E. piscicida genomes encode full CRISPR-Cas regions, whereas none of the E. ictaluri genomes encode Cas proteins. Overall, comparison of the E. ictaluri and E. piscicida genomes reveals unique features and provides new insights on pathogenicity that may reflect the host adaptation of the two species.}, } @article {pmid32106516, year = {2020}, author = {Li, Q and Cooper, RE and Wegner, CE and Küsel, K}, title = {Molecular Mechanisms Underpinning Aggregation in Acidiphilium sp. C61 Isolated from Iron-Rich Pelagic Aggregates.}, journal = {Microorganisms}, volume = {8}, number = {3}, pages = {}, pmid = {32106516}, issn = {2076-2607}, support = {Jena School for Microbial Communication (JSMC) graduate school//Deutsche Forschungsgemeinschaft/ ; SFB 1127 ChemBioSys//Deutsche Forschungsgemeinschaft/ ; German Centre for Integrative Biodiversity Research (iDiv) Halle-Jena-Leipzig//Deutsche Forschungsgemeinschaft/ ; Jena School for Microbial Communication (JSMC) graduate school//Carl-Zeiss-Stiftung/ ; }, abstract = {Iron-rich pelagic aggregates (iron snow) are hot spots for microbial interactions. Using iron snow isolates, we previously demonstrated that the iron-oxidizer Acidithrix sp. 
C25 triggers Acidiphilium sp. C61 aggregation by producing the infochemical 2-phenethylamine (PEA). Here, we showed slightly enhanced aggregate formation in the presence of PEA on different Acidiphilium spp. but not other iron-snow microorganisms, including Acidocella sp. C78 and Ferrovum sp. PN-J47. Next, we sequenced the Acidiphilium sp. C61 genome to reconstruct its metabolic potential. Pangenome analyses of Acidiphilium spp. genomes revealed the core genome contained 65 gene clusters associated with aggregation, including autoaggregation, motility, and biofilm formation. Screening the Acidiphilium sp. C61 genome revealed the presence of autotransporter, flagellar, and extracellular polymeric substances (EPS) production genes. RNA-seq analyses of Acidiphilium sp. C61 incubations (+/- 10 µM PEA) indicated genes involved in energy production, respiration, and genetic processing were the most upregulated differentially expressed genes in the presence of PEA. Additionally, genes involved in flagellar basal body synthesis were highly upregulated, whereas the expression pattern of biofilm formation-related genes was inconclusive. Our data shows aggregation is a common trait among Acidiphilium spp. and PEA stimulates the central cellular metabolism, potentially advantageous in aggregates rapidly falling through the water column.}, } @article {pmid32103378, year = {2020}, author = {González-Castillo, A and Enciso-Ibarra, J and Gomez-Gil, B}, title = {Genomic taxonomy of the Mediterranei clade of the genus Vibrio (Gammaproteobacteria).}, journal = {Antonie van Leeuwenhoek}, volume = {113}, number = {6}, pages = {851--859}, doi = {10.1007/s10482-020-01396-4}, pmid = {32103378}, issn = {1572-9699}, support = {CB-2009-01 132328//CONACYT/ ; }, abstract = {The first genomic study of Mediterranei clade using five type strains (V. mediterranei, V. maritimus, V. variabilis, V. thalassae, and V. 
barjaei) and fourteen reference strains isolated from marine organisms, seawater, water and sediments of the sea was performed. These bacterial strains were characterised by means of a polyphasic approach comprising 16S rRNA gene, multilocus sequence analysis (MLSA) of 139 single-copy genes, the DNA G + C content, ANI, and in silico phenotypic characterisation. We found that the species of the Mediterranei clade formed two separate clusters based in 16S rRNA gene sequence similarity, MLSA, OrthoANI, and Codon and Amino Acid usage. The Mediterranei clade species showed values between 76 and 95% for ANIb, 84 and 95% for ANIm. The core genome consisted of 2057 gene families and the pan-genome of 13,094 gene families. Based on the genomic analyses performed, the Mediterranei clade can be divided in two clusters, one with the strains of V. maritimus, V. variabilis and two potential new species, and the other cluster with the strains of V. mediterranei, V. thalassae, and V. barjaei.}, } @article {pmid32100706, year = {2020}, author = {Whelan, FJ and Rusilowicz, M and McInerney, JO}, title = {Coinfinder: detecting significant associations and dissociations in pangenomes.}, journal = {Microbial genomics}, volume = {6}, number = {3}, pages = {}, pmid = {32100706}, issn = {2057-5858}, support = {BB/N018044/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Computational Biology ; *Genome ; Phylogeny ; *Software ; Streptococcus pneumoniae/genetics ; }, abstract = {The accessory genes of prokaryote and eukaryote pangenomes accumulate by horizontal gene transfer, differential gene loss, and the effects of selection and drift. We have developed Coinfinder, a software program that assesses whether sets of homologous genes (gene families) in pangenomes associate or dissociate with each other (i.e. are 'coincident') more often than would be expected by chance. 
Coinfinder employs a user-supplied phylogenetic tree in order to assess the lineage-dependence (i.e. the phylogenetic distribution) of each accessory gene, allowing Coinfinder to focus on coincident gene pairs whose joint presence is not simply because they happened to appear in the same clade, but rather that they tend to appear together more often than expected across the phylogeny. Coinfinder is implemented in C++, Python3 and R and is freely available under the GNU license from https://github.com/fwhelan/coinfinder.}, } @article {pmid32086304, year = {2020}, author = {Khan, AMAM and Hauk, VJ and Ibrahim, M and Raffel, TR and Blumer-Schuette, SE}, title = {Caldicellulosiruptor bescii Adheres to Polysaccharides via a Type IV Pilin-Dependent Mechanism.}, journal = {Applied and environmental microbiology}, volume = {86}, number = {9}, pages = {}, pmid = {32086304}, issn = {1098-5336}, mesh = {*Bacterial Adhesion ; Caldicellulosiruptor ; Fimbriae Proteins/*metabolism ; Firmicutes/metabolism/*physiology ; Polysaccharides, Bacterial/*metabolism ; }, abstract = {Biological hydrolysis of cellulose above 70°C involves microorganisms that secrete free enzymes and deploy separate protein systems to adhere to their substrate. Strongly cellulolytic Caldicellulosiruptor bescii is one such extreme thermophile, which deploys modular, multifunctional carbohydrate-acting enzymes to deconstruct plant biomass. Additionally, C. bescii also encodes noncatalytic carbohydrate binding proteins, which likely evolved as a mechanism to compete against other heterotrophs in carbon-limited biotopes that these bacteria inhabit. Analysis of the Caldicellulosiruptor pangenome identified a type IV pilus (T4P) locus encoded upstream of the tāpirins, that is encoded by all Caldicellulosiruptor species. In this study, we sought to determine if the C. bescii T4P plays a role in attachment to plant polysaccharides. The major C. 
bescii pilin (CbPilA) was identified by the presence of pilin-like protein domains, paired with transcriptomics and proteomics data. Using immuno-dot blots, we determined that the plant polysaccharide xylan induced production of CbPilA 10- to 14-fold higher than glucomannan or xylose. Furthermore, we are able to demonstrate that recombinant CbPilA directly interacts with xylan and cellulose at elevated temperatures. Localization of CbPilA at the cell surface was confirmed by immunofluorescence microscopy. Lastly, a direct role for CbPilA in cell adhesion was demonstrated using recombinant CbPilA or anti-CbPilA antibodies to reduce C. bescii cell adhesion to xylan and crystalline cellulose up to 4.5- and 2-fold, respectively. Based on these observations, we propose that CbPilA and, by extension, the T4P play a role in Caldicellulosiruptor cell attachment to plant biomass. IMPORTANCE: Most microorganisms are capable of attaching to surfaces in order to persist in their environment. Type IV (T4) pili produced by certain mesophilic Firmicutes promote adherence; however, a role for T4 pili encoded by thermophilic members of this phylum has yet to be demonstrated. Prior comparative genomics analyses identified a T4 pilus locus possessed by an extremely thermophilic genus within the Firmicutes. Here, we demonstrate that attachment to plant biomass-related carbohydrates by strongly cellulolytic Caldicellulosiruptor bescii is mediated by T4 pilins. Surprisingly, xylan but not cellulose induced expression of the major T4 pilin. Regardless, the C. bescii T4 pilin interacts with both polysaccharides at high temperatures and is located to the cell surface, where it is directly involved in C. bescii attachment. Adherence to polysaccharides is likely key to survival in environments where carbon sources are limiting, allowing C. 
bescii to compete against other plant-degrading microorganisms.}, } @article {pmid32076431, year = {2020}, author = {Romano, I and Ventorino, V and Pepe, O}, title = {Effectiveness of Plant Beneficial Microbes: Overview of the Methodological Approaches for the Assessment of Root Colonization and Persistence.}, journal = {Frontiers in plant science}, volume = {11}, number = {}, pages = {6}, pmid = {32076431}, issn = {1664-462X}, abstract = {Issues concerning the use of harmful chemical fertilizers and pesticides that have large negative impacts on environmental and human health have generated increasing interest in the use of beneficial microorganisms for the development of sustainable agri-food systems. A successful microbial inoculant has to colonize the root system, establish a positive interaction and persist in the environment in competition with native microorganisms living in the soil through rhizocompetence traits. Currently, several approaches based on culture-dependent, microscopic and molecular methods have been developed to follow bioinoculants in the soil and plant surface over time. Although culture-dependent methods are commonly used to estimate the persistence of bioinoculants, it is difficult to differentiate inoculated organisms from native populations based on morphological characteristics. Therefore, these methods should be used complementary to culture-independent approaches. Microscopy-based techniques (bright-field, electron and fluorescence microscopy) allow to obtain a picture of microbial colonization outside and inside plant tissues also at high resolution, but it is not possible to always distinguish living cells from dead cells by direct observation as well as distinguish bioinoculants from indigenous microbial populations living in soils. 
In addition, the development of metagenomic techniques, including the use of DNA probes, PCR-based methods, next-generation sequencing, whole-genome sequencing and pangenome methods, provides a complementary approach useful to understand plant-soil-microbe interactions. However, to ensure good results in microbiological analysis, the first fundamental prerequisite is correct soil sampling and sample preparation for the different methodological approaches that will be assayed. Here, we provide an overview of the advantages and limitations of the currently used methods and new methodological approaches that could be developed to assess the presence, plant colonization and soil persistence of bioinoculants in the rhizosphere. We further discuss the possibility of integrating multidisciplinary approaches to examine the variations in microbial communities after inoculation and to track the inoculated microbial strains.}, } @article {pmid32074720, year = {2020}, author = {Yu, YY and Wei, CC}, title = {[HUPAN promotes striding across of biomedical research from human genome to human pan-genome].}, journal = {Zhonghua bing li xue za zhi = Chinese journal of pathology}, volume = {49}, number = {2}, pages = {105--107}, doi = {10.3760/cma.j.issn.0529-5807.2020.02.001}, pmid = {32074720}, issn = {0529-5807}, mesh = {*Biomedical Research ; *Genome, Human ; High-Throughput Nucleotide Sequencing ; Humans ; Sequence Analysis, DNA ; }, } @article {pmid32066773, year = {2020}, author = {Iversen, KH and Rasmussen, LH and Al-Nakeeb, K and Armenteros, JJA and Jensen, CS and Dargis, R and Lukjancenko, O and Justesen, US and Moser, C and Rosenvinge, FS and Nielsen, XC and Christensen, JJ and Rasmussen, S}, title = {Similar genomic patterns of clinical infective endocarditis and oral isolates of Streptococcus sanguinis and Streptococcus gordonii.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {2728}, pmid = {32066773}, issn = {2045-2322}, mesh = 
{Endocarditis/*microbiology/pathology ; Endocarditis, Bacterial/*microbiology/pathology ; Endocardium/microbiology/pathology ; *Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Humans ; Machine Learning ; Mouth/microbiology/pathology ; Phylogeny ; Streptococcal Infections/*microbiology/pathology ; Streptococcus gordonii/classification/*genetics/isolation & purification/pathogenicity ; Streptococcus sanguis/classification/*genetics/isolation & purification/pathogenicity ; Symbiosis/physiology ; Virulence ; Virulence Factors/classification/*genetics/metabolism ; }, abstract = {Streptococcus gordonii and Streptococcus sanguinis belong to the Mitis group streptococci, which mostly are commensals in the human oral cavity. Though they are oral commensals, they can escape their niche and cause infective endocarditis, a severe infection with high mortality. Several virulence factors important for the development of infective endocarditis have been described in these two species. However, the background for how the commensal bacteria, in some cases, become pathogenic is still not known. To gain a greater understanding of the mechanisms of the pathogenic potential, we performed a comparative analysis of 38 blood culture strains, S. sanguinis (n = 20) and S. gordonii (n = 18) from patients with verified infective endocarditis, along with 21 publicly available oral isolates from healthy individuals, S. sanguinis (n = 12) and S. gordonii (n = 9). Using whole genome sequencing data of the 59 streptococci genomes, functional profiles were constructed, using protein domain predictions based on the translated genes. These functional profiles were used for clustering, phylogenetics and machine learning. A clear separation could be made between the two species. No clear differences between oral isolates and clinical infective endocarditis isolates were found in any of the 675 translated core-genes. 
Additionally, random forest-based machine learning and clustering of the pan-genome data as well as amino acid variations in the core-genome could not separate the clinical and oral isolates. A total of 151 different virulence genes was identified in the 59 genomes. Among these homologs of genes important for adhesion and evasion of the immune system were found in all of the strains. Based on the functional profiles and virulence gene content of the genomes, we believe that all analysed strains had the ability to become pathogenic.}, } @article {pmid32065216, year = {2020}, author = {Wu, H and Wang, D and Gao, F}, title = {Toward a high-quality pan-genome landscape of Bacillus subtilis by removal of confounding strains.}, journal = {Briefings in bioinformatics}, volume = {}, number = {}, pages = {}, doi = {10.1093/bib/bbaa013}, pmid = {32065216}, issn = {1477-4054}, abstract = {Pan-genome analysis is widely used to study the evolution and genetic diversity of species, particularly in bacteria. However, the impact of strain selection on the outcome of pan-genome analysis is poorly understood. Furthermore, a standard protocol to ensure high-quality pan-genome results is lacking. In this study, we carried out a series of pan-genome analyses of different strain sets of Bacillus subtilis to understand the impact of various strains on the performance and output quality of pan-genome analyses. Consequently, we found that the results obtained by pan-genome analyses of B. subtilis can be influenced by the inclusion of incorrectly classified Bacillus subspecies strains, phylogenetically distinct strains, engineered genome-reduced strains, chimeric strains, strains with a large number of unique genes or a large proportion of pseudogenes, and multiple clonal strains. Since the presence of these confounding strains can seriously affect the quality and true landscape of the pan-genome, we should remove these deviations in the process of pan-genome analyses. 
Our study provides new insights into the removal of biases from confounding strains in pan-genome analyses at the beginning of data processing, which enables the achievement of a closer representation of a high-quality pan-genome landscape of B. subtilis that better reflects the performance and credibility of the B. subtilis pan-genome. This procedure could be added as an important quality control step in pan-genome analyses for improving the efficiency of analyses, and ultimately contributing to a better understanding of genome function, evolution and genome-reduction strategies for B. subtilis in the future.}, } @article {pmid32054757, year = {2020}, author = {Laflamme, B and Dillon, MM and Martel, A and Almeida, RND and Desveaux, D and Guttman, DS}, title = {The pan-genome effector-triggered immunity landscape of a host-pathogen interaction.}, journal = {Science (New York, N.Y.)}, volume = {367}, number = {6479}, pages = {763--768}, doi = {10.1126/science.aax4079}, pmid = {32054757}, issn = {1095-9203}, mesh = {Arabidopsis/genetics/*immunology/*microbiology ; Arabidopsis Proteins/genetics/physiology ; Bacterial Proteins/genetics/immunology ; Carrier Proteins/genetics/physiology ; Genome, Plant ; Host-Pathogen Interactions/genetics/*immunology ; Immunity, Innate/*genetics ; Plant Diseases/genetics/*immunology/*microbiology ; Plant Immunity/*genetics ; Pseudomonas syringae/genetics/*pathogenicity ; }, abstract = {Effector-triggered immunity (ETI), induced by host immune receptors in response to microbial effectors, protects plants against virulent pathogens. However, a systematic study of ETI prevalence against species-wide pathogen diversity is lacking. We constructed the Pseudomonas syringae Type III Effector Compendium (PsyTEC) to reduce the pan-genome complexity of 5127 unique effector proteins, distributed among 70 families from 494 strains, to 529 representative alleles. 
We screened PsyTEC on the model plant Arabidopsis thaliana and identified 59 ETI-eliciting alleles (11.2%) from 19 families (27.1%), with orthologs distributed among 96.8% of P. syringae strains. We also identified two previously undescribed host immune receptors, including CAR1, which recognizes the conserved effectors AvrE and HopAA1, and found that 94.7% of strains harbor alleles predicted to be recognized by either CAR1 or ZAR1.}, } @article {pmid32054452, year = {2020}, author = {Liao, F and Mo, Z and Gu, W and Xu, W and Fu, X and Zhang, Y}, title = {A comparative genomic analysis between methicillin-resistant {Staphylococcus} aureus strains of hospital acquired and community infections in {Yunnan} province of {China}.}, journal = {BMC infectious diseases}, volume = {20}, number = {1}, pages = {137}, pmid = {32054452}, issn = {1471-2334}, support = {2017ZX10103010//National Sci-Tech key project/ ; 2019LCZXKF-HX01//open subject of the first people's hospital of Yunnan province/ ; }, mesh = {Adolescent ; Adult ; Anti-Bacterial Agents/adverse effects/therapeutic use ; Child ; Child, Preschool ; China/epidemiology ; Community-Acquired Infections/microbiology ; Cross Infection/*microbiology ; Female ; Food Microbiology ; Genome, Bacterial/genetics ; Genomics/*methods ; Genotype ; Humans ; Male ; Methicillin/adverse effects/therapeutic use ; Methicillin-Resistant Staphylococcus aureus/*genetics/isolation & purification ; Microbial Sensitivity Tests ; Phylogeny ; Polymorphism, Single Nucleotide/genetics ; Staphylococcal Infections/drug therapy/*epidemiology/*genetics/microbiology ; Whole Genome Sequencing ; Young Adult ; }, abstract = {BACKGROUND: Currently, Staphylococcus aureus is one of the most important pathogens worldwide, especially for methicillin-resistant S. aureus (MRSA) infection. However, few reports referred to patients' MRSA infections in Yunnan province, southwest China.

METHODS: In this study, we selected representative MRSA strains from patients' systemic surveillance in Yunnan province of China, performed the genomic sequencing and compared their features, together with some food derived strains.

RESULTS: Among sixty selective isolates, forty strains were isolated from patients, and twenty isolated from food. Among the patients' strains, sixteen were recognized as community-acquired (CA), compared with 24 for hospital-acquired (HA). ST6-t701, ST59-t437 and ST239-t030 were the three major genotype profiles. ST6-t701 was predominated in food strains, while ST59-t437 and ST239-t030 were the primary clones in patients. The clinical features between CA and HA-MRSA of patients were statistical different. Compared the antibiotic resistant results between patients and food indicated that higher antibiotic resistant rates were found in patients' strains. Totally, the average genome sizes of 60 isolates were 2.79 ± 0.05 Mbp, with GC content 33% and 84.50 ± 0.20% of coding rate. The core genomes of these isolates were 1593 genes. Phylogenetic analysis based on pan-genome and SNP of strains showed that five clustering groups were generated. Clustering ST239-t030 contained all the HA-MRSA cases in this study; clustering ST6-t701 referred to food and CA-MRSA infections in community; clustering ST59-t437 showed the heterogeneity for provoking different clinical diseases in both community and hospital. Phylogenetic tree, incorporating 24 isolates from different regions, indicated ST239-t030 strains in this study were more closely related to T0131 isolate from Tianjin, China, belonged to 'Turkish clade' from Eastern Europe; two groups of ST59-t437 clones of MRSA in Yunnan province were generated, belonged to the 'Asian-Pacific' clone (AP) and 'Taiwan' clone (TW) respectively.

CONCLUSIONS: ST239-t030, ST59-t437 and ST6-t701 were the three major MRSA clones in Yunnan province of China. ST239-t030 clonal Yunnan isolates demonstrated the local endemic of clone establishment for a number of years, whereas ST59-t437 strains revealed the multi-origins of this clone. In general, genomic study on epidemic clones of MRSA in southwest China provided the features and evolution of this pathogen.}, } @article {pmid32052196, year = {2020}, author = {Dos Santos Silva, LK and Rodrigues, RAL and Dos Santos Pereira Andrade, AC and Hikida, H and Andreani, J and Levasseur, A and La Scola, B and Abrahão, JS}, title = {Isolation and genomic characterization of a new mimivirus of lineage {B} from a {Brazilian} river.}, journal = {Archives of virology}, volume = {165}, number = {4}, pages = {853--863}, doi = {10.1007/s00705-020-04542-5}, pmid = {32052196}, issn = {1432-8798}, mesh = {Brazil ; *Genome, Viral ; Genomics ; Mimiviridae/classification/genetics/*isolation & purification/physiology ; Phylogeny ; Rivers/*virology ; Virus Replication ; }, abstract = {Since its discovery, the first identified giant virus associated with amoebae, Acanthamoeba polyphaga mimivirus (APMV), has been rigorously studied to understand the structural and genomic complexity of this virus. In this work, we report the isolation and genomic characterization of a new mimivirus of lineage B, named "Borely moumouvirus". This new virus exhibits a structure and replicative cycle similar to those of other members of the family Mimiviridae. The genome of the new isolate is a linear double-strand DNA molecule of ~1.0 Mb, containing over 900 open reading frames. Genome annotation highlighted different translation system components encoded in the DNA of Borely moumouvirus, including aminoacyl-tRNA synthetases, translation factors, and tRNA molecules, in a distribution similar to that in other lineage B mimiviruses. 
Pan-genome analysis indicated an increase in the genetic arsenal of this group of viruses, showing that the family Mimiviridae is still expanding. Furthermore, phylogenetic analysis has shown that Borely moumouvirus is closely related to moumouvirus australiensis. This is the first mimivirus lineage B isolated from Brazilian territory to be characterized. Further prospecting studies are necessary for us to better understand the diversity of these viruses so a better classification system can be established.}, } @article {pmid32051000, year = {2020}, author = {Hickey, G and Heller, D and Monlong, J and Sibbesen, JA and Sirén, J and Eizenga, J and Dawson, ET and Garrison, E and Novak, AM and Paten, B}, title = {Genotyping structural variants in pangenome graphs using the vg toolkit.}, journal = {Genome biology}, volume = {21}, number = {1}, pages = {35}, pmid = {32051000}, issn = {1474-760X}, support = {U01 HL137183/HL/NHLBI NIH HHS/United States ; U41 HG007234/HG/NHGRI NIH HHS/United States ; U54 HG007990/HG/NHGRI NIH HHS/United States ; }, abstract = {Structural variants (SVs) remain challenging to represent and study relative to point mutations despite their demonstrated importance. We show that variation graphs, as implemented in the vg toolkit, provide an effective means for leveraging SV catalogs for short-read SV genotyping experiments. We benchmark vg against state-of-the-art SV genotypers using three sequence-resolved SV catalogs generated by recent long-read sequencing studies. 
In addition, we use assemblies from 12 yeast strains to show that graphs constructed directly from aligned de novo assemblies improve genotyping compared to graphs built from intermediate SV catalogs in the VCF format.}, } @article {pmid32047279, year = {2020}, author = {Maistrenko, OM and Mende, DR and Luetge, M and Hildebrand, F and Schmidt, TSB and Li, SS and Rodrigues, JFM and von Mering, C and Pedro Coelho, L and Huerta-Cepas, J and Sunagawa, S and Bork, P}, title = {Disentangling the impact of environmental and phylogenetic constraints on prokaryotic within-species diversity.}, journal = {The ISME journal}, volume = {14}, number = {5}, pages = {1247--1259}, pmid = {32047279}, issn = {1751-7370}, support = {BBS/E/F/000PR10353/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Biodiversity ; Ecosystem ; Genome Size ; Phylogeny ; *Prokaryotic Cells ; }, abstract = {Microbial organisms inhabit virtually all environments and encompass a vast biological diversity. The pangenome concept aims to facilitate an understanding of diversity within defined phylogenetic groups. Hence, pangenomes are increasingly used to characterize the strain diversity of prokaryotic species. To understand the interdependence of pangenome features (such as the number of core and accessory genes) and to study the impact of environmental and phylogenetic constraints on the evolution of conspecific strains, we computed pangenomes for 155 phylogenetically diverse species (from ten phyla) using 7,000 high-quality genomes to each of which the respective habitats were assigned. Species habitat ubiquity was associated with several pangenome features. In particular, core-genome size was more important for ubiquity than accessory genome size. In general, environmental preferences had a stronger impact on pangenome evolution than phylogenetic inertia. 
Environmental preferences explained up to 49% of the variance for pangenome features, compared with 18% by phylogenetic inertia. This observation was robust when the dataset was extended to 10,100 species (59 phyla). The importance of environmental preferences was further accentuated by convergent evolution of pangenome features in a given habitat type across different phylogenetic clades. For example, the soil environment promotes expansion of pangenome size, while host-associated habitats lead to its reduction. Taken together, we explored the global principles of pangenome evolution, quantified the influence of habitat, and phylogenetic inertia on the evolution of pangenomes and identified criteria governing species ubiquity and habitat specificity.}, } @article {pmid32046716, year = {2020}, author = {Badet, T and Oggenfuss, U and Abraham, L and McDonald, BA and Croll, D}, title = {A 19-isolate reference-quality global pangenome for the fungal wheat pathogen {Zymoseptoria} tritici.}, journal = {BMC biology}, volume = {18}, number = {1}, pages = {12}, pmid = {32046716}, issn = {1741-7007}, mesh = {Ascomycota/*genetics ; *DNA Transposable Elements ; *Genome, Fungal ; Plant Diseases/microbiology ; *Transcriptome ; Triticum/microbiology ; }, abstract = {BACKGROUND: The gene content of a species largely governs its ecological interactions and adaptive potential. A species is therefore defined by both core genes shared between all individuals and accessory genes segregating presence-absence variation. There is growing evidence that eukaryotes, similar to bacteria, show intra-specific variability in gene content. However, it remains largely unknown how functionally relevant such a pangenome structure is for eukaryotes and what mechanisms underlie the emergence of highly polymorphic genome structures.

RESULTS: Here, we establish a reference-quality pangenome of a fungal pathogen of wheat based on 19 complete genomes from isolates sampled across six continents. Zymoseptoria tritici causes substantial worldwide losses to wheat production due to rapidly evolved tolerance to fungicides and evasion of host resistance. We performed transcriptome-assisted annotations of each genome to construct a global pangenome. Major chromosomal rearrangements are segregating within the species and underlie extensive gene presence-absence variation. Conserved orthogroups account for only ~ 60% of the species pangenome. Investigating gene functions, we find that the accessory genome is enriched for pathogenesis-related functions and encodes genes involved in metabolite production, host tissue degradation and manipulation of the immune system. De novo transposon annotation of the 19 complete genomes shows that the highly diverse chromosomal structure is tightly associated with transposable element content. Furthermore, transposable element expansions likely underlie recent genome expansions within the species.

CONCLUSIONS: Taken together, our work establishes a highly complex eukaryotic pangenome providing an unprecedented toolbox to study how pangenome structure impacts crop-pathogen interactions.}, } @article {pmid32046654, year = {2020}, author = {Zwickl, NF and Stralis-Pavese, N and Schäffer, C and Dohm, JC and Himmelbauer, H}, title = {Comparative genome characterization of the periodontal pathogen {Tannerella} forsythia.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {150}, pmid = {32046654}, issn = {1471-2164}, mesh = {Codon Usage ; *Genome, Bacterial ; Genomic Islands ; Glycosylation ; Phylogeny ; Tannerella forsythia/classification/*genetics/pathogenicity ; Virulence Factors/genetics ; }, abstract = {BACKGROUND: Tannerella forsythia is a bacterial pathogen implicated in periodontal disease. Numerous virulence-associated T. forsythia genes have been described, however, it is necessary to expand the knowledge on T. forsythia's genome structure and genetic repertoire to further elucidate its role within pathogenesis. Tannerella sp. BU063, a putative periodontal health-associated sister taxon and closest known relative to T. forsythia is available for comparative analyses. In the past, strain confusion involving the T. forsythia reference type strain ATCC 43037 led to discrepancies between results obtained from in silico analyses and wet-lab experimentation.

RESULTS: We generated a substantially improved genome assembly of T. forsythia ATCC 43037 covering 99% of the genome in three sequences. Using annotated genomes of ten Tannerella strains we established a soft core genome encompassing 2108 genes, based on orthologs present in > = 80% of the strains analysed. We used a set of known and hypothetical virulence factors for comparisons in pathogenic strains and the putative periodontal health-associated isolate Tannerella sp. BU063 to identify candidate genes promoting T. forsythia's pathogenesis. Searching for pathogenicity islands we detected 38 candidate regions in the T. forsythia genome. Only four of these regions corresponded to previously described pathogenicity islands. While the general protein O-glycosylation gene cluster of T. forsythia ATCC 43037 has been described previously, genes required for the initiation of glycan synthesis are yet to be discovered. We found six putative glycosylation loci which were only partially conserved in other bacteria. Lastly, we performed a comparative analysis of translational bias in T. forsythia and Tannerella sp. BU063 and detected highly biased genes.

CONCLUSIONS: We provide resources and important information on the genomes of Tannerella strains. Comparative analyses enabled us to assess the suitability of T. forsythia virulence factors as therapeutic targets and to suggest novel putative virulence factors. Further, we report on gene loci that should be addressed in the context of elucidating T. forsythia's protein O-glycosylation pathway. In summary, our work paves the way for further molecular dissection of T. forsythia biology in general and virulence of this species in particular.}, } @article {pmid32034321, year = {2020}, author = {Sherman, RM and Salzberg, SL}, title = {Pan-genomics in the human genome era.}, journal = {Nature reviews. Genetics}, volume = {21}, number = {4}, pages = {243--254}, pmid = {32034321}, issn = {1471-0064}, support = {R01 HG006677/HG/NHGRI NIH HHS/United States ; R01 HL129239/HL/NHLBI NIH HHS/United States ; R35 GM130151/GM/NIGMS NIH HHS/United States ; }, mesh = {Genome, Bacterial ; *Genome, Human ; Genome, Plant ; *Genomics ; Humans ; }, abstract = {Since the early days of the genome era, the scientific community has relied on a single 'reference' genome for each species, which is used as the basis for a wide range of genetic analyses, including studies of variation within and across species. As sequencing costs have dropped, thousands of new genomes have been sequenced, and scientists have come to realize that a single reference genome is inadequate for many purposes. By sampling a diverse set of individuals, one can begin to assemble a pan-genome: a collection of all the DNA sequences that occur in a species. Here we review efforts to create pan-genomes for a range of species, from bacteria to humans, and we further consider the computational methods that have been proposed in order to capture, interpret and compare pan-genome data. 
As scientists continue to survey and catalogue the genomic variation across human populations and begin to assemble a human pan-genome, these efforts will increase our power to connect variation to human diversity, disease and beyond.}, } @article {pmid32020732, year = {2020}, author = {Zhao, J and Bayer, PE and Ruperao, P and Saxena, RK and Khan, AW and Golicz, AA and Nguyen, HT and Batley, J and Edwards, D and Varshney, RK}, title = {Trait associations in the pangenome of pigeon pea ({Cajanus} cajan).}, journal = {Plant biotechnology journal}, volume = {18}, number = {9}, pages = {1946--1954}, pmid = {32020732}, issn = {1467-7652}, mesh = {Africa ; Animals ; *Cajanus/genetics ; Columbidae ; India ; Peas/genetics ; }, abstract = {Pigeon pea (Cajanus cajan) is an important orphan crop mainly grown by smallholder farmers in India and Africa. Here, we present the first pigeon pea pangenome based on 89 accessions mainly from India and the Philippines, showing that there is significant genetic diversity in Philippine individuals that is not present in Indian individuals. Annotation of variable genes suggests that they are associated with self-fertilization and response to disease. We identified 225 SNPs associated with nine agronomically important traits over three locations and two different time points, with SNPs associated with genes for transcription factors and kinases. These results will lead the way to an improved pigeon pea breeding programme.}, } @article {pmid32013858, year = {2020}, author = {Zhou, X and Yang, B and Stanton, C and Ross, RP and Zhao, J and Zhang, H and Chen, W}, title = {Comparative analysis of {Lactobacillus} gasseri from {Chinese} subjects reveals a new species-level taxa.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {119}, pmid = {32013858}, issn = {1471-2164}, support = {Nos. 
31530056, 31820103010, 31801521//National Natural Science Foundation of China/ ; JUSRP11733//Fundamental Research Funds for the Central Universities/ ; JUFSTR20180102//National Firs-Class Discipline Program of Food Science and Technology/ ; }, mesh = {Adult ; Asian Continental Ancestry Group ; Bacterial Proteins/genetics ; Bacteriocins/genetics ; CRISPR-Cas Systems/genetics ; DNA, Bacterial/genetics ; Feces/microbiology ; Genome, Bacterial/genetics ; Genomics/methods ; Humans ; Infant ; Lactobacillus gasseri/*genetics ; Phylogeny ; Probiotics/metabolism ; RNA, Ribosomal, 16S/genetics ; }, abstract = {BACKGROUND: Lactobacillus gasseri as a probiotic has history of safe consumption is prevalent in infants and adults gut microbiota to maintain gut homeostasis.

RESULTS: In this study, to explore the genomic diversity and mine potential probiotic characteristics of L. gasseri, 92 strains of L. gasseri were isolated from Chinese human feces and identified based on 16 s rDNA sequencing, after draft genomes sequencing, further average nucleotide identity (ANI) value and phylogenetic analysis reclassified them as L. paragasseri (n = 79) and L. gasseri (n = 13), respectively. Their pan/core-genomes were determined, revealing that L. paragasseri had an open pan-genome. Comparative analysis was carried out to identify genetic features, and the results indicated that 39 strains of L. paragasseri harboured Type II-A CRISPR-Cas system while 12 strains of L. gasseri contained Type I-E and II-A CRISPR-Cas systems. Bacteriocin operons and the number of carbohydrate-active enzymes were significantly different between the two species.

CONCLUSIONS: This is the first time to study pan/core-genome of L. gasseri and L. paragasseri, and compare their genetic diversity, and all the results provided better understating on genetics of the two species.}, } @article {pmid32006709, year = {2020}, author = {Isidro, J and Ferreira, S and Pinto, M and Domingues, F and Oleastro, M and Gomes, JP and Borges, V}, title = {Virulence and antibiotic resistance plasticity of {Arcobacter} butzleri: Insights on the genomic diversity of an emerging human pathogen.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {80}, number = {}, pages = {104213}, doi = {10.1016/j.meegid.2020.104213}, pmid = {32006709}, issn = {1567-7257}, abstract = {Arcobacter butzleri is a foodborne emerging human pathogen, frequently displaying a multidrug resistant character. Still, the lack of comprehensive genome-scale comparative analysis has limited our knowledge on A. butzleri diversification and pathogenicity. Here, we performed a deep genome analysis of A. butzleri focused on decoding its core- and pan-genome diversity and specific genetic traits underlying its pathogenic potential and diverse ecology. A. butzleri (genome size 2.07-2.58 Mbp) revealed a large open pan-genome with 7474 genes (about 50% being singletons) and a small but diverse core-genome with 1165 genes. It presents a plastic virulome (including newly identified determinants), marked by the differential presence of multiple adaptation-related virulence factors, such as the urease cluster ureD(AB)CEFG (phenotypically confirmed), the hypervariable hemagglutinin-encoding hecA, a type I secretion system (T1SS) harboring another agglutinin and a novel VirB/D4 T4SS likely linked to interbacterial competition and cytotoxicity. In addition, A. butzleri harbors a large repertoire of efflux pumps (EPs) and other antibiotic resistant determinants. 
We unprecedentedly describe a genetic mechanism of A. butzleri macrolides resistance, (inactivation of a TetR repressor likely regulating an EP). Fluoroquinolones resistance correlated with Thr-85-Ile in GyrA and ampicillin resistance was linked to an OXA-15-like β-lactamase. Remarkably, by decoding the polymorphism pattern of the main antigen PorA, we show that A. butzleri is able to exchange porA as a whole and/or hypervariable epitope-encoding regions separately, leading to a multitude of chimeric PorA presentations that can impact pathogen-host interaction during infection. Ultimately, our unprecedented screening of short sequence repeats indicates that phase variation likely modulates A. butzleri key adaptive functions. In summary, this study constitutes a turning point on A. butzleri comparative genomics revealing that this human gastrointestinal pathogen is equipped with vast and diverse virulence and antibiotic resistance arsenals that open a multitude of phenotypic fingerprints for environmental/host adaptation and pathogenicity.}, } @article {pmid31982844, year = {2020}, author = {Danilevicz, MF and Tay Fernandez, CG and Marsh, JI and Bayer, PE and Edwards, D}, title = {Plant pangenomics: approaches, applications and advancements.}, journal = {Current opinion in plant biology}, volume = {54}, number = {}, pages = {18--25}, doi = {10.1016/j.pbi.2019.12.005}, pmid = {31982844}, issn = {1879-0356}, mesh = {Biological Evolution ; *Genome, Plant ; *Plants ; }, abstract = {With the assembly of increasing numbers of plant genomes, it is becoming accepted that a single reference assembly does not reflect the gene diversity of a species. The production of pangenomes, which reflect the structural variation and polymorphisms in genomes, enables in depth comparisons of variation within species or higher taxonomic groups. In this review, we discuss the current and emerging approaches for pangenome assembly, analysis and visualisation. 
In addition, we consider the potential of pangenomes for applied crop improvement, evolutionary and biodiversity studies. To fully exploit the value of pangenomes it is important to integrate broad information such as phenotypic, environmental, and expression data to gain insights into the role of variable regions within genomes.}, } @article {pmid31980727, year = {2020}, author = {Talwar, C and Nagar, S and Kumar, R and Scaria, J and Lal, R and Negi, RK}, title = {Defining the Environmental Adaptations of Genus {Devosia}: Insights into its Expansive Short Peptide Transport System and Positively Selected Genes.}, journal = {Scientific reports}, volume = {10}, number = {1}, pages = {1151}, pmid = {31980727}, issn = {2045-2322}, mesh = {Adaptation, Physiological ; Bacterial Proteins/*genetics ; Base Composition ; DNA, Bacterial/genetics/isolation & purification ; Environment ; Gene Ontology ; *Genes, Bacterial ; Genome, Bacterial ; Hyphomicrobiaceae/classification/*genetics/metabolism ; Membrane Transport Proteins/*genetics ; Metabolic Networks and Pathways/genetics ; Nutrients/metabolism ; Open Reading Frames ; Operon ; Peptides/metabolism ; Phylogeny ; Selection, Genetic ; Sequence Alignment ; Soil Microbiology ; Soil Pollutants ; Species Specificity ; }, abstract = {Devosia are well known for their dominance in soil habitats contaminated with various toxins and are best characterized for their bioremediation potential. In this study, we compared the genomes of 27 strains of Devosia with aim to understand their metabolic abilities. The analysis revealed their adaptive gene repertoire which was bared from 52% unique pan-gene content. A striking feature of all genomes was the abundance of oligo- and di-peptide permeases (oppABCDF and dppABCDF) with each genome harboring an average of 60.7 ± 19.1 and 36.5 ± 10.6 operon associated genes respectively. 
Apart from their primary role in nutrition, these permeases may help Devosia to sense environmental signals and in chemotaxis at stressed habitats. Through sequence similarity network analyses, we identified 29 Opp and 19 Dpp sequences that shared very little homology with any other sequence suggesting an expansive short peptidic transport system within Devosia. The substrate determining components of these permeases viz. OppA and DppA further displayed a large diversity that separated into 12 and 9 homologous clusters respectively in addition to large number of isolated nodes. We also dissected the genome scale positive evolution and found genes associated with growth (exopolyphosphatase, HesB_IscA_SufA family protein), detoxification (moeB, nifU-like domain protein, alpha/beta hydrolase), chemotaxis (cheB, luxR) and stress response (phoQ, uspA, luxR, sufE) were positively selected. The study highlights the genomic plasticity of the Devosia spp. for conferring adaptation, bioremediation and the potential to utilize a wide range of substrates. The widespread toxin-antitoxin loci and 'open' state of the pangenome provided evidence of plastic genomes and a much larger genetic repertoire of the genus which is yet uncovered.}, } @article {pmid31980014, year = {2020}, author = {Sanderson, H and Ortega-Polo, R and Zaheer, R and Goji, N and Amoako, KK and Brown, RS and Majury, A and Liss, SN and McAllister, TA}, title = {Comparative genomics of multidrug-resistant {Enterococcus} spp. isolated from wastewater treatment plants.}, journal = {BMC microbiology}, volume = {20}, number = {1}, pages = {20}, pmid = {31980014}, issn = {1471-2180}, support = {001//Genomics Research and Development Initiative/International ; }, abstract = {BACKGROUND: Wastewater treatment plants (WWTPs) are considered hotspots for the environmental dissemination of antimicrobial resistance (AMR) determinants. 
Vancomycin-Resistant Enterococcus (VRE) are candidates for gauging the degree of AMR bacteria in wastewater. Enterococcus faecalis and Enterococcus faecium are recognized indicators of fecal contamination in water. Comparative genomics of enterococci isolated from conventional activated sludge (CAS) and biological aerated filter (BAF) WWTPs was conducted.

RESULTS: VRE isolates, including E. faecalis (n = 24), E. faecium (n = 11), E. casseliflavus (n = 2) and E. gallinarum (n = 2) were selected for sequencing based on WWTP source, species and AMR phenotype. The pangenomes of E. faecium and E. faecalis were both open. The genomic fraction related to the mobilome was positively correlated with genome size in E. faecium (p < 0.001) and E. faecalis (p < 0.001) and with the number of AMR genes in E. faecium (p = 0.005). Genes conferring vancomycin resistance, including vanA and vanM (E. faecium), vanG (E. faecalis), and vanC (E. casseliflavus/E. gallinarum), were detected in 20 genomes. The most prominent functional AMR genes were efflux pumps and transporters. A minimum of 16, 6, 5 and 3 virulence genes were detected in E. faecium, E. faecalis, E. casseliflavus and E. gallinarum, respectively. Virulence genes were more common in E. faecalis and E. faecium, than E. casseliflavus and E. gallinarum. A number of mobile genetic elements were shared among species. Functional CRISPR/Cas arrays were detected in 13 E. faecalis genomes, with all but one also containing a prophage. The lack of a functional CRISPR/Cas arrays was associated with multi-drug resistance in E. faecium. Phylogenetic analysis demonstrated differential clustering of isolates based on original source but not WWTP. Genes related to phage and CRISPR/Cas arrays could potentially serve as environmental biomarkers.

CONCLUSIONS: There was no discernible difference between enterococcal genomes from the CAS and BAF WWTPs. E. faecalis and E. faecium have smaller genomes and harbor more virulence, AMR, and mobile genetic elements than other Enterococcus spp.}, } @article {pmid31972312, year = {2020}, author = {Yun, BR and Malik, A and Kim, SB}, title = {Genome based characterization of {Kitasatospora} sp. {MMS16-BH015}, a multiple heavy metal resistant soil actinobacterium with high antimicrobial potential.}, journal = {Gene}, volume = {733}, number = {}, pages = {144379}, doi = {10.1016/j.gene.2020.144379}, pmid = {31972312}, issn = {1879-0038}, mesh = {Actinobacteria/classification/*drug effects/*genetics/isolation & purification ; Anti-Bacterial Agents/*pharmacology ; Bacterial Proteins/*genetics ; Drug Resistance, Bacterial/*genetics ; *Genome, Bacterial ; Metals, Heavy/*pharmacology ; Mining ; Multigene Family ; Phylogeny ; Soil Microbiology ; }, abstract = {An actinobacterial strain designated Kitasatospora sp. MMS16-BH015, exhibiting high level of heavy metal resistance, was isolated from soil of an abandoned metal mining site, and its potential for metal resistance and secondary metabolite production was studied. The strain was resistant to multiple heavy metals including zinc (up to 100 mM), nickel (up to 2 mM) and copper (up to 0.8 mM), and also showed antimicrobial potential against a broad group of microorganisms, in particular filamentous fungi. The genome of strain MMS16-BH015 was 8.96 Mbp in size with a G + C content of 72.7%, and contained 7270 protein-coding genes and 107 tRNA/rRNA genes. The genome analysis revealed presence of at least 121 metal resistance related genes, which was prominently higher in strain MMS16-BH015 compared to other genomes of Kitasatospora. 
The genes included those for proteins representing various families involved in the transport of heavy metals, for example dipeptide transport ATP-binding proteins, high-affinity nickel transport proteins, and P-type heavy metal-transporting ATPases. Additionally, 43 biosynthetic gene clusters (BGCs) for secondary metabolites, enriched with those for non-ribosomal peptides, were detected in this multiple heavy metal resistant actinobacterium, which was again the highest among the compared genomes of Kitasatospora. The pan-genome analysis also identified higher numbers of unique genes related to secondary metabolite production and metal resistance mechanism in strain MMS16-BH015. A high level of correlation between the biosynthetic potential and heavy metal resistance could be observed, thus indicating that heavy metal resistant actinobacteria can be a promising source of bioactive compounds.}, } @article {pmid31965706, year = {2020}, author = {Wang, L and Luo, Y and Zhao, Y and Gao, GF and Bi, Y and Qiu, HJ}, title = {Comparative genomic analysis reveals an 'open' pan-genome of African swine fever virus.}, journal = {Transboundary and emerging diseases}, volume = {67}, number = {4}, pages = {1553-1562}, doi = {10.1111/tbed.13489}, pmid = {31965706}, issn = {1865-1682}, support = {2018YFC0840401//National Key Research and Development Program of China/ ; XDB29010102//Strategic Priority Research Program of the Chinese Academy of Sciences (CAS)/ ; KJZD-SW-L06-01//Intramural Special Grants for African Swine Fever Research from the Chinese Academy of Sciences/ ; 31941003//National Natural Science Foundation (NSFC) of China/ ; Y2019YJ07-02//Central Public-interest Scientific Institution Basal Research Fund of China/ ; 81621091//NSFC Innovative Research Group/ ; 31822055//NSFC Outstanding Young Scholars/ ; 2017122//Youth Innovation Promotion Association of CAS/ ; }, mesh = {African Swine Fever/*virology ; African Swine Fever Virus/*genetics ; Animals ; DNA, Viral/*genetics 
; Genome, Viral/*genetics ; Genome-Wide Association Study ; Genomics ; Polymorphism, Genetic ; Sequence Analysis, DNA ; Swine ; Swine Diseases/*virology ; Viral Proteins/*genetics ; Virulence ; }, abstract = {The worldwide transmission of African swine fever virus (ASFV) drastically affects the pig industry and global trade. Development of vaccines is hindered by the lack of knowledge of the genomic characteristics of ASFV. In this study, we developed a pipeline for the de novo assembly of ASFV genome without virus isolation and purification. We then used a comparative genomics approach to systematically study 46 genomes of ASFVs to reveal the genomic characteristics. The analysis revealed that ASFV has an 'open' pan-genome based on both protein-coding genes and intergenic regions. Of the 151-174 genes found in the ASFV strains, only 86 were identified as core genes; the remainder were flexible accessory genes. Notably, 44 of the 86 core genes and 155 of the 324 accessory genes have been functionally annotated according to the known proteins. Interestingly, a dynamic number of taxis-related genes were identified in the accessory genes, and two potential virulence genes were identified in all ASFV isolates. The 'open' pan-genome of ASFV based on gene and intergenic regions reveals its pronounced natural diversity concerning genomic composition and regulation.}, } @article {pmid31956321, year = {2019}, author = {Alexandraki, V and Kazou, M and Blom, J and Pot, B and Papadimitriou, K and Tsakalidou, E}, title = {Comparative Genomics of Streptococcus thermophilus Support Important Traits Concerning the Evolution, Biology and Technological Properties of the Species.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2916}, pmid = {31956321}, issn = {1664-302X}, abstract = {Streptococcus thermophilus is a major starter for the dairy industry with great economic importance. In this study we analyzed 23 fully sequenced genomes of S. 
thermophilus to highlight novel aspects of the evolution, biology and technological properties of this species. Pan/core genome analysis revealed that the species has an important number of conserved genes and that the pan genome is probably going to be closed soon. According to whole genome phylogeny and average nucleotide identity (ANI) analysis, most S. thermophilus strains were grouped in two major clusters (i.e., clusters A and B). More specifically, cluster A includes strains with chromosomes above 1.83 Mbp, while cluster B includes chromosomes below this threshold. This observation suggests that strains belonging to the two clusters may be differentiated by gene gain or gene loss events. Furthermore, certain strains of cluster A could be further subdivided in subgroups, i.e., subgroup I (ASCC 1275, DGCC 7710, KLDS SM, MN-BM-A02, and ND07), II (MN-BM-A01 and MN-ZLW-002), III (LMD-9 and SMQ-301), and IV (APC151 and ND03). In cluster B certain strains formed one distinct subgroup, i.e., subgroup I (CNRZ1066, CS8, EPS, and S9). Clusters and subgroups observed for S. thermophilus indicate the existence of lineages within the species, an observation which was further supported to a variable degree by the distribution and/or the architecture of several genomic traits. These would include exopolysaccharide (EPS) gene clusters, Clustered Regularly Interspaced Short Palindromic Repeats (CRISPRs)-CRISPR associated (Cas) systems, as well as restriction-modification (R-M) systems and genomic islands (GIs). Of note, the histidine biosynthetic cluster was found present in all cluster A strains (plus strain NCTC12958T) but was absent from all strains in cluster B. Other loci related to lactose/galactose catabolism and urea metabolism, aminopeptidases, the majority of amino acid and peptide transporters, as well as amino acid biosynthetic pathways were found to be conserved in all strains suggesting their central role for the species. 
Our study highlights the necessity of sequencing and analyzing more S. thermophilus complete genomes to further elucidate important aspects of strain diversity within this starter culture that may be related to its application in the dairy industry.}, } @article {pmid31954181, year = {2020}, author = {Lannes-Costa, PS and Baraúna, RA and Ramos, JN and Veras, JFC and Conceição, MVR and Vieira, VV and de Mattos-Guaraldi, AL and Ramos, RTJ and Doran, KS and Silva, A and Nagao, PE}, title = {Comparative genomic analysis and identification of pathogenicity islands of hypervirulent ST-17 Streptococcus agalactiae Brazilian strain.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {80}, number = {}, pages = {104195}, doi = {10.1016/j.meegid.2020.104195}, pmid = {31954181}, issn = {1567-7257}, abstract = {Streptococcus agalactiae are important pathogenic bacteria that cause severe infections in humans, especially neonates. The mechanism by which ST-17 causes invasive infections than other STs is not well understood. In this study, we sequenced the first genome of a S. agalactiae ST-17 strain isolated in Brazil using the Illumina HiSeq 2500 technology. S. agalactiae GBS90356 ST-17 belongs to the capsular type III and was isolated from a neonatal with a fatal case of meningitis. The genome presented a size of 2.03 Mbp and a G + C content of 35.2%. S. agalactiae has 706 genes in its core genome and an open pan-genome with a size of 5.020 genes, suggesting a high genomic plasticity. GIPSy software was used to identify 10 Pathogenicity islands (PAIs) which corresponded to 15% of the genome size. IslandViewer4 corroborated the prediction of six PAIs. The pathogenicity islands showed important virulence factors genes for S. agalactiae e.g. neu, cps, dlt, fbs, cfb, lmb. 
SignalP detected 20 proteins with signal peptides among the 352 proteins found in PAIs, which 60% were located in the SagPAI_5. SagPAI_2 and 5 were mainly detected in ST-17 strains studied. Moreover, we identified 51 unique genes, 9 recombination regions and a large number of SNPs with an average of 760.3 polymorphisms, which can be related with high genomic plasticity and virulence during host-pathogen interactions. Our results showed implications for pathogenesis, evolution, concept of species and in silico analysis value to understand the epidemiology and genome plasticity of S. agalactiae.}, } @article {pmid31950028, year = {2019}, author = {Ying, J and Ye, J and Xu, T and Wang, Q and Bao, Q and Li, A}, title = {Comparative Genomic Analysis of Rhodococcus equi: An Insight into Genomic Diversity and Genome Evolution.}, journal = {International journal of genomics}, volume = {2019}, number = {}, pages = {8987436}, pmid = {31950028}, issn = {2314-4378}, abstract = {Rhodococcus equi, a member of the Rhodococcus genus, is a gram-positive pathogenic bacterium. Rhodococcus possesses an open pan-genome that constitutes the basis of its high genomic diversity and allows for adaptation to specific niche conditions and the changing host environments. Our analysis further showed that the core genome of R. equi contributes to the pathogenicity and niche adaptation of R. equi. Comparative genomic analysis revealed that the genomes of R. equi shared identical collinearity relationship, and heterogeneity was mainly acquired by means of genomic islands and prophages. Moreover, genomic islands in R. equi were always involved in virulence, resistance, or niche adaptation and possibly working with prophages to cause the majority of genome expansion. These findings provide an insight into the genomic diversity, evolution, and structural variation of R. 
equi and a valuable resource for functional genomic studies.}, } @article {pmid31948633, year = {2020}, author = {Mataragas, M}, title = {Investigation of genomic characteristics and carbohydrates' metabolic activity of Lactococcus lactis subsp. lactis during ripening of a Swiss-type cheese.}, journal = {Food microbiology}, volume = {87}, number = {}, pages = {103392}, doi = {10.1016/j.fm.2019.103392}, pmid = {31948633}, issn = {1095-9998}, mesh = {*Carbohydrate Metabolism ; Carbohydrates/chemistry ; Cheese/*microbiology ; Fermentation ; Food Microbiology ; Genetic Variation ; Genomics ; Lactococcus lactis/classification/*genetics/isolation & purification/*metabolism ; Phylogeny ; }, abstract = {Genetic diversity and metabolic properties of Lactococcus lactis subsp. lactis were explored using phylogenetic, pan-genomic and metatranscriptomic analysis. The genomes, used in the current study, were available and downloaded from the GenBank which were primarily related with microorganisms isolated from dairy products and secondarily from other foodstuffs. To study the genetic diversity of the microorganism, various bioinformatics tools were employed such as average nucleotide identity, digital DNA-DNA hybridization, phylogenetic analysis, clusters of orthologous groups analysis, KEGG orthology analysis and pan-genomic analysis. The results showed that Lc. lactis subsp. lactis strains cannot be sufficiently separated into phylogenetic lineages based on the 16S rRNA gene sequences and core genome-based phylogenetic analysis was more appropriate. Pan-genomic analysis of the strains indicated that the core, accessory and unique genome comprised of 1036, 3146 and 1296 genes, respectively. Considering the results of pan-genomic and KEGG orthology analyses, the metabolic network of Lc. lactis subsp. lactis was rebuild regarding its carbohydrates' metabolic capabilities. 
Based on the metatranscriptomic data during the ripening of the Swiss-type Maasdam cheese at 20 °C and 5 °C, it was shown that the microorganism performed mixed acid fermentation producing lactate, formate, acetate, ethanol and 2,3-butanediol. Mixed acid fermentation was more pronounced at higher ripening temperatures. At lower ripening temperatures, the genes involved in mixed acid fermentation were repressed while lactate production remained unaffected resembling to a homolactic fermentation. Comparative genomics and metatranscriptomic analysis are powerful tools to gain knowledge on the genomic diversity of the lactic acid bacteria used as starter cultures as well as on the metabolic activities occurring in fermented dairy products.}, } @article {pmid31943080, year = {2020}, author = {Yu, J and Xiang, X and Huang, J and Liang, X and Pan, X and Dong, Z and Petersen, TS and Qu, K and Yang, L and Zhao, X and Li, S and Zheng, T and Xu, Z and Liu, C and Han, P and Xu, F and Yang, H and Liu, X and Zhang, X and Bolund, L and Luo, Y and Lin, L}, title = {Haplotyping by CRISPR-mediated DNA circularization (CRISPR-hapC) broadens allele-specific gene editing.}, journal = {Nucleic acids research}, volume = {48}, number = {5}, pages = {e25}, pmid = {31943080}, issn = {1362-4962}, mesh = {Alleles ; Base Sequence ; CRISPR-Associated Protein 9/*genetics/metabolism ; *CRISPR-Cas Systems ; Cell Line, Tumor ; *Clustered Regularly Interspaced Short Palindromic Repeats ; DNA, Circular/*genetics/metabolism ; Gene Editing/methods ; HEK293 Cells ; Haplotypes ; Hep G2 Cells ; Humans ; Plasmids/chemistry/metabolism ; RNA, Guide/*genetics/metabolism ; }, abstract = {Allele-specific protospacer adjacent motif (asPAM)-positioning SNPs and CRISPRs are valuable resources for gene therapy of dominant disorders. However, one technical hurdle is to identify the haplotype comprising the disease-causing allele and the distal asPAM SNPs. 
Here, we describe a novel CRISPR-based method (CRISPR-hapC) for haplotyping. Based on the generation (with a pair of CRISPRs) of extrachromosomal circular DNA in cells, the CRISPR-hapC can map haplotypes from a few hundred bases to over 200 Mb. To streamline and demonstrate the applicability of the CRISPR-hapC and asPAM CRISPR for allele-specific gene editing, we reanalyzed the 1000 human pan-genome and generated a high frequency asPAM SNP and CRISPR database (www.crispratlas.com/knockout) for four CRISPR systems (SaCas9, SpCas9, xCas9 and Cas12a). Using the huntingtin (HTT) CAG expansion and transthyretin (TTR) exon 2 mutation as examples, we showed that the asPAM CRISPRs can specifically discriminate active and dead PAMs for all 23 loci tested. Combination of the CRISPR-hapC and asPAM CRISPRs further demonstrated the capability for achieving highly accurate and haplotype-specific deletion of the HTT CAG expansion allele and TTR exon 2 mutation in human cells. Taken together, our study provides a new approach and an important resource for genome research and allele-specific (haplotype-specific) gene therapy.}, } @article {pmid31941435, year = {2020}, author = {He, Y and Zhou, X and Chen, Z and Deng, X and Gehring, A and Ou, H and Zhang, L and Shi, X}, title = {PRAP: Pan Resistome analysis pipeline.}, journal = {BMC bioinformatics}, volume = {21}, number = {1}, pages = {20}, pmid = {31941435}, issn = {1471-2105}, support = {2017YFC1601200//Key Technologies Research and Development Program/ ; 31601562//National Natural Science Foundation of China/ ; }, mesh = {Alleles ; China ; Drug Resistance, Microbial/*genetics ; Salmonella enterica/genetics ; *Software ; Whole Genome Sequencing ; }, abstract = {BACKGROUND: Antibiotic resistance genes (ARGs) can spread among pathogens via horizontal gene transfer, resulting in imparities in their distribution even within the same species. 
Therefore, a pan-genome approach to analyzing resistomes is necessary for thoroughly characterizing patterns of ARGs distribution within particular pathogen populations. Software tools are readily available for either ARGs identification or pan-genome analysis, but few exist to combine the two functions.

RESULTS: We developed Pan Resistome Analysis Pipeline (PRAP) for the rapid identification of antibiotic resistance genes from various formats of whole genome sequences based on the CARD or ResFinder databases. Detailed annotations were used to analyze pan-resistome features and characterize distributions of ARGs. The contribution of different alleles to antibiotic resistance was predicted by a random forest classifier. Results of analysis were presented in browsable files along with a variety of visualization options. We demonstrated the performance of PRAP by analyzing the genomes of 26 Salmonella enterica isolates from Shanghai, China.

CONCLUSIONS: PRAP was effective for identifying ARGs and visualizing pan-resistome features, therefore facilitating pan-genomic investigation of ARGs. This tool has the ability to further excavate potential relationships between antibiotic resistance genes and their phenotypic traits.}, } @article {pmid31937675, year = {2020}, author = {Park, CJ and Andam, CP}, title = {Distinct but Intertwined Evolutionary Histories of Multiple Salmonella enterica Subspecies.}, journal = {mSystems}, volume = {5}, number = {1}, pages = {}, pmid = {31937675}, issn = {2379-5077}, abstract = {Salmonella is responsible for many nontyphoidal foodborne infections and enteric (typhoid) fever in humans. Of the two Salmonella species, Salmonella enterica is highly diverse and includes 10 known subspecies and approximately 2,600 serotypes. Understanding the evolutionary processes that generate the tremendous diversity in Salmonella is important in reducing and controlling the incidence of disease outbreaks and the emergence of virulent strains. In this study, we aim to elucidate the impact of homologous recombination in the diversification of S. enterica subspecies. Using a data set of previously published 926 Salmonella genomes representing the 10 S. enterica subspecies and Salmonella bongori, we calculated a genus-wide pan-genome composed of 84,041 genes and the S. enterica pan-genome of 81,371 genes. The size of the accessory genomes varies between 12,429 genes in S. enterica subsp. arizonae (subsp. IIIa) to 33,257 genes in S. enterica subsp. enterica (subsp. I). A total of 12,136 genes in the Salmonella pan-genome show evidence of recombination, representing 14.44% of the pan-genome. We identified genomic hot spots of recombination that include genes associated with flagellin and the synthesis of methionine and thiamine pyrophosphate, which are known to influence host adaptation and virulence. 
Last, we uncovered within-species heterogeneity in rates of recombination and preferential genetic exchange between certain donor and recipient strains. Frequent but biased recombination within a bacterial species may suggest that lineages vary in their response to environmental selection pressure. Certain lineages, such as the more uncommon non-enterica subspecies (non-S. enterica subsp. enterica), may also act as a major reservoir of genetic diversity for the wider population. IMPORTANCE: S. enterica is a major foodborne pathogen, which can be transmitted via several distinct routes from animals and environmental sources to human hosts. Multiple subspecies and serotypes of S. enterica exhibit considerable differences in virulence, host specificity, and colonization. This study provides detailed insights into the dynamics of recombination and its contributions to S. enterica subspecies evolution. Widespread recombination within the species means that new adaptations arising in one lineage can be rapidly transferred to another lineage. 
We therefore predict that recombination has been an important factor in the emergence of several major disease-causing strains from diverse genomic backgrounds and their ability to adapt to disparate environments.}, } @article {pmid31935184, year = {2020}, author = {Nakamura, K and Murase, K and Sato, MP and Toyoda, A and Itoh, T and Mainil, JG and Piérard, D and Yoshino, S and Kimata, K and Isobe, J and Seto, K and Etoh, Y and Narimatsu, H and Saito, S and Yatsuyanagi, J and Lee, K and Iyoda, S and Ohnishi, M and Ooka, T and Gotoh, Y and Ogura, Y and Hayashi, T}, title = {Differential dynamics and impacts of prophages and plasmids on the pangenome and virulence factor repertoires of Shiga toxin-producing Escherichia coli O145:H28.}, journal = {Microbial genomics}, volume = {6}, number = {1}, pages = {}, pmid = {31935184}, issn = {2057-5858}, mesh = {*Genome, Bacterial ; Phylogeny ; *Plasmids ; Polymorphism, Single Nucleotide ; *Prophages ; Shiga-Toxigenic Escherichia coli/*genetics ; *Siphoviridae ; Virulence Factors/*genetics ; }, abstract = {Phages and plasmids play important roles in bacterial evolution and diversification. Although many draft genomes have been generated, phage and plasmid genomes are usually fragmented, limiting our understanding of their dynamics. Here, we performed a systematic analysis of 239 draft genomes and 7 complete genomes of Shiga toxin (Stx)-producing Escherichia coli O145:H28, the major virulence factors of which are encoded by prophages (PPs) or plasmids. The results indicated that PPs are more stably maintained than plasmids. A set of ancestrally acquired PPs was well conserved, while various PPs, including Stx phages, were acquired by multiple sublineages. In contrast, gains and losses of a wide range of plasmids have frequently occurred across the O145:H28 lineage, and only the virulence plasmid was well conserved. 
The different dynamics of PPs and plasmids have differentially impacted the pangenome of O145:H28, with high proportions of PP- and plasmid-associated genes in the variably present and rare gene fractions, respectively. The dynamics of PPs and plasmids have also strongly impacted virulence gene repertoires, such as the highly variable distribution of stx genes and the high conservation of a set of type III secretion effectors, which probably represents the core effectors of O145:H28 and the genes on the virulence plasmid in the entire O145:H28 population. These results provide detailed insights into the dynamics of PPs and plasmids, and show the application of genomic analyses using a large set of draft genomes and appropriately selected complete genomes.}, } @article {pmid31934876, year = {2020}, author = {Tetz, VV and Tetz, GV}, title = {A new biological definition of life.}, journal = {Biomolecular concepts}, volume = {11}, number = {1}, pages = {1-6}, doi = {10.1515/bmc-2020-0001}, pmid = {31934876}, issn = {1868-503X}, mesh = {Biological Evolution ; Genes/*physiology ; Heredity/*genetics ; *Life ; Models, Biological ; Models, Theoretical ; }, abstract = {Here we have proposed a new biological definition of life based on the function and reproduction of existing genes and creation of new ones, which is applicable to both unicellular and multicellular organisms. First, we coined a new term "genetic information metabolism" comprising functioning, reproduction, and creation of genes and their distribution among living and non-living carriers of genetic information. Encompassing this concept, life is defined as organized matter that provides genetic information metabolism. Additionally, we have articulated the general biological function of life as Tetz biological law: "General biological function of life is to provide genetic information metabolism" and formulated novel definition of life: "Life is an organized matter that provides genetic information metabolism". 
New definition of life and Tetz biological law allow to distinguish in a new way living and non-living objects on Earth and other planets based on providing genetic information metabolism.}, } @article {pmid31932676, year = {2020}, author = {Song, JM and Guan, Z and Hu, J and Guo, C and Yang, Z and Wang, S and Liu, D and Wang, B and Lu, S and Zhou, R and Xie, WZ and Cheng, Y and Zhang, Y and Liu, K and Yang, QY and Chen, LL and Guo, L}, title = {Eight high-quality genomes reveal pan-genome architecture and ecotype differentiation of Brassica napus.}, journal = {Nature plants}, volume = {6}, number = {1}, pages = {34-45}, pmid = {31932676}, issn = {2055-0278}, mesh = {Brassica napus/*genetics ; Chromosome Mapping ; *Ecotype ; Flowers/genetics ; *Genome, Plant ; Genome-Wide Association Study ; Polymorphism, Single Nucleotide ; Seeds/genetics ; }, abstract = {Rapeseed (Brassica napus) is the second most important oilseed crop in the world but the genetic diversity underlying its massive phenotypic variations remains largely unexplored. Here, we report the sequencing, de novo assembly and annotation of eight B. napus accessions. Using pan-genome comparative analysis, millions of small variations and 77.2-149.6 megabase presence and absence variations (PAVs) were identified. More than 9.4% of the genes contained large-effect mutations or structural variations. PAV-based genome-wide association study (PAV-GWAS) directly identified causal structural variations for silique length, seed weight and flowering time in a nested association mapping population with ZS11 (reference line) as the donor, which were not detected by single-nucleotide polymorphisms-based GWAS (SNP-GWAS), demonstrating that PAV-GWAS was complementary to SNP-GWAS in identifying associations to traits. Further analysis showed that PAVs in three FLOWERING LOCUS C genes were closely related to flowering time and ecotype differentiation. 
This study provides resources to support a better understanding of the genome architecture and acceleration of the genetic improvement of B. napus.}, } @article {pmid31924165, year = {2020}, author = {Jaiswal, AK and Tiwari, S and Jamal, SB and de Castro Oliveira, L and Alves, LG and Azevedo, V and Ghosh, P and Oliveira, CJF and Soares, SC}, title = {The pan-genome of Treponema pallidum reveals differences in genome plasticity between subspecies related to venereal and non-venereal syphilis.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {33}, pmid = {31924165}, issn = {1471-2164}, mesh = {Genome, Bacterial/genetics ; Genomic Islands/genetics ; Humans ; Phylogeny ; Syphilis/*microbiology ; Treponema pallidum/classification/*genetics ; }, abstract = {BACKGROUND: Spirochetal organisms of the Treponema genus are responsible for causing Treponematoses. Pathogenic treponemes is a Gram-negative, motile, spirochete pathogen that causes syphilis in humans. Treponema pallidum subsp. endemicum (TEN) causes endemic syphilis (bejel); T. pallidum subsp. pallidum (TPA) causes venereal syphilis; T. pallidum subsp. pertenue (TPE) causes yaws; and T. pallidum subsp. carateum causes pinta. Out of these four high morbidity diseases, venereal syphilis is mediated by sexual contact; the other three diseases are transmitted by close personal contact. The global distribution of syphilis is alarming and there is an increasing need of proper treatment and preventive measures. Unfortunately, effective measures are limited.

RESULTS: Here, the genome sequences of 53 T. pallidum strains isolated from different parts of the world and a diverse range of hosts were comparatively analysed using a pan-genomic strategy. Phylogenomic, pan-genomic, core genomic and singleton analysis disclosed the close connection among all strains of the pathogen T. pallidum, its clonal behaviour and showed increases in the sizes of the pan-genome. Based on the genome plasticity analysis of the subsets containing the subspecies T. pallidum subsp. pallidum, T. pallidum subsp. endemicum and T. pallidum subsp. pertenue, we found differences in the presence/absence of pathogenicity islands (PAIs) and genomic islands (GIs) on subsp.-based study.

CONCLUSIONS: In summary, we identified four pathogenicity islands (PAIs), eight genomic islands (GIs) in subsp. pallidum, whereas subsp. endemicum has three PAIs and seven GIs and subsp. pertenue harbours three PAIs and eight GIs. Concerning the presence of genes in PAIs and GIs, we found some genes related to lipid and amino acid biosynthesis that were only present in the subsp. of T. pallidum, compared to T. pallidum subsp. endemicum and T. pallidum subsp. pertenue.}, } @article {pmid31917358, year = {2020}, author = {Si-Tuan, N and Ngoc, HM and Nhat, LD and Nguyen, C and Pham, HQ and Huong, NT}, title = {Genomic features, whole-genome phylogenetic and comparative genomic analysis of extreme-drug-resistant ventilator-associated-pneumonia Acinetobacter baumannii strain in a Vietnam hospital.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {80}, number = {}, pages = {104178}, doi = {10.1016/j.meegid.2020.104178}, pmid = {31917358}, issn = {1567-7257}, abstract = {OBJECTIVES: Acinetobacter baumannii is a major cause of ventilator-associated-pneumonia (VAP) worldwide due to its impressive propensity to rapidly acquire resistance elements to a wide range of antibacterial agents. We sought to explore the genomic features of this pathogen from a sputum specimen of a VAP male patient.

METHODS: Whole genome analysis of A. baumannii DMS06670 included de novo assembly; functional annotation, whole-genome-phylogenetic analysis, antibiotics genes identification, prophage regions, virulent factor and pan-genome analysis.

RESULTS: Assembly of whole-genome shotgun sequences of strain DMS06670 yielded an estimated genome size of 3.8 Mb with Sequence Type 447. Functional annotation and orthologous protein cluster analysis identified several potential antibiotic resistance genes (with 1 novel gene), prophage regions, and virulence factors. The clusters of orthologous groups (COGs) analysis, in which the protein sequences of the A. baumannii strain were compared with those of the other five genomes, showed that the orthologous protein clusters responsible for multi-drug resistance exist inside highly antimicrobial-resistant strains. Whole-genome phylogenetic and in silico MLST analysis revealed that this A. baumannii strain is in the same clade as strains LAC-4 and BJAB0715. Comparative analysis of 23 available genomes of A. baumannii revealed a pan-genome consisting of 15,883 genes.

CONCLUSION: Our findings provide insight into the virulence-associated genes and then compared with the genomes of other A. baumannii strains by calculation of ANI values and pan-genome analysis. Functional studies of these pathogens are required to validate these findings.}, } @article {pmid31914919, year = {2020}, author = {Rodriguez, CI and Martiny, JBH}, title = {Evolutionary relationships among bifidobacteria and their hosts and environments.}, journal = {BMC genomics}, volume = {21}, number = {1}, pages = {26}, pmid = {31914919}, issn = {1471-2164}, support = {na//University of California, Irvine/ ; na//University of California, Irvine/ ; }, mesh = {Animals ; Bifidobacterium/classification/*genetics ; Biological Evolution ; Gastrointestinal Microbiome/genetics ; Genome, Bacterial/genetics ; Humans ; Phylogeny ; }, abstract = {BACKGROUND: The assembly of animal microbiomes is influenced by multiple environmental factors and host genetics, although the relative importance of these factors remains unclear. Bifidobacteria (genus Bifidobacterium, phylum Actinobacteria) are common first colonizers of gut microbiomes in humans and inhabit other mammals, social insects, food, and sewages. In humans, the presence of bifidobacteria in the gut has been correlated with health-promoting benefits. Here, we compared the genome sequences of a subset of the over 400 Bifidobacterium strains publicly available to investigate the adaptation of bifidobacteria diversity. We tested 1) whether bifidobacteria show a phylogenetic signal with their isolation sources (hosts and environments) and 2) whether key traits encoded by the bifidobacteria genomes depend on the host or environment from which they were isolated. We analyzed Bifidobacterium genomes available in the PATRIC and NCBI repositories and identified the hosts and/or environment from which they were isolated. 
A multilocus phylogenetic analysis was conducted to compare the genetic relatedness of the strains harbored by different hosts and environments. Furthermore, we examined differences in genomic traits and genes related to amino acid biosynthesis and degradation of carbohydrates.

RESULTS: We found that bifidobacteria diversity appears to have evolved with their hosts as strains isolated from the same host were non-randomly associated with their phylogenetic relatedness. Moreover, bifidobacteria isolated from different sources displayed differences in genomic traits such as genome size and accessory gene composition and on particular traits related to amino acid production and degradation of carbohydrates. In contrast, when analyzing diversity within human-derived bifidobacteria, we observed no phylogenetic signal or differences on specific traits (amino acid biosynthesis genes and CAZymes).

CONCLUSIONS: Overall, our study shows that bifidobacteria diversity is strongly adapted to specific hosts and environments and that several genomic traits were associated with their isolation sources. However, this signal is not observed in human-derived strains alone. Looking into the genomic signatures of bifidobacteria strains in different environments can give insights into how this bacterial group adapts to their environment and what types of traits are important for these adaptations.}, } @article {pmid31899322, year = {2020}, author = {Garcia Teijeiro, R and Belimov, AA and Dodd, IC}, title = {Microbial inoculum development for ameliorating crop drought stress: A case study of Variovorax paradoxus 5C-2.}, journal = {New biotechnology}, volume = {56}, number = {}, pages = {103-113}, doi = {10.1016/j.nbt.2019.12.006}, pmid = {31899322}, issn = {1876-4347}, mesh = {Agricultural Inoculants/*metabolism ; Comamonadaceae/*metabolism ; Crops, Agricultural/growth & development/metabolism/*microbiology ; *Droughts ; Rhizosphere ; *Stress, Physiological ; }, abstract = {Drought affects plant hormonal homeostasis, including root to shoot signalling. The plant is intimately connected below-ground with soil-dwelling microbes, including plant growth promoting rhizobacteria (PGPR) that can modulate plant hormonal homeostasis. Incorporating PGPR into the rhizosphere often delivers favourable results in greenhouse experiments, while field applications are much less predictable. 
We review the natural processes that affect the formation and dynamics of the rhizosphere, establishing a model for successful field application of PGPR utilizing an example microbial inoculum, Variovorax paradoxus 5C-2.}, } @article {pmid31897516, year = {2020}, author = {Rasheed, A and Takumi, S and Hassan, MA and Imtiaz, M and Ali, M and Morgunov, AI and Mahmood, T and He, Z}, title = {Appraisal of wheat genomics for gene discovery and breeding applications: a special emphasis on advances in Asia.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {133}, number = {5}, pages = {1503-1520}, doi = {10.1007/s00122-019-03523-w}, pmid = {31897516}, issn = {1432-2242}, support = {31950410563//National Natural Science Foundation of China-Yunnan Joint Fund/ ; 2019//National Natural Science Foundation of China/ ; 16H04862//Grant-in-Aids for Scientific Research/ ; 19H04863//Scientific Research on Innovative Areas/ ; }, abstract = {KEY MESSAGE: We discussed the most recent efforts in wheat functional genomics to discover new genes and their deployment in breeding with special emphasis on advances in Asian countries. Wheat research community is making significant progress to bridge genotype-to-phenotype gap and then applying this knowledge in genetic improvement. The advances in genomics and phenomics have intrigued wheat researchers in Asia to make best use of this knowledge in gene and trait discovery. These advancements include, but are not limited to, map-based gene cloning, translational genomics, gene mapping, association genetics, gene editing and genomic selection. We reviewed more than 57 homeologous genes discovered underpinning important traits and multiple strategies used for their discovery. Further, the complementary advancements in wheat phenomics and analytical approaches to understand the genetics of wheat adaptability, resilience to climate extremes and resistance to pests and diseases were discussed. 
The challenge to build a gold standard reference genome sequence of bread wheat is now achieved and several de novo reference sequences from the cultivars representing different gene pools will be available soon. New pan-genome sequencing resources of wheat will strengthen the foundation required for accelerated gene discovery and provide more opportunities to practice the knowledge-based breeding.}, } @article {pmid31896243, year = {2019}, author = {Sulthana, A and Lakshmi, SG and Madempudi, RS}, title = {High-quality draft genome and characterization of commercially potent probiotic Lactobacillus strains.}, journal = {Genomics \& informatics}, volume = {17}, number = {4}, pages = {e43}, pmid = {31896243}, issn = {1598-866X}, abstract = {Lactobacillus acidophilus UBLA-34, L. paracasei UBLPC-35, L. plantarum UBLP-40, and L. reuteri UBLRU-87 were isolated from different varieties of fermented foods. To determine the probiotic safety at the strain level, the whole genome of the respective strains was sequenced, assembled, and characterized. Both the core-genome and pan-genome phylogeny showed that L. reuteri was closer to L. plantarum than to L. acidophilus, which was closest to L. paracasei. The genomic analysis of all the strains confirmed the absence of genes encoding putative virulence factors, antibiotic resistance, and the plasmids.}, } @article {pmid31893442, year = {2020}, author = {Hu, H and Yuan, Y and Bayer, PE and Fernandez, CT and Scheben, A and Golicz, AA and Edwards, D}, title = {Legume Pangenome Construction Using an Iterative Mapping and Assembly Approach.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2107}, number = {}, pages = {35-47}, doi = {10.1007/978-1-0716-0235-5_3}, pmid = {31893442}, issn = {1940-6029}, abstract = {A pangenome is a collection of genomic sequences found in the entire species rather than a single individual. 
It allows for comprehensive, species-wide characterization of genetic variations and mining of variable genes which may play important roles in phenotypes of interest. Recent advances in sequencing technologies have facilitated draft genome sequence construction and have made pangenome constructions feasible. Here, we present a reference genome-based iterative mapping and assembly method to construct a pangenome for a legume species.}, } @article {pmid31891864, year = {2020}, author = {Kim, Y and Gu, C and Kim, HU and Lee, SY}, title = {Current status of pan-genome analysis for pathogenic bacteria.}, journal = {Current opinion in biotechnology}, volume = {63}, number = {}, pages = {54-62}, doi = {10.1016/j.copbio.2019.12.001}, pmid = {31891864}, issn = {1879-0429}, mesh = {*Bacteria/genetics ; *Genome ; Genome, Bacterial/genetics ; }, abstract = {Biological knowledge accumulated over the decades and advances in computational methods have facilitated the implementation of pan-genome analysis that aims at better understanding of genotype-phenotype associations of a specific group of organisms. Pan-genome analysis has been shown to be an effective approach to better understand a clade of pathogenic bacteria because it helps developing various and tailored therapeutic strategies on the basis of their biological similarities and differences. Here, we review recent progress in the pan-genome analysis of pathogenic bacteria. In particular, we focus on computational tools that allow streamlined pan-genome analysis. 
Also, various applications of pan-genome analysis including those relevant to devising strategies for the prevention and treatment of pathogenic bacteria are reviewed.}, } @article {pmid31884971, year = {2019}, author = {Coutinho, FH and Edwards, RA and Rodríguez-Valera, F}, title = {Charting the diversity of uncultured viruses of Archaea and Bacteria.}, journal = {BMC biology}, volume = {17}, number = {1}, pages = {109}, pmid = {31884971}, issn = {1741-7007}, mesh = {Archaea/*virology ; Bacteria/*virology ; Bacteriophages/*genetics ; *Genome, Viral ; *Microbiota ; Phylogeny ; }, abstract = {BACKGROUND: Viruses of Archaea and Bacteria are among the most abundant and diverse biological entities on Earth. Unraveling their biodiversity has been challenging due to methodological limitations. Recent advances in culture-independent techniques, such as metagenomics, shed light on the unknown viral diversity, revealing thousands of new viral nucleotide sequences at an unprecedented scale. However, these novel sequences have not been properly classified and the evolutionary associations between them were not resolved.

RESULTS: Here, we performed phylogenomic analysis of nearly 200,000 viral nucleotide sequences to establish GL-UVAB: Genomic Lineages of Uncultured Viruses of Archaea and Bacteria. The pan-genome content of the identified lineages shed light on some of their infection strategies, potential to modulate host physiology, and mechanisms to escape host resistance systems. Furthermore, using GL-UVAB as a reference database for annotating metagenomes revealed elusive habitat distribution patterns of viral lineages and environmental drivers of community composition.

CONCLUSIONS: These findings provide insights about the genomic diversity and ecology of viruses of prokaryotes. The source code used in these analyses is freely available at https://sourceforge.net/projects/gluvab/.}, } @article {pmid31882191, year = {2020}, author = {Golicz, AA and Bayer, PE and Bhalla, PL and Batley, J and Edwards, D}, title = {Pangenomics Comes of Age: From Bacteria to Plant and Animal Applications.}, journal = {Trends in genetics : TIG}, volume = {36}, number = {2}, pages = {132-145}, doi = {10.1016/j.tig.2019.11.006}, pmid = {31882191}, issn = {0168-9525}, abstract = {The pangenome refers to a collection of genomic sequence found in the entire species or population rather than in a single individual; the sequence can be core, present in all individuals, or accessory (variable or dispensable), found in a subset of individuals only. While pangenomic studies were first undertaken in bacterial species, developments in genome sequencing and assembly approaches have allowed construction of pangenomes for eukaryotic organisms, fungi, plants, and animals, including two large-scale human pangenome projects. 
Analysis of these pangenomes revealed key differences, most likely stemming from divergent evolutionary histories, but also surprising similarities.}, } @article {pmid31881843, year = {2019}, author = {Lee, IPA and Andam, CP}, title = {Pan-genome diversification and recombination in Cronobacter sakazakii, an opportunistic pathogen in neonates, and insights to its xerotolerant lifestyle.}, journal = {BMC microbiology}, volume = {19}, number = {1}, pages = {306}, pmid = {31881843}, issn = {1471-2180}, mesh = {Cronobacter sakazakii/*genetics/*pathogenicity ; *Genome, Bacterial ; Genomics ; Humans ; Multigene Family ; Phylogeny ; *Recombination, Genetic ; Species Specificity ; Virulence ; }, abstract = {BACKGROUND: Cronobacter sakazakii is an emerging opportunistic bacterial pathogen known to cause neonatal and pediatric infections, including meningitis, necrotizing enterocolitis, and bacteremia. Multiple disease outbreaks of C. sakazakii have been documented in the past few decades, yet little is known of its genomic diversity, adaptation, and evolution. Here, we analyzed the pan-genome characteristics and phylogenetic relationships of 237 genomes of C. sakazakii and 48 genomes of related Cronobacter species isolated from diverse sources.

RESULTS: The C. sakazakii pan-genome contains 17,158 orthologous gene clusters, and approximately 19.5% of these constitute the core genome. Phylogenetic analyses reveal the presence of at least ten deep branching monophyletic lineages indicative of ancestral diversification. We detected enrichment of functions involved in proton transport and rotational mechanism in accessory genes exclusively found in human-derived strains. In environment-exclusive accessory genes, we detected enrichment for those involved in tryptophan biosynthesis and indole metabolism. However, we did not find significantly enriched gene functions for those genes exclusively found in food strains. The most frequently detected virulence genes are those that encode proteins associated with chemotaxis, enterobactin synthesis, ferrienterobactin transporter, type VI secretion system, galactose metabolism, and mannose metabolism. The genes fos which encodes resistance against fosfomycin, a broad-spectrum cell wall synthesis inhibitor, and mdf(A) which encodes a multidrug efflux transporter were found in nearly all genomes. We found that a total of 2991 genes in the pan-genome have had a history of recombination. Many of the most frequently recombined genes are associated with nutrient acquisition, metabolism and toxin production.

CONCLUSIONS: Overall, our results indicate that the presence of a large accessory gene pool, ability to switch between ecological niches, a diverse suite of antibiotic resistance, virulence and niche-specific genes, and frequent recombination partly explain the remarkable adaptability of C. sakazakii within and outside the human host. These findings provide critical insights that can help define the development of effective disease surveillance and control strategies for Cronobacter-related diseases.}, } @article {pmid31870294, year = {2019}, author = {Wang, Y and Luo, L and Li, Q and Wang, H and Wang, Y and Sun, H and Xu, J and Lan, R and Ye, C}, title = {Genomic dissection of the most prevalent Listeria monocytogenes clone, sequence type ST87, in China.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {1014}, pmid = {31870294}, issn = {1471-2164}, support = {2018ZZKTB07//State Key Laboratory of Infectious Disease Prevention and Control/ ; 2018SKLID801//State Key Laboratory of Infectious Disease Prevention and Control/ ; 31800004//Young Scientists Fund/ ; }, mesh = {China ; Genome, Bacterial/genetics ; *Genomics ; Listeria monocytogenes/*genetics/pathogenicity/virology ; Multigene Family/genetics ; Phylogeny ; Plasmids/genetics ; Polymorphism, Single Nucleotide ; Prophages/physiology ; Virulence/genetics ; Whole Genome Sequencing/*methods ; }, abstract = {BACKGROUND: Listeria monocytogenes consists of four lineages that occupy a wide variety of ecological niches. Sequence type (ST) 87 (serotype 1/2b), belonging to lineage I, is one of the most common STs isolated from food products, food associated environments and sporadic listeriosis in China. Here, we performed a comparative genomic analysis of the L. monocytogenes ST87 clone by sequencing 71 strains representing a diverse range of sources, different geographical locations and isolation years.

RESULTS: The core genome and pan genome of ST87 contained 2667 genes and 3687 genes respectively. Phylogenetic analysis based on core genome SNPs divided the 71 strains into 10 clades. The clinical strains were distributed among multiple clades. Four clades contained strains from multiple geographic regions and showed high genetic diversity. The major gene content variation of ST87 genomes was due to putative prophages, with eleven hotspots of the genome that harbor prophages. All strains carry an intact CRISPR/Cas system. Two major CRISPR spacer profiles were found which were not clustered phylogenetically. A large plasmid of about 90 Kb, which carried heavy metal resistance genes, was found in 32.4% (23/71) of the strains. All ST87 strains harbored the Listeria pathogenicity island (LIPI)-4 and a unique 10-open reading frame (ORF) genomic island containing a novel restriction-modification system.

CONCLUSION: Whole genome sequence analysis of L. monocytogenes ST87 enabled a clearer understanding of the population structure and the evolutionary history of ST87 L. monocytogenes in China. The novel genetic elements identified may contribute to its virulence and adaptation to different environmental niches. Our findings will be useful for the development of effective strategies for the prevention and treatment of listeriosis caused by this prevalent clone.}, } @article {pmid31861401, year = {2019}, author = {Albert, K and Rani, A and Sela, DA}, title = {Comparative Pangenomics of the Mammalian Gut Commensal Bifidobacterium longum.}, journal = {Microorganisms}, volume = {8}, number = {1}, pages = {}, pmid = {31861401}, issn = {2076-2607}, abstract = {Bifidobacterium longum colonizes mammalian gastrointestinal tracts where it could metabolize host-indigestible oligosaccharides. Although B. longum strains are currently segregated into three subspecies that reflect common metabolic capacities and genetic similarity, heterogeneity within subspecies suggests that these taxonomic boundaries may not be completely resolved. To address this, the B. longum pangenome was analyzed from representative strains isolated from a diverse set of sources. As a result, the B. longum pangenome is open and contains almost 17,000 genes, with over 85% of genes found in ≤28 of 191 strains. B. longum genomes share a small core gene set of only ~500 genes, or ~3% of the total pangenome. Although the individual B. 
longum subspecies pangenomes share similar relative abundances of clusters of orthologous groups, strains show inter- and intrasubspecies differences with respect to carbohydrate utilization gene content and growth phenotypes.}, } @article {pmid31848603, year = {2020}, author = {Sitto, F and Battistuzzi, FU}, title = {Estimating Pangenomes with Roary.}, journal = {Molecular biology and evolution}, volume = {37}, number = {3}, pages = {933-939}, pmid = {31848603}, issn = {1537-1719}, support = {R15 GM121981/GM/NIGMS NIH HHS/United States ; }, mesh = {Bacteria/*classification/genetics ; Bacterial Proteins/*genetics ; Computational Biology/*methods ; Phenotype ; Phylogeny ; Software ; Species Specificity ; }, abstract = {A description of the genetic makeup of a species based on a single genome is often insufficient because it ignores the variability in gene repertoire among multiple strains. The estimation of the pangenome of a species is a solution to this issue as it provides an overview of genes that are shared by all strains and genes that are present in only some of the genomes. These different sets of genes can then be analyzed functionally to explore correlations with unique phenotypes and adaptations. This protocol presents the usage of Roary, a Linux-native pangenome application. Roary is a straightforward software that provides 1) an overview about core and accessory genes for those interested in general trends and, also, 2) detailed information on gene presence/absence in each genome for in-depth analyses. 
Results are provided both in text and graphic format.}, } @article {pmid31847510, year = {2020}, author = {Heo, S and Lee, JS and Lee, JH and Jeong, DW}, title = {Comparative Genomic Analysis of Food-Originated Coagulase-Negative Staphylococcus: Analysis of Conserved Core Genes and Diversity of the Pan-Genome.}, journal = {Journal of microbiology and biotechnology}, volume = {30}, number = {3}, pages = {341-351}, doi = {10.4014/jmb.1910.10049}, pmid = {31847510}, issn = {1738-8872}, mesh = {Food Microbiology ; *Genome, Bacterial ; Genomics ; Phylogeny ; Plasmids ; Staphylococcus/*genetics/isolation & purification ; Transposases/metabolism ; }, abstract = {To shed light on the genetic differences among food-originated coagulase-negative Staphylococcus (CNS), we performed pan-genome analysis of five species: Staphylococcus carnosus (two strains), Staphylococcus equorum (two strains), Staphylococcus succinus (three strains), Staphylococcus xylosus (two strains), and Staphylococcus saprophyticus (one strain). The pan-genome size increases with each new strain and currently holds about 4,500 genes from 10 genomes. Specific genes were shown to be strain dependent but not species dependent. Most specific genes were of unknown function or encoded restriction-modification enzymes, transposases, or prophages. 
Our results indicate that unique genes have been acquired or lost by convergent evolution within individual strains.}, } @article {pmid31844108, year = {2019}, author = {Liang, CY and Yang, CH and Lai, CH and Huang, YH and Lin, JN}, title = {Comparative Genomics of 86 Whole-Genome Sequences in the Six Species of the Elizabethkingia Genus Reveals Intraspecific and Interspecific Divergence.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {19167}, pmid = {31844108}, issn = {2045-2322}, mesh = {Base Sequence ; Computer Simulation ; Drug Resistance, Bacterial/genetics ; Evolution, Molecular ; Flavobacteriaceae/*genetics/pathogenicity ; *Genetic Variation ; *Genome, Bacterial ; *Genomics ; Phylogeny ; Species Specificity ; Virulence Factors/genetics ; *Whole Genome Sequencing ; }, abstract = {Bacteria of the genus Elizabethkingia are emerging infectious agents that can cause infection in humans. The number of published whole-genome sequences of Elizabethkingia is rapidly increasing. In this study, we used comparative genomics to investigate the genomes of the six species in the Elizabethkingia genus, namely E. meningoseptica, E. anophelis, E. miricola, E. bruuniana, E. ursingii, and E. occulta. In silico DNA-DNA hybridization, whole-genome sequence-based phylogeny, pan genome analysis, and Kyoto Encyclopedia of Genes and Genomes (KEGG) analyses were performed, and clusters of orthologous groups were evaluated. Of the 86 whole-genome sequences available in GenBank, 21 were complete genome sequences and 65 were shotgun sequences. In silico DNA-DNA hybridization clearly delineated the six Elizabethkingia species. Phylogenetic analysis confirmed that E. bruuniana, E. ursingii, and E. occulta were closer to E. miricola than to E. meningoseptica and E. anophelis. A total of 2,609 clusters of orthologous groups were identified among the six type strains of the Elizabethkingia genus. 
Metabolism-related clusters of orthologous groups accounted for the majority of gene families in KEGG analysis. New genes were identified that substantially increased the total repertoire of the pan genome after the addition of 86 Elizabethkingia genomes, which suggests that Elizabethkingia has shown adaptive evolution to environmental change. This study presents a comparative genomic analysis of Elizabethkingia, and the results of this study provide knowledge that facilitates a better understanding of this microorganism.}, } @article {pmid31842745, year = {2019}, author = {D'Mello, A and Ahearn, CP and Murphy, TF and Tettelin, H}, title = {ReVac: a reverse vaccinology computational pipeline for prioritization of prokaryotic protein vaccine candidates.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {981}, pmid = {31842745}, issn = {1471-2164}, support = {R01 AI019641/AI/NIAID NIH HHS/United States ; UL1 TR001412/TR/NCATS NIH HHS/United States ; UL1TR001412//National Institutes of Health (US)/ ; R01AI019641//National Institutes of Health (US)/ ; }, mesh = {Bacteria/*genetics/immunology ; Bacterial Proteins/genetics/*immunology ; Bacterial Vaccines/genetics/immunology ; Computational Biology/*methods ; Humans ; Machine Learning ; Software ; Vaccines, Subunit/genetics/immunology ; Vaccinology/*methods ; }, abstract = {BACKGROUND: Reverse vaccinology accelerates the discovery of potential vaccine candidates (PVCs) prior to experimental validation. Current programs typically use one bacterial proteome to identify PVCs through a filtering architecture using feature prediction programs or a machine learning approach. Filtering approaches may eliminate potential antigens based on limitations in the accuracy of prediction tools used. Machine learning approaches are heavily dependent on the selection of training datasets with experimentally validated antigens (positive control) and non-protective-antigens (negative control). 
The use of one or few bacterial proteomes does not assess PVC conservation among strains, an important feature of vaccine antigens.

RESULTS: We present ReVac, which implements both a panoply of feature prediction programs without filtering out proteins, and scoring of candidates based on predictions made on curated positive and negative control PVC datasets. ReVac surveys several genomes assessing protein conservation, as well as DNA and protein repeats, which may result in variable expression of PVCs. ReVac's orthologous clustering of conserved genes identifies core and dispensable genome components. This is useful for determining the degree of conservation of PVCs among the population of isolates for a given pathogen. Potential vaccine candidates are then prioritized based on conservation and overall feature-based scoring. We present the application of ReVac, applied to 69 Moraxella catarrhalis and 270 non-typeable Haemophilus influenzae genomes, prioritizing 64 and 29 proteins as PVCs, respectively.

CONCLUSION: ReVac's use of a scoring scheme ranks PVCs for subsequent experimental testing. It employs a redundancy-based approach in its predictions of features using several prediction tools. The protein's features are collated, and each protein is ranked based on the scoring scheme. Multi-genome analyses performed in ReVac allow for a comprehensive overview of PVCs from a pan-genome perspective, as an essential pre-requisite for any bacterial subunit vaccine design. ReVac prioritized PVCs of two human respiratory pathogens, identifying both novel and previously validated PVCs.}, } @article {pmid31840364, year = {2020}, author = {Haro-Moreno, JM and Rodriguez-Valera, F and Rosselli, R and Martinez-Hernandez, F and Roda-Garcia, JJ and Gomez, ML and Fornas, O and Martinez-Garcia, M and López-Pérez, M}, title = {Ecogenomics of the SAR11 clade.}, journal = {Environmental microbiology}, volume = {22}, number = {5}, pages = {1748-1763}, pmid = {31840364}, issn = {1462-2920}, support = {ACIF/2015/332//Generalitat Valenciana/International ; 5334//Gordon and Betty Moore Foundation/International ; BES-2014-067828//Ministerio de Economía y Competitividad/International ; CGL2013-40564-R//Ministerio de Economía y Competitividad/International ; CGL2016-76273-P//Ministerio de Economía y Competitividad/International ; IJCI-2017-34002//Ministerio de Economía y Competitividad/International ; SAF2013-49267-EXP//Ministerio de Economía y Competitividad/International ; }, abstract = {Members of the SAR11 clade, despite their high abundance, are often poorly represented by metagenome-assembled genomes. This fact has hampered our knowledge about their ecology and genetic diversity. Here we examined 175 SAR11 genomes, including 47 new single-amplified genomes. The presence of the first genomes associated with subclade IV suggests that, in the same way as subclade V, they might be outside the proposed Pelagibacterales order. 
An expanded phylogenomic classification together with patterns of metagenomic recruitment at a global scale have allowed us to define new ecogenomic units of classification (genomospecies), appearing at different, and sometimes restricted, metagenomic data sets. We detected greater microdiversity across the water column at a single location than in samples collected from similar depth across the global ocean, suggesting little influence of biogeography. In addition, pangenome analysis revealed that the flexible genome was essential to shape genomospecies distribution. In one genomospecies preferentially found within the Mediterranean, a set of genes involved in phosphonate utilization was detected. While another, with a more cosmopolitan distribution, was unique in having an aerobic purine degradation pathway. Together, these results provide a glimpse of the enormous genomic diversity within this clade at a finer resolution than the currently defined clades.}, } @article {pmid31838800, year = {2020}, author = {Choi, JY and Kim, SC and Lee, PC}, title = {Comparative Genome Analysis of Psychrobacillus Strain PB01, Isolated from an Iceberg.}, journal = {Journal of microbiology and biotechnology}, volume = {30}, number = {2}, pages = {237-243}, doi = {10.4014/jmb.1909.09008}, pmid = {31838800}, issn = {1738-8872}, abstract = {A novel psychrotolerant Psychrobacillus strain PB01, isolated from an Antarctic iceberg, was comparatively analyzed with five related strains. The complete genome of strain PB01 consists of a single circular chromosome (4.3 Mb) and a plasmid (19 Kb). As potential low-temperature adaptation strategies, strain PB01 has four genes encoding cold-shock proteins, two genes encoding DEAD-box RNA helicases, and eight genes encoding transporters for glycine betaine, which can serve as a cryoprotectant, on the genome. 
The pan-genome structure of the six Psychrobacillus strains suggests that strain PB01 might have evolved to adapt to extreme environments by changing its genome content to gain higher capacity for DNA repair, translation, and membrane transport. Notably, strain PB01 possesses a complete TCA cycle consisting of eight enzymes as well as three additional Helicobacter pylori-type enzymes: ferredoxin-dependent 2-oxoglutarate synthase, succinyl-CoA/acetoacetyl-CoA transferase, and malate/quinone oxidoreductase. The co-existence of the genes for TCA cycle enzymes has also been identified in the other five Psychrobacillus strains.}, } @article {pmid31824466, year = {2019}, author = {Lee, BH and Cole, S and Badel-Berchoux, S and Guillier, L and Felix, B and Krezdorn, N and Hébraud, M and Bernardi, T and Sultan, I and Piveteau, P}, title = {Biofilm Formation of Listeria monocytogenes Strains Under Food Processing Environments and Pan-Genome-Wide Association Study.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2698}, pmid = {31824466}, issn = {1664-302X}, abstract = {Concerns about food contamination by Listeria monocytogenes are on the rise with increasing consumption of ready-to-eat foods. Biofilm production of L. monocytogenes is presumed to be one of the ways that confer its increased resistance and persistence in the food chain. In this study, a collection of isolates from foods and food processing environments (FPEs) representing persistent, prevalent, and rarely detected genotypes was evaluated for biofilm forming capacities including adhesion and sessile biomass production under diverse environmental conditions. The quantity of sessile biomass varied according to growth conditions, lineage, serotype as well as genotype but association of clonal complex (CC) 26 genotype with biofilm production was evidenced under cold temperature. In general, relative biofilm productivity of each strain varied inconsistently across growth conditions. 
Under our experimental conditions, there were no clear associations between biofilm formation efficiency and persistent or prevalent genotypes. Distinct extrinsic factors affected specific steps of biofilm formation. Sudden nutrient deprivation enhanced cellular adhesion while a prolonged nutrient deficiency impeded biofilm maturation. Salt addition increased biofilm production, moreover, nutrient limitation supplemented by salt significantly stimulated biofilm formation. Pan-genome-wide association study (Pan-GWAS) assessed genetic composition with regard to biofilm phenotypes for the first time. The number of reported genes differed depending on the growth conditions and the number of common genes was low. However, a broad overview of the ontology contents revealed similar patterns regardless of the conditions. Functional analysis showed that functions related to transformation/competence and surface proteins including Internalins were highly enriched.}, } @article {pmid31815935, year = {2019}, author = {Jandrasits, C and Kröger, S and Haas, W and Renard, BY}, title = {Computational pan-genome mapping and pairwise SNP-distance improve detection of Mycobacterium tuberculosis transmission clusters.}, journal = {PLoS computational biology}, volume = {15}, number = {12}, pages = {e1007527}, pmid = {31815935}, issn = {1553-7358}, mesh = {Chromosome Mapping ; Computational Biology ; Computer Simulation ; DNA, Bacterial/genetics ; Databases, Genetic/statistics & numerical data ; Disease Outbreaks/statistics & numerical data ; Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Humans ; Molecular Epidemiology/statistics & numerical data ; Mycobacterium tuberculosis/classification/*genetics ; Polymorphism, Single Nucleotide ; Sequence Analysis, DNA ; Tuberculosis/epidemiology/microbiology/*transmission ; Whole Genome Sequencing ; }, abstract = {Next-generation sequencing based base-by-base distance measures have become an integral complement to epidemiological 
investigation of infectious disease outbreaks. This study introduces PANPASCO, a computational pan-genome mapping based, pairwise distance method that is highly sensitive to differences between cases, even when located in regions of lineage specific reference genomes. We show that our approach is superior to previously published methods in several datasets and across different Mycobacterium tuberculosis lineages, as its characteristics allow the comparison of a high number of diverse samples in one analysis—a scenario that becomes more and more likely with the increased usage of whole-genome sequencing in transmission surveillance.}, } @article {pmid31804713, year = {2020}, author = {Emery, A and Marpaux, N and Naegelen, C and Valot, B and Morel, P and Hocquet, D}, title = {Genotypic study of Citrobacter koseri, an emergent platelet contaminant since 2012 in France.}, journal = {Transfusion}, volume = {60}, number = {2}, pages = {245-249}, doi = {10.1111/trf.15617}, pmid = {31804713}, issn = {1537-2995}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/genetics/metabolism ; Citrobacter koseri/drug effects/*genetics ; France ; Genotype ; Humans ; Phylogeny ; }, abstract = {BACKGROUND: Transfusion-transmitted bacterial infection is a rare occurrence but the most feared complication in transfusion practices. Between 2012 and 2017, five cases of platelet concentrates (PCs) contaminated with the bacterial pathogen Citrobacter koseri (PC-Ck) have been reported in France, with two leading to the death of the recipients. We tested the possibilities of the emergence of a PC-specific clone of C. koseri (Ck) and of specific bacterial genes associated with PC contamination.

STUDY DESIGN AND METHODS: The phylogenetic network, based on a homemade Ck core genome scheme, inferred from the genomes of 20 worldwide Ck isolates unrelated to PC contamination taken as controls (U-Ck) and the genomes of the five PC-Ck, explored the clonal relationship between the genomes and evaluated the distribution of PC-Ck throughout the species. Along with this core genome multilocus sequence typing approach, a Ck pan genome has been used to seek genes specific to PC-Ck isolates.

RESULTS: Our genomic approach suggested that the population of C. koseri is nonclonal, although it also identified a cluster containing three PC-Ck and eight U-Ck. Indeed, the PC-Ck did not share any specific genes.

CONCLUSION: The elevated incidence of PCs contaminated by C. koseri in France between 2012 and 2017 was not due to the dissemination of a clone. The determinants of the recent outbreaks of PC contamination with C. koseri are still unknown.}, } @article {pmid31803240, year = {2019}, author = {Li, R and Fu, W and Su, R and Tian, X and Du, D and Zhao, Y and Zheng, Z and Chen, Q and Gao, S and Cai, Y and Wang, X and Li, J and Jiang, Y}, title = {Towards the Complete Goat Pan-Genome by Recovering Missing Genomic Segments From the Reference Genome.}, journal = {Frontiers in genetics}, volume = {10}, number = {}, pages = {1169}, pmid = {31803240}, issn = {1664-8021}, abstract = {It is broadly expected that next generation sequencing will ultimately generate a complete genome as is the latest goat reference genome (ARS1), which is considered to be one of the most continuous assemblies in livestock. However, the rich diversity of worldwide goat breeds indicates that a genome from one individual would be insufficient to represent the whole genomic contents of goats. By comparing nine de novo assemblies from seven sibling species of domestic goat with ARS1 and using resequencing and transcriptome data from goats for verification, we identified a total of 38.3 Mb sequences that were absent in ARS1. The pan-sequences contain genic fractions with considerable expression. Using the pan-genome (ARS1 together with the pan-sequences) as a reference genome, variation calling efficacy can be appreciably improved. A total of 56,657 spurious SNPs per individual were repressed and 24,414 novel SNPs per individual on average were recovered as a result of better reads mapping quality. The transcriptomic mapping rate was also increased by ∼1.15%. Our study demonstrated that comparing de novo assemblies from closely related species is an efficient and reliable strategy for finding missing sequences from the reference genome and could be applicable to other species. 
Pan-genome can serve as an improved reference genome in animals for a better exploration of the underlying genomic variations and could increase the probability of finding genotype-phenotype associations assessed by a comprehensive variation database containing much more differences between individuals. We have constructed a goat pan-genome web interface for data visualization (http://animal.nwsuaf.edu.cn/panGoat).}, } @article {pmid31798566, year = {2019}, author = {Sutton, D and Livingstone, PG and Furness, E and Swain, MT and Whitworth, DE}, title = {Genome-Wide Identification of Myxobacterial Predation Genes and Demonstration of Formaldehyde Secretion as a Potentially Predation-Resistant Trait of Pseudomonas aeruginosa.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2650}, pmid = {31798566}, issn = {1664-302X}, abstract = {Despite widespread use in human biology, genome-wide association studies (GWAS) of bacteria are few and have, to date, focused primarily on pathogens. Myxobacteria are predatory microbes with large patchwork genomes, with individual strains secreting unique cocktails of predatory proteins and metabolites. We investigated whether a GWAS strategy could be applied to myxobacteria to identify genes associated with predation. Deduced proteomes from 29 myxobacterial genomes (including eight Myxococcus genomes sequenced for this study), were clustered into orthologous groups, and the presence/absence of orthologues assessed in superior and inferior predators of ten prey organisms. 139 'predation genes' were identified as being associated significantly with predation, including some whose annotation suggested a testable predatory mechanism. Formaldehyde dismutase (fdm) was associated with superior predation of Pseudomonas aeruginosa, and predatory activity of a strain lacking fdm could be increased by the exogenous addition of a formaldehyde detoxifying enzyme, suggesting that production of formaldehyde by P. 
aeruginosa acts as an anti-predation behaviour. This study establishes the utility of bacterial GWAS to investigate microbial processes beyond pathogenesis, giving plausible and verifiable associations between gene presence/absence and predatory phenotype. We propose that the slow growth rate of myxobacteria, coupled with their predatory mechanism of constitutive secretion, has rendered them relatively resistant to genome streamlining. The resultant genome expansion made possible their observed accumulation of prey-specific predatory genes, without requiring them to be selected for by frequent or recent predation on diverse prey, potentially explaining both the large pan-genome and broad prey range of myxobacteria.}, } @article {pmid31796569, year = {2019}, author = {Yuan, J and Li, YY and Xu, Y and Sun, BJ and Shao, J and Zhang, D and Li, K and Fan, DD and Xue, ZB and Chen, WH and Pak, C and Lou, YL and Su, JZ and Zheng, MQ}, title = {Molecular Signatures Related to the Virulence of Bacillus cereus Sensu Lato, a Leading Cause of Devastating Endophthalmitis.}, journal = {mSystems}, volume = {4}, number = {6}, pages = {}, pmid = {31796569}, issn = {2379-5077}, abstract = {Bacillus endophthalmitis is a devastating eye infection that causes rapid blindness through extracellular tissue-destructive exotoxins. Despite its importance, knowledge of the phylogenetic relationships and population structure of intraocular Bacillus spp. is lacking. In this study, we sequenced the whole genomes of eight Bacillus intraocular pathogens independently isolated from 8/52 patients with posttraumatic Bacillus endophthalmitis infections in the Eye Hospital of Wenzhou Medical University between January 2010 and December 2018. 
Phylogenetic analysis revealed that the pathogenic intraocular isolates belonged to Bacillus cereus, Bacillus thuringiensis and Bacillus toyonensis. To determine the virulence of the ocular isolates, three representative strains were injected into mouse models, and severe endophthalmitis leading to blindness was observed. Through incorporating publicly available genomes for Bacillus spp., we found that the intraocular pathogens could be isolated independently but displayed a similar genetic context. In addition, our data provide genome-wide support for intraocular and gastrointestinal sources of Bacillus spp. belonging to different lineages. Importantly, we identified five molecular signatures of virulence and motility genes associated with intraocular infection, namely, plcA-2, InhA-3, InhA-4, hblA-5, and fliD using pangenome-wide association studies. The characterization of overrepresented genes in the intraocular isolates holds value to predict bacterial evolution and for the design of future intervention strategies in patients with endophthalmitis. IMPORTANCE: In this study, we provided a detailed and comprehensive clinicopathological and pathogenic report of Bacillus endophthalmitis over the 8 years of the study period. We first reported the whole-genome sequence of Bacillus spp. causing devastating endophthalmitis and found that Bacillus toyonensis is able to cause endophthalmitis. Finally, we revealed significant endophthalmitis-associated virulence genes involved in hemolysis, immunity inhibition, and pathogenesis. 
Overall, as more sequencing data sets become available, these data will facilitate comparative research and will reveal the emergence of pathogenic "ocular bacteria."}, } @article {pmid31787539, year = {2020}, author = {Khan, AW and Garg, V and Roorkiwal, M and Golicz, AA and Edwards, D and Varshney, RK}, title = {Super-Pangenome by Integrating the Wild Side of a Species for Accelerated Crop Improvement.}, journal = {Trends in plant science}, volume = {25}, number = {2}, pages = {148-158}, pmid = {31787539}, issn = {1878-4372}, mesh = {*Genetic Variation ; *Genome, Plant ; Genomics ; }, abstract = {The pangenome provides genomic variations in the cultivated gene pool for a given species. However, as the crop's gene pool comprises many species, especially wild relatives with diverse genetic stock, here we suggest using accessions from all available species of a given genus for the development of a more comprehensive and complete pangenome, which we refer to as a super-pangenome. The super-pangenome provides a complete genomic variation repertoire of a genus and offers unprecedented opportunities for crop improvement. This opinion article focuses on recent developments in crop pangenomics, the need for a super-pangenome that should include wild species, and its application for crop improvement.}, } @article {pmid31785311, year = {2020}, author = {Chaudhry, V and Patil, PB}, title = {Evolutionary insights into adaptation of Staphylococcus haemolyticus to human and non-human niches.}, journal = {Genomics}, volume = {112}, number = {2}, pages = {2052-2062}, doi = {10.1016/j.ygeno.2019.11.018}, pmid = {31785311}, issn = {1089-8646}, abstract = {Staphylococcus haemolyticus is a well-known member of human skin microbiome and an emerging opportunistic human pathogen. Presently, evolutionary studies are limited to human isolates even though it is reported from plants with beneficial properties and in environmental settings. 
In the present study, we report isolation of novel S. haemolyticus strains from surface sterilized rice seeds and compare their genome to other isolates from diverse niches available in public domain. The study showed expanding nature of pan-genome and revealed set of genes with putative functions related to its adaptability. This is seen by presence of type II lanthipeptide cluster in rice isolates, metal homeostasis genes in an isolate from copper coin and gene encoding methicillin resistance in human isolates. The present study on differential genome dynamics and role of horizontal gene transfers has provided novel insights into capability for ecological diversification of a bacterium of significance to human health.}, } @article {pmid31781066, year = {2019}, author = {Peeters, C and De Canck, E and Cnockaert, M and De Brandt, E and Snauwaert, C and Verheyde, B and Depoorter, E and Spilker, T and LiPuma, JJ and Vandamme, P}, title = {Comparative Genomics of Pandoraea, a Genus Enriched in Xenobiotic Biodegradation and Metabolism.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2556}, pmid = {31781066}, issn = {1664-302X}, abstract = {Comparative analysis of partial gyrB, recA, and gltB gene sequences of 84 Pandoraea reference strains and field isolates revealed several clusters that included no taxonomic reference strains. The gyrB, recA, and gltB phylogenetic trees were used to select 27 strains for whole-genome sequence analysis and for a comparative genomics study that also included 41 publicly available Pandoraea genome sequences. The phylogenomic analyses included a Genome BLAST Distance Phylogeny approach to calculate pairwise digital DNA-DNA hybridization values and their confidence intervals, average nucleotide identity analyses using the OrthoANIu algorithm, and a whole-genome phylogeny reconstruction based on 107 single-copy core genes using bcgTree. 
These analyses, along with subsequent chemotaxonomic and traditional phenotypic analyses, revealed the presence of 17 novel Pandoraea species among the strains analyzed, and allowed the identification of several unclassified Pandoraea strains reported in the literature. The genus Pandoraea has an open pan genome that includes many orthogroups in the 'Xenobiotics biodegradation and metabolism' KEGG pathway, which likely explains the enrichment of these species in polluted soils and participation in the biodegradation of complex organic substances. We propose to formally classify the 17 novel Pandoraea species as P. anapnoica sp. nov. (type strain LMG 31117T = CCUG 73385T), P. anhela sp. nov. (type strain LMG 31108T = CCUG 73386T), P. aquatica sp. nov. (type strain LMG 31011T = CCUG 73384T), P. bronchicola sp. nov. (type strain LMG 20603T = ATCC BAA-110T), P. capi sp. nov. (type strain LMG 20602T = ATCC BAA-109T), P. captiosa sp. nov. (type strain LMG 31118T = CCUG 73387T), P. cepalis sp. nov. (type strain LMG 31106T = CCUG 39680T), P. commovens sp. nov. (type strain LMG 31010T = CCUG 73378T), P. communis sp. nov. (type strain LMG 31110T = CCUG 73383T), P. eparura sp. nov. (type strain LMG 31012T = CCUG 73380T), P. horticolens sp. nov. (type strain LMG 31112T = CCUG 73379T), P. iniqua sp. nov. (type strain LMG 31009T = CCUG 73377T), P. morbifera sp. nov. (type strain LMG 31116T = CCUG 73389T), P. nosoerga sp. nov. (type strain LMG 31109T = CCUG 73390T), P. pneumonica sp. nov. (type strain LMG 31114T = CCUG 73388T), P. soli sp. nov. (type strain LMG 31014T = CCUG 73382T), and P. terrigena sp. nov. 
(type strain LMG 31013T = CCUG 73381T).}, } @article {pmid31778355, year = {2019}, author = {Lupolova, N and Lycett, SJ and Gally, DL}, title = {A guide to machine learning for bacterial host attribution using genome sequence data.}, journal = {Microbial genomics}, volume = {5}, number = {12}, pages = {}, pmid = {31778355}, issn = {2057-5858}, support = {BB/P02095X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Animals ; Birds ; Cattle ; Genome, Bacterial ; Genome-Wide Association Study/methods ; *Host Specificity ; Humans ; *Machine Learning ; Salmonella Infections/*microbiology ; Salmonella Infections, Animal/*microbiology ; Salmonella typhimurium/*genetics/isolation & purification/pathogenicity ; Swine ; Whole Genome Sequencing/*methods ; }, abstract = {With the ever-expanding number of available sequences from bacterial genomes, and the expectation that this data type will be the primary one generated from both diagnostic and research laboratories for the foreseeable future, then there is both an opportunity and a need to evaluate how effectively computational approaches can be used within bacterial genomics to predict and understand complex phenotypes, such as pathogenic potential and host source. This article applied various quantitative methods such as diversity indexes, pangenome-wide association studies (GWAS) and dimensionality reduction techniques to better understand the data and then compared how well unsupervised and supervised machine learning (ML) methods could predict the source host of the isolates. The study uses the example of the pangenomes of 1203 Salmonella enterica serovar Typhimurium isolates in order to predict 'host of isolation' using these different methods. The article is aimed as a review of recent applications of ML in infection biology, but also, by working through this specific dataset, it allows discussion of the advantages and drawbacks of the different techniques. 
As with all such sub-population studies, the biological relevance will be dependent on the quality and diversity of the input data. Given this major caveat, we show that supervised ML has the potential to add real value to interpretation of bacterial genomic data, as it can provide probabilistic outcomes for important phenotypes, something that is very difficult to achieve with the other methods.}, } @article {pmid31776332, year = {2019}, author = {Eggertsson, HP and Kristmundsdottir, S and Beyter, D and Jonsson, H and Skuladottir, A and Hardarson, MT and Gudbjartsson, DF and Stefansson, K and Halldorsson, BV and Melsted, P}, title = {GraphTyper2 enables population-scale genotyping of structural variation using pangenome graphs.}, journal = {Nature communications}, volume = {10}, number = {1}, pages = {5402}, doi = {10.1038/s41467-019-13341-9}, pmid = {31776332}, issn = {2041-1723}, mesh = {Computer Graphics ; Databases, Genetic ; Genetics, Population ; *Genome, Human ; *Genomic Structural Variation ; Genotyping Techniques/*methods/statistics & numerical data ; Humans ; Iceland ; Pedigree ; Polymorphism, Single Nucleotide ; Reproducibility of Results ; *Software ; Workflow ; }, abstract = {Analysis of sequence diversity in the human genome is fundamental for genetic studies. Structural variants (SVs) are frequently omitted in sequence analysis studies, although each has a relatively large impact on the genome. Here, we present GraphTyper2, which uses pangenome graphs to genotype SVs and small variants using short-reads. Comparison to the syndip benchmark dataset shows that our SV genotyping is sensitive and variant segregation in families demonstrates the accuracy of our approach. We demonstrate that incorporating public assembly data into our pipeline greatly improves sensitivity, particularly for large insertions. We validate 6,812 SVs on average per genome using long-read data of 41 Icelanders. 
We show that GraphTyper2 can simultaneously genotype tens of thousands of whole-genomes by characterizing 60 million small variants and half a million SVs in 49,962 Icelanders, including 80 thousand SVs with high-confidence.}, } @article {pmid31771309, year = {2019}, author = {Chernysheva, N and Bystritskaya, E and Stenkova, A and Golovkin, I and Nedashkovskaya, O and Isaeva, M}, title = {Comparative Genomics and CAZyme Genome Repertoires of Marine Zobellia amurskyensis KMM 3526T and Zobellia laminariae KMM 3676T.}, journal = {Marine drugs}, volume = {17}, number = {12}, pages = {}, pmid = {31771309}, issn = {1660-3397}, support = {17-14-01065//Russian Science Foundation/ ; }, mesh = {Aquatic Organisms/enzymology/*genetics ; Bacterial Proteins/*genetics/metabolism ; Biotechnology/methods ; Carbohydrate Metabolism ; Flavobacteriaceae/enzymology/*genetics ; Genome, Bacterial/*genetics ; *Genomics ; Phylogeny ; Polysaccharides/metabolism ; Seaweed/chemistry/metabolism ; Sequence Analysis, DNA ; }, abstract = {We obtained two novel draft genomes of type Zobellia strains with estimated genome sizes of 5.14 Mb for Z. amurskyensis KMM 3526T and 5.16 Mb for Z. laminariae KMM 3676T. Comparative genomic analysis has been carried out between obtained and known genomes of Zobellia representatives. The pan-genome of Zobellia genus is composed of 4853 orthologous clusters and the core genome was estimated at 2963 clusters. The genus CAZome was represented by 775 GHs classified into 62 families, 297 GTs of 16 families, 100 PLs of 13 families, 112 CEs of 13 families, 186 CBMs of 18 families and 42 AAs of six families. A closer inspection of the carbohydrate-active enzyme (CAZyme) genomic repertoires revealed members of new putative subfamilies of GH16 and GH117, which can be biotechnologically promising for production of oligosaccharides and rare monomers with different bioactivities. 
We analyzed AA3s, among them putative FAD-dependent glycoside oxidoreductases (FAD-GOs) being of particular interest as promising biocatalysts for glycoside deglycosylation in food and pharmaceutical industries.}, } @article {pmid31768302, year = {2019}, author = {Cabrera-Contreras, R and Santamaría, RI and Bustos, P and Martínez-Flores, I and Meléndez-Herrada, E and Morelos-Ramírez, R and Barbosa-Amezcua, M and González-Covarrubias, V and Silva-Herzog, E and Soberón, X and González, V}, title = {Genomic diversity of prevalent Staphylococcus epidermidis multidrug-resistant strains isolated from a Children's Hospital in México City in an eight-years survey.}, journal = {PeerJ}, volume = {7}, number = {}, pages = {e8068}, pmid = {31768302}, issn = {2167-8359}, abstract = {Staphylococcus epidermidis is a human commensal and pathogen worldwide distributed. In this work, we surveyed for multi-resistant S. epidermidis strains in eight years at a children's health-care unit in México City. Multidrug-resistant S. epidermidis were present in all years of the study, including resistance to methicillin, beta-lactams, fluoroquinolones, and macrolides. To understand the genetic basis of antibiotic resistance and its association with virulence and gene exchange, we sequenced the genomes of 17 S. epidermidis isolates. Whole-genome nucleotide identities between all the pairs of S. epidermidis strains were about 97% to 99%. We inferred a clonal structure and eight Multilocus Sequence Types (MLSTs) in the S. epidermidis sequenced collection. The profile of virulence includes genes involved in biofilm formation and phenol-soluble modulins (PSMs). Half of the S. epidermidis analyzed lacked the ica operon for biofilm formation. Likely, they are commensal S. epidermidis strains but multi-antibiotic resistant. Uneven distribution of insertion sequences, phages, and CRISPR-Cas immunity phage systems suggest frequent horizontal gene transfer. Rates of recombination between S. 
epidermidis strains were more prevalent than the mutation rate and affected the whole genome. Therefore, the multidrug resistance, independently of the pathogenic traits, might explain the persistence of specific highly adapted S. epidermidis clonal lineages in nosocomial settings.}, } @article {pmid31762508, year = {2019}, author = {Sujitha, S and Vishnu, US and Karthikeyan, R and Sankarasubramanian, J and Gunasekaran, P and Rajendhran, J}, title = {Genome Investigation of a Cariogenic Pathogen with Implications in Cardiovascular Diseases.}, journal = {Indian journal of microbiology}, volume = {59}, number = {4}, pages = {451-459}, pmid = {31762508}, issn = {0046-8991}, abstract = {The proportion of people suffering from cardiovascular diseases has risen by 34% in the last 15 years in India. Cardiomyopathy is among the many forms of CVDs present. Infection of heart muscles is the suspected etiological agent for the same. Oral pathogens gaining entry into the bloodstream are responsible for such infections. Streptococcus mutans is an oral pathogen with implications in cardiovascular diseases. Previous studies have shown certain strains of S. mutans are found predominantly within atherosclerotic plaques and extirpated valves. To decipher the genetic differences responsible for endothelial cell invasion, we have sequenced the genome of Streptococcus mutans B14. Pan-genome analysis, search for adhesion proteins through a special algorithm, and protein-protein interactions search through HPIDB have been done. Pan-genome analysis of 187 whole genomes, assemblies revealed 6965 genes in total and 918 genes forming the core gene cluster. Adhesion to the endothelial cell is a critical virulence factor distinguishing virulent and non-virulent strains. Overall, 4% of the total proteins in S. mutans B14 were categorized as adhesion proteins. 
Protein-protein interaction between putative adhesion proteins and Human extracellular matrix components was predicted, revealing novel interactions. A conserved gene catalyzing the synthesis of branched-chain amino acids in S. mutans B14 shows possible interaction with isoforms of cathepsin protein of the ECM. This genome sequence analysis indicates towards other proteins in the S. mutans genome, which might have a specific role to play in host cell interaction.}, } @article {pmid31758048, year = {2019}, author = {Decano, AG and Downing, T}, title = {An Escherichia coli ST131 pangenome atlas reveals population structure and evolution across 4,071 isolates.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {17394}, doi = {10.1038/s41598-019-54004-5}, pmid = {31758048}, issn = {2045-2322}, mesh = {Drug Resistance, Multiple, Bacterial/genetics ; Epidemics ; Escherichia coli/classification/*genetics/isolation & purification/pathogenicity ; Escherichia coli Infections/epidemiology/*microbiology ; Escherichia coli Proteins/genetics ; *Evolution, Molecular ; Genome, Bacterial/*genetics ; Genomics ; Genotype ; Geography ; Humans ; Molecular Epidemiology ; Phylogeny ; Plasmids/genetics ; Sequence Analysis, DNA ; beta-Lactamases/genetics ; }, abstract = {Escherichia coli ST131 is a major cause of infection with extensive antimicrobial resistance (AMR) facilitated by widespread beta-lactam antibiotic use. This drug pressure has driven extended-spectrum beta-lactamase (ESBL) gene acquisition and evolution in pathogens, so a clearer resolution of ST131's origin, adaptation and spread is essential. E. coli ST131's ESBL genes are typically embedded in mobile genetic elements (MGEs) that aid transfer to new plasmid or chromosomal locations, which are mobilised further by plasmid conjugation and recombination, resulting in a flexible ESBL, MGE and plasmid composition with a conserved core genome. 
We used population genomics to trace the evolution of AMR in ST131 more precisely by extracting all available high-quality Illumina HiSeq read libraries to investigate 4,071 globally-sourced genomes, the largest ST131 collection examined so far. We applied rigorous quality-control, genome de novo assembly and ESBL gene screening to resolve ST131's population structure across three genetically distinct Clades (A, B, C) and abundant subclades from the dominant Clade C. We reconstructed their evolutionary relationships across the core and accessory genomes using published reference genomes, long read assemblies and k-mer-based methods to contextualise pangenome diversity. The three main C subclades have co-circulated globally at relatively stable frequencies over time, suggesting attaining an equilibrium after their origin and initial rapid spread. This contrasted with their ESBL genes, which had stronger patterns across time, geography and subclade, and were located at distinct locations across the chromosomes and plasmids between isolates. Within the three C subclades, the core and accessory genome diversity levels were not correlated due to plasmid and MGE activity, unlike patterns between the three main clades, A, B and C. This population genomic study highlights the dynamic nature of the accessory genomes in ST131, suggesting that surveillance should anticipate genetically variable outbreaks with broader antibiotic resistance levels. 
Our findings emphasise the potential of evolutionary pangenomics to improve our understanding of AMR gene transfer, adaptation and transmission to discover accessory genome changes linked to novel subtypes.}, } @article {pmid31749118, year = {2020}, author = {de Fátima Rauber Würfel, S and Jorge, S and de Oliveira, NR and Kremer, FS and Sanchez, CD and Campos, VF and da Silva Pinto, L and da Silva, WP and Dellagostin, OA}, title = {Campylobacter jejuni isolated from poultry meat in Brazil: in silico analysis and genomic features of two strains with different phenotypes of antimicrobial susceptibility.}, journal = {Molecular biology reports}, volume = {47}, number = {1}, pages = {671-681}, doi = {10.1007/s11033-019-05174-y}, pmid = {31749118}, issn = {1573-4978}, support = {Finance Code 001//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; 0//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; 0//Fundação de Amparo à Pesquisa do Estado do Rio Grande do Sul/ ; }, mesh = {Animals ; Anti-Bacterial Agents/*pharmacology ; Brazil ; *Campylobacter jejuni/drug effects/genetics/isolation & purification/pathogenicity ; Drug Resistance, Multiple, Bacterial/*genetics ; Genome, Bacterial/genetics ; Genomics ; Meat/*microbiology ; Multilocus Sequence Typing ; Plasmids/genetics ; Poultry ; Virulence Factors/genetics ; }, abstract = {Campylobacter jejuni is the most common bacterial cause of foodborne diarrheal disease worldwide and is among the antimicrobial resistant "priority pathogens" that pose greatest threat to public health. The genomes of two C. jejuni isolated from poultry meat sold on the retail market in Southern Brazil phenotypically characterized as multidrug-resistant (CJ100) and susceptible (CJ104) were sequenced and analyzed by bioinformatic tools. The isolates CJ100 and CJ104 showed distinct multilocus sequence types (MLST). 
Comparative genomic analysis revealed a large number of single nucleotide polymorphisms, rearrangements, and inversions in both genomes, in addition to virulence factors, genomic islands, prophage sequences, and insertion sequences. A circular 103-kilobase megaplasmid carrying virulence factors was identified in the genome of CJ100, in addition to resistance mechanisms to aminoglycosides, beta-lactams, macrolides, quinolones, and tetracyclines. The molecular characterization of distinct phenotypes of foodborne C. jejuni and the discovery of a novel virulence megaplasmid provide useful data for pan-genome and large-scale studies to monitor the virulent C. jejuni in poultry meat is warranted.}, } @article {pmid31745243, year = {2019}, author = {Chapeton-Montes, D and Plourde, L and Bouchier, C and Ma, L and Diancourt, L and Criscuolo, A and Popoff, MR and Brüggemann, H}, title = {Author Correction: The population structure of Clostridium tetani deduced from its pan-genome.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {17409}, doi = {10.1038/s41598-019-53688-z}, pmid = {31745243}, issn = {2045-2322}, abstract = {An amendment to this paper has been published and can be accessed via a link at the top of the paper.}, } @article {pmid31740752, year = {2020}, author = {Lawson, MAE and O'Neill, IJ and Kujawska, M and Gowrinadh Javvadi, S and Wijeyesekera, A and Flegg, Z and Chalklen, L and Hall, LJ}, title = {Breast milk-derived human milk oligosaccharides promote Bifidobacterium interactions within a single ecosystem.}, journal = {The ISME journal}, volume = {14}, number = {2}, pages = {635-648}, doi = {10.1038/s41396-019-0553-2}, pmid = {31740752}, issn = {1751-7370}, support = {100974/WT_/Wellcome Trust/United Kingdom ; BB/J004529/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/F/00044409/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012490/1/BB_/Biotechnology and Biological 
Sciences Research Council/United Kingdom ; 100/974/C/13/Z/WT_/Wellcome Trust/United Kingdom ; BBS/E/F/000PR10356/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/F/000PR10353/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/M011216/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Bifidobacterium/genetics/isolation & purification/physiology ; Breast Feeding ; Carbohydrate Metabolism/*genetics ; Ecosystem ; Female ; Genes, Bacterial ; Genetic Variation ; Genome, Bacterial ; Humans ; Infant ; Metagenome/genetics/physiology ; Microbial Interactions ; Microbiota ; *Milk, Human/chemistry ; Oligosaccharides/*genetics/metabolism ; }, abstract = {Diet-microbe interactions play an important role in modulating the early-life microbiota, with Bifidobacterium strains and species dominating the gut of breast-fed infants. Here, we sought to explore how infant diet drives distinct bifidobacterial community composition and dynamics within individual infant ecosystems. Genomic characterisation of 19 strains isolated from breast-fed infants revealed a diverse genomic architecture enriched in carbohydrate metabolism genes, which was distinct to each strain, but collectively formed a pangenome across infants. Presence of gene clusters implicated in digestion of human milk oligosaccharides (HMOs) varied between species, with growth studies indicating that within single infants there were differences in the ability to utilise 2'FL and LNnT HMOs between strains. Cross-feeding experiments were performed with HMO degraders and non-HMO users (using spent or 'conditioned' media and direct co-culture). Further 1H-NMR analysis identified fucose, galactose, acetate, and N-acetylglucosamine as key by-products of HMO metabolism; as demonstrated by modest growth of non-HMO users on spent media from HMO metabolism. 
These experiments indicate how HMO metabolism permits the sharing of resources to maximise nutrient consumption from the diet and highlights the cooperative nature of bifidobacterial strains and their role as 'foundation' species in the infant ecosystem. The intra- and inter-infant bifidobacterial community behaviour may contribute to the diversity and dominance of Bifidobacterium in early life and suggests avenues for future development of new diet and microbiota-based therapies to promote infant health.}, } @article {pmid31738764, year = {2019}, author = {Robertson, J and Lin, J and Wren-Hedgus, A and Arya, G and Carrillo, C and Nash, JHE}, title = {Development of a multi-locus typing scheme for an Enterobacteriaceae linear plasmid that mediates inter-species transfer of flagella.}, journal = {PloS one}, volume = {14}, number = {11}, pages = {e0218638}, pmid = {31738764}, issn = {1932-6203}, mesh = {Enterobacteriaceae/classification/*genetics ; Flagella/*genetics ; Gene Transfer, Horizontal ; Genes, Bacterial ; Humans ; Multilocus Sequence Typing/*methods ; Phylogeny ; Plasmids/classification/genetics ; Salmonella typhi/classification/genetics ; Serogroup ; Species Specificity ; }, abstract = {Due to the public health importance of flagellar genes for typing, it is important to understand mechanisms that could alter their expression or presence. Phenotypic novelty in flagellar genes arise predominately through accumulation of mutations but horizontal transfer is known to occur. A linear plasmid termed pBSSB1 previously identified in Salmonella Typhi, was found to encode a flagellar operon that can mediate phase variation, which results in the rare z66 flagella phenotype. The identification and tracking of homologs of pBSSB1 is limited because it falls outside the normal replicon typing schemes for plasmids. Here we report the generation of nine new pBSSB1-family sequences using Illumina and Nanopore sequence data. 
Homologs of pBSSB1 were identified in 154 genomes representing 25 distinct serotypes from 67,758 Salmonella public genomes. Pangenome analysis of pBSSB1-family contigs was performed using roary and we identified three core genes amenable to a minimal pMLST scheme. Population structure analysis based on the newly developed pMLST scheme identified three major lineages representing 35 sequence types, and the distribution of these sequence types was found to span multiple serovars across the globe. This in silico pMLST scheme has shown utility in tracking and subtyping pBSSB1-family plasmids and it has been incorporated into the plasmid MLST database under the name "pBSSB1-family".}, } @article {pmid31736915, year = {2019}, author = {Suresh, G and Lodha, TD and Indu, B and Sasikala, C and Ramana, CV}, title = {Taxogenomics Resolves Conflict in the Genus Rhodobacter: A Two and Half Decades Pending Thought to Reclassify the Genus Rhodobacter.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2480}, pmid = {31736915}, issn = {1664-302X}, abstract = {The genus Rhodobacter is taxonomically well studied, and some members are model organisms. However, this genus is comprised of a heterogeneous group of members. 16S rRNA gene-based phylogeny of the genus Rhodobacter indicates a motley assemblage of anoxygenic phototrophic bacteria (genus Rhodobacter) with interspersing members of other genera (chemotrophs) making the genus polyphyletic. Taxogenomics was performed to resolve the taxonomic conflicts of the genus Rhodobacter using twelve type strains. The phylogenomic analysis showed that Rhodobacter spp. can be grouped into four monophyletic clusters with interspersing chemotrophs. Genomic indices (ANI and dDDH) confirmed that all the current species are well defined, except Rhodobacter megalophilus. 
The average amino acid identity values between the monophyletic clusters of Rhodobacter members, as well as with the chemotrophic genera, are less than 80% whereas the percentage of conserved proteins values were below 70%, which has been observed among several genera related to Rhodobacter. The pan-genome analysis has shown that there are only 1239 core genes shared between the 12 species of the genus Rhodobacter. The polyphasic taxonomic analysis supports the phylogenomic and genomic studies in distinguishing the four Rhodobacter clusters. Each cluster is comprised of one to seven species according to the current Rhodobacter taxonomy. Therefore, to address this taxonomic discrepancy we propose to reclassify the members of the genus Rhodobacter into three new genera, Luteovulum gen. nov., Phaeovulum gen. nov. and Fuscovulum gen. nov., and provide an emended description of the genus Rhodobacter sensu stricto. Also, we propose reclassification of Rhodobacter megalophilus as a sub-species of Rhodobacter sphaeroides.}, } @article {pmid31731444, year = {2019}, author = {Ghosh, S and Sarangi, AN and Mukherjee, M and Bhowmick, S and Tripathy, S}, title = {Reanalysis of Lactobacillus paracasei Lbs2 Strain and Large-Scale Comparative Genomics Places Many Strains into Their Correct Taxonomic Position.}, journal = {Microorganisms}, volume = {7}, number = {11}, pages = {}, pmid = {31731444}, issn = {2076-2607}, abstract = {Lactobacillus paracasei are diverse Gram-positive bacteria that are very closely related to Lactobacillus casei, belonging to the Lactobacillus casei group. Due to extreme genome similarities between L. casei and L. paracasei, many strains have been cross placed in the other group. We had earlier sequenced and analyzed the genome of Lactobacillus paracasei Lbs2, but mistakenly identified it as L. casei. We re-analyzed Lbs2 reads into a 2.5 MB genome that is 91.28% complete with 0.8% contamination, which is now suitably placed under L. 
paracasei based on Average Nucleotide Identity and Average Amino Acid Identity. We took 74 sequenced genomes of L. paracasei from GenBank with assembly sizes ranging from 2.3 to 3.3 MB and genome completeness between 88% and 100% for comparison. The pan-genome of 75 L. paracasei strains hold 15,945 gene families (215,232 genes), while the core genome contained about 8.4% of the total genes (243 gene families with 18,225 genes) of pan-genome. Phylogenomic analysis based on core gene families revealed that the Lbs2 strain has a closer relationship with L. paracasei subsp. tolerans DSM20258. Finally, the in-silico analysis of the L. paracasei Lbs2 genome revealed an important pathway that could underpin the production of thiamin, which may contribute to the host energy metabolism.}, } @article {pmid31728978, year = {2020}, author = {Seribelli, AA and Gonzales, JC and de Almeida, F and Benevides, L and Cazentini Medeiros, MI and Dos Prazeres Rodrigues, D and de C Soares, S and Allard, MW and Falcão, JP}, title = {Phylogenetic analysis revealed that Salmonella Typhimurium ST313 isolated from humans and food in Brazil presented a high genomic similarity.}, journal = {Brazilian journal of microbiology : [publication of the Brazilian Society for Microbiology]}, volume = {51}, number = {1}, pages = {53-64}, pmid = {31728978}, issn = {1678-4405}, support = {2017/06633-6//Fundação de Amparo à Pesquisa do Estado de São Paulo/ ; 2016/24716-3//Fundação de Amparo à Pesquisa do Estado de São Paulo/ ; }, mesh = {Bacterial Typing Techniques ; Brazil ; Feces/microbiology ; *Food Microbiology ; *Genome, Bacterial ; Genomics ; Genotype ; Humans ; Multilocus Sequence Typing ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Salmonella Infections/microbiology ; Salmonella typhimurium/*classification ; Virulence Factors/genetics ; Whole Genome Sequencing ; }, abstract = {Salmonella Typhimurium sequence type 313 (S. Typhimurium ST313) has caused invasive disease mainly in sub-Saharan Africa. 
In Brazil, ST313 strains have been recently described, and there is a lack of studies that assessed by whole genome sequencing (WGS)-the relationship of these strains. The aims of this work were to study the phylogenetic relationship of 70 S. Typhimurium genomes comparing strains of ST313 (n = 9) isolated from humans and food in Brazil among themselves, with other STs isolated in this country (n = 31) and in other parts of the globe (n = 30) by 16S rRNA sequences, the Gegenees software, whole genome multilocus sequence typing (wgMLST), and average nucleotide identity (ANI) for the genomes of ST313. Additionally, pangenome analysis was performed to verify the heterogeneity of these genomes. The phylogenetic analyses showed that the ST313 genomes were very similar among themselves. However, the ST313 genomes were usually clustered more distantly to other STs of strains isolated in Brazil and in other parts of the world. By pangenome calculation, the core genome was 2,880 CDSs and 4,171 CDSs singletons for all the 70 S. Typhimurium genomes studied. Considering the 10 ST313 genomes analyzed the core genome was 4,112 CDSs and 76 CDSs singletons. In conclusion, the ST313 genomes from Brazil showed a high similarity among them which information might eventually help in the development of vaccines and antibiotics. The pangenome analysis showed that the S. 
Typhimurium genomes studied presented an open pangenome, but specifically tending to become close for the ST313 strains.}, } @article {pmid31719113, year = {2020}, author = {Chhotaray, C and Wang, S and Tan, Y and Ali, A and Shehroz, M and Fang, C and Liu, Y and Lu, Z and Cai, X and Hameed, HMA and Islam, MM and Surineni, G and Tan, S and Liu, J and Zhang, T}, title = {Comparative Analysis of Whole-Genome and Methylome Profiles of a Smooth and a Rough Mycobacterium abscessus Clinical Strain.}, journal = {G3 (Bethesda, Md.)}, volume = {10}, number = {1}, pages = {13-22}, pmid = {31719113}, issn = {2160-1836}, mesh = {DNA Methylation ; *Epigenome ; *Genome, Bacterial ; Mycobacterium abscessus/*genetics ; *Phenotype ; Polymorphism, Genetic ; }, abstract = {Mycobacterium abscessus is a fast growing Mycobacterium species mainly causing skin and respiratory infections in human. M. abscessus is resistant to numerous drugs, which is a major challenge for the treatment. In this study, we have sequenced the genomes of two clinical M. abscessus strains having rough and smooth morphology, using the single molecule real-time and Illumina HiSeq sequencing technology. In addition, we reported the first comparative methylome profiles of a rough and a smooth M. abscessus clinical strains. The number of N4-methylcytosine (4mC) and N6-methyladenine (6mA) modified bases obtained from smooth phenotype were two-fold and 1.6 fold respectively higher than that of rough phenotype. We have also identified 4 distinct novel motifs in two clinical strains and genes encoding antibiotic-modifying/targeting enzymes and genes associated with intracellular survivability having different methylation patterns. To our knowledge, this is the first report about genome-wide methylation profiles of M. abscessus strains and identification of a natural linear plasmid (15 kb) in this critical pathogen harboring methylated bases. The pan-genome analysis of 25 M. 
abscessus strains including two clinical strains revealed an open pan genome comprises of 7596 gene clusters. Likewise, structural variation analysis revealed that the genome of rough phenotype strain contains more insertions and deletions than the smooth phenotype and that of the reference strain. A total of 391 single nucleotide variations responsible for the non-synonymous mutations were detected in clinical strains compared to the reference genome. The comparative genomic analysis elucidates the genome plasticity in this emerging pathogen. Furthermore, the detection of genome-wide methylation profiles of M. abscessus clinical strains may provide insight into the significant role of DNA methylation in pathogenicity and drug resistance in this opportunistic pathogen.}, } @article {pmid31703875, year = {2020}, author = {Kim, KH and Chun, BH and Baek, JH and Roh, SW and Lee, SH and Jeon, CO}, title = {Genomic and metabolic features of Lactobacillus sakei as revealed by its pan-genome and the metatranscriptome of kimchi fermentation.}, journal = {Food microbiology}, volume = {86}, number = {}, pages = {103341}, doi = {10.1016/j.fm.2019.103341}, pmid = {31703875}, issn = {1095-9998}, mesh = {Biogenic Amines/metabolism ; Brassica/*microbiology ; Fermentation ; Fermented Foods and Beverages/microbiology ; Food Microbiology ; Gene Expression Profiling ; *Genome, Bacterial ; Genomics ; Glucuronic Acid/metabolism ; Lactic Acid/metabolism ; Lactobacillus sakei/*genetics/isolation & purification/*metabolism ; Metabolic Networks and Pathways ; Vegetables/*microbiology ; }, abstract = {The genomic and metabolic features of Lactobacillus sakei were investigated using its pan-genome and by analyzing the metatranscriptome of kimchi fermentation. In the genome-based relatedness analysis, the strains were divided into the Lb. sakei ssp. sakei and Lb. sakei ssp. carnosus lineage groups. Genomic and metabolic pathway analysis revealed that all Lb. 
sakei strains have the capability of producing d/l-lactate, ethanol, acetate, CO2, formate, l-malate, diacetyl, acetoin, and 2,3-butanediol from d-glucose, d-fructose, d-galactose, sucrose, d-lactose, l-arabinose, cellobiose, d-mannose, d-gluconate, and d-ribose through homolactic and heterolactic fermentation, whereas their capability of d-maltose, d-xylose, l-xylulose, d-galacturonate, and d-glucuronate metabolism is strain-specific. All strains carry genes for the biosynthesis of folate and thiamine, whereas genes for biogenic amine and toxin production, hemolysis, and antibiotic resistance were not identified. The metatranscriptomic analysis showed that the expression of Lb. sakei transcripts involved in carbohydrate metabolism increased as kimchi fermentation progressed, suggesting that Lb. sakei is more competitive during late fermentation stage. Homolactic fermentation pathway was highly expressed and generally constant during kimchi fermentation, whereas expression of heterolactic fermentation pathway increased gradually as fermentation progressed. l-Lactate dehydrogenase was more highly expressed than d-lactate dehydrogenase, suggesting that l-lactate is the major lactate metabolized by Lb. sakei.}, } @article {pmid31695182, year = {2020}, author = {Bernheim, A and Sorek, R}, title = {The pan-immune system of bacteria: antiviral defence as a community resource.}, journal = {Nature reviews. Microbiology}, volume = {18}, number = {2}, pages = {113-119}, pmid = {31695182}, issn = {1740-1534}, mesh = {Bacteria/immunology/*virology ; Bacteriophages/*physiology ; }, abstract = {Viruses and their hosts are engaged in a constant arms race leading to the evolution of antiviral defence mechanisms. Recent studies have revealed that the immune arsenal of bacteria against bacteriophages is much more diverse than previously envisioned. 
These discoveries have led to seemingly contradictory observations: on one hand, individual microorganisms often encode multiple distinct defence systems, some of which are acquired by horizontal gene transfer, alluding to their fitness benefit. On the other hand, defence systems are frequently lost from prokaryotic genomes on short evolutionary time scales, suggesting that they impose a fitness cost. In this Perspective article, we present the 'pan-immune system' model in which we suggest that, although a single strain cannot carry all possible defence systems owing to their burden on fitness, it can employ horizontal gene transfer to access immune defence mechanisms encoded by closely related strains. Thus, the 'effective' immune system is not the one encoded by the genome of a single microorganism but rather by its pan-genome, comprising the sum of all immune systems available for a microorganism to horizontally acquire and use.}, } @article {pmid31694533, year = {2019}, author = {Vila Nova, M and Durimel, K and La, K and Felten, A and Bessières, P and Mistou, MY and Mariadassou, M and Radomski, N}, title = {Genetic and metabolic signatures of Salmonella enterica subsp. enterica associated with animal sources at the pangenomic scale.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {814}, pmid = {31694533}, issn = {1471-2164}, support = {Typautobac//Agence Nationale de Sécurité Sanitaire de l'Alimentation, de l'Environnement et du Travail/ ; Typautobac//Institut National de la Recherche Agronomique/ ; 643476//COMPARE/ ; }, mesh = {Animals ; Genome-Wide Association Study ; *Genomics ; Mutation ; Phylogeny ; Salmonella enterica/*genetics/*metabolism ; }, abstract = {BACKGROUND: Salmonella enterica subsp. enterica is a public health issue related to food safety, and its adaptation to animal sources remains poorly described at the pangenome scale. 
Firstly, serovars presenting potential mono- and multi-animal sources were selected from a curated and synthetized subset of Enterobase. The corresponding sequencing reads were downloaded from the European Nucleotide Archive (ENA) providing a balanced dataset of 440 Salmonella genomes in terms of serovars and sources (i). Secondly, the coregenome variants and accessory genes were detected (ii). Thirdly, single nucleotide polymorphisms and small insertions/deletions from the coregenome, as well as the accessory genes were associated to animal sources based on a microbial Genome Wide Association Study (GWAS) integrating an advanced correction of the population structure (iii). Lastly, a Gene Ontology Enrichment Analysis (GOEA) was applied to emphasize metabolic pathways mainly impacted by the pangenomic mutations associated to animal sources (iv).

RESULTS: Based on a genome dataset including Salmonella serovars from mono- and multi-animal sources (i), 19,130 accessory genes and 178,351 coregenome variants were identified (ii). Among these pangenomic mutations, 52 genomic signatures (iii) and 9 over-enriched metabolic signatures (iv) were associated to avian, bovine, swine and fish sources by GWAS and GOEA, respectively.

CONCLUSIONS: Our results suggest that the genetic and metabolic determinants of Salmonella adaptation to animal sources may have been driven by the natural feeding environment of the animal, distinct livestock diets modified by human, environmental stimuli, physiological properties of the animal itself, and work habits for health protection of livestock.}, } @article {pmid31666129, year = {2019}, author = {Aguirre de Cárcer, D}, title = {A conceptual framework for the phylogenetically constrained assembly of microbial communities.}, journal = {Microbiome}, volume = {7}, number = {1}, pages = {142}, pmid = {31666129}, issn = {2049-2618}, mesh = {Ecosystem ; *Microbial Interactions ; *Microbiota ; *Phylogeny ; }, abstract = {Microbial communities play essential and preponderant roles in all ecosystems. Understanding the rules that govern microbial community assembly will have a major impact on our ability to manage microbial ecosystems, positively impacting, for instance, human health and agriculture. Here, I present a phylogenetically constrained community assembly principle grounded on the well-supported facts that deterministic processes have a significant impact on microbial community assembly, that microbial communities show significant phylogenetic signal, and that microbial traits and ecological coherence are, to some extent, phylogenetically conserved. From these facts, I derive a few predictions which form the basis of the framework. Chief among them is the existence, within most microbial ecosystems, of phylogenetic core groups (PCGs), defined as discrete portions of the phylogeny of varying depth present in all instances of the given ecosystem, and related to specific niches whose occupancy requires a specific phylogenetically conserved set of traits. The predictions are supported by the recent literature, as well as by dedicated analyses. 
Integrating the effect of ecosystem patchiness, microbial social interactions, and scale sampling pitfalls takes us to a comprehensive community assembly model that recapitulates the characteristics most commonly observed in microbial communities. PCGs' identification is relatively straightforward using high-throughput 16S amplicon sequencing, and subsequent bioinformatic analysis of their phylogeny, estimated core pan-genome, and intra-group co-occurrence should provide valuable information on their ecophysiology and niche characteristics. Such a priori information for a significant portion of the community could be used to prime complementing analyses, boosting their usefulness. Thus, the use of the proposed framework could represent a leap forward in our understanding of microbial community assembly and function.}, } @article {pmid31661016, year = {2019}, author = {Alonge, M and Soyk, S and Ramakrishnan, S and Wang, X and Goodwin, S and Sedlazeck, FJ and Lippman, ZB and Schatz, MC}, title = {RaGOO: fast and accurate reference-guided scaffolding of draft genomes.}, journal = {Genome biology}, volume = {20}, number = {1}, pages = {224}, pmid = {31661016}, issn = {1474-760X}, support = {R01-HG006677/NH/NIH HHS/United States ; UM1 HG008898/NH/NIH HHS/United States ; }, mesh = {Arabidopsis/genetics ; Genome, Plant ; Genomic Structural Variation ; Genomics/*methods ; Lycopersicon esculentum/genetics ; *Software ; }, abstract = {We present RaGOO, a reference-guided contig ordering and orienting tool that leverages the speed and sensitivity of Minimap2 to accurately achieve chromosome-scale assemblies in minutes. After the pseudomolecules are constructed, RaGOO identifies structural variants, including those spanning sequencing gaps. We show that RaGOO accurately orders and orients 3 de novo tomato genome assemblies, including the widely used M82 reference cultivar. 
We then demonstrate the scalability and utility of RaGOO with a pan-genome analysis of 103 Arabidopsis thaliana accessions by examining the structural variants detected in the newly assembled pseudomolecules. RaGOO is available open source at https://github.com/malonge/RaGOO .}, } @article {pmid31659686, year = {2019}, author = {Oh, YJ and Kim, JY and Park, HK and Jang, JY and Lim, SK and Kwon, MS and Choi, HJ}, title = {Salicibibacter halophilus sp. nov., a moderately halophilic bacterium isolated from kimchi.}, journal = {Journal of microbiology (Seoul, Korea)}, volume = {57}, number = {11}, pages = {997-1002}, pmid = {31659686}, issn = {1976-3794}, mesh = {Bacillaceae/*classification/genetics/*isolation & purification/physiology ; Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; Diaminopimelic Acid/metabolism ; Fatty Acids/chemistry ; Fermented Foods and Beverages/*microbiology ; Genes, Bacterial/genetics ; Genomics ; Halobacteriales ; Hydrogen-Ion Concentration ; Peptidoglycan/chemistry ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Republic of Korea ; Salt Tolerance ; Sequence Analysis, DNA ; Sodium Chloride/metabolism ; Vitamin K 2/analogs & derivatives/chemistry ; Whole Genome Sequencing ; }, abstract = {A Gram-stain-positive, rod-shaped, alkalitolerant, and halophilic bacterium-designated as strain NKC3-5T-was isolated from kimchi that was collected from the Geumsan area in the Republic of Korea. Cells of isolated strain NKC3-5T were 0.5-0.7 μm wide and 1.4-2.8 μm long. The strain NKC3-5T could grow at up to 20.0% (w/v) NaCl (optimum 10%), pH 6.5-10.0 (optimum pH 9.0), and 25-40°C (optimum 35°C). The cells were able to reduce nitrate under aerobic conditions, which is the first report in the genus Salicibibacter. The genome size and genomic G + C content of strain NKC3-5T were 3,754,174 bp and 45.9 mol%, respectively; it contained 3,630 coding sequences, 16 rRNA genes (six 16S, five 5S, and five 23S), and 59 tRNA genes. 
Phylogenetic analysis based on 16S rRNA showed that strain NKC3-5T clustered with bacterium Salicibibacter kimchii NKC1-1T, with a similarity of 96.2-97.6%, but formed a distinct branch with other published species of the family Bacillaceae. In addition, OrthoANI value between strain NKC3-5T and Salicibibacter kimchii NKC1-1T was far lower than the species demarcation threshold. Using functional genome annotation, the result found that carbohydrate, amino acid, and vitamin metabolism related genes were highly distributed in the genome of strain NKC3-5T. Comparative genomic analysis revealed that strain NKC3-5T had 716 pan-genome orthologous groups (POGs), dominated with carbohydrate metabolism. Phylogenomic analysis based on the concatenated core POGs revealed that strain NKC3-5T was closely related to Salicibibacter kimchii. The predominant polar lipids were phosphatidylglycerol and two unidentified lipids. Anteiso-C15:0, iso-C17:0, anteiso-C17:0, and iso-C15:0 were the major cellular fatty acids, and menaquinone-7 was the major isoprenoid quinone present in strain NKC3-5T. Cell wall peptidoglycan analysis of strain NKC3-5T showed that meso-diaminopimelic acid was the diagnostic diamino acid. The phenotypic, genomic, phylogenetic, and chemotaxonomic properties reveal that the strain represents a novel species of the genus Salicibibacter, for which the name Salicibibacter halophilus sp. nov. 
is proposed, with the type strain NKC3-5T (= KACC 21230T = JCM 33437T).}, } @article {pmid31654228, year = {2020}, author = {Zhu, D and Yang, Z and Xu, J and Wang, M and Jia, R and Chen, S and Liu, M and Zhao, X and Yang, Q and Wu, Y and Zhang, S and Liu, Y and Zhang, L and Yu, Y and Chen, X and Cheng, A}, title = {Pan-genome analysis of Riemerella anatipestifer reveals its genomic diversity and acquired antibiotic resistance associated with genomic islands.}, journal = {Functional & integrative genomics}, volume = {20}, number = {3}, pages = {307-320}, pmid = {31654228}, issn = {1438-7948}, support = {2017YFD050080//National Key Research and Development Program of China/ ; CARS-42-17//China Agricultural Research System/ ; 2017HH0026//International S&T Cooperation Program of Sichuan Province/ ; (2017)03//Science and Technology Innovation Program of Guizhou Academy of Agricultural Science/ ; CARS-SVDIP//Sichuan Veterinary Medicine and Drug Innovation Group of China Agricultural Research System/ ; }, abstract = {Riemerella anatipestifer is a gram-negative bacterium that leads to severe contagious septicemia in ducks, turkeys, chickens, and wild waterfowl. Here, a pan-genome with 32 R. anatipestifer genomes is re-established, and the mathematical model is calculated to evaluate the expansion of R. anatipestifer genomes, which were determined to be open. Average nucleotide identity (ANI) and phylogenetic analysis preliminarily clarify intraspecies variation and distance. Comparative genomic analysis of R. anatipestifer found that horizontal gene transfer events, which provide an expressway for the recruitment of novel functionalities and facilitate genetic diversity in microbial genomes, play a key role in the process of acquiring and transmitting antibiotic-resistance genes in R. anatipestifer. Furthermore, a new antibiotic-resistance gene cluster was identified in the same loci in 14 genomes. 
The uneven distribution of virulence factors was also confirmed by our results. Our study suggests that the ability to acquire foreign genes (such as antibiotic-resistance genes) increases the adaptability of R. anatipestifer, and the virulence genes with little mobility are highly conserved in R. anatipestifer.}, } @article {pmid31647104, year = {2020}, author = {Vallenet, D and Calteau, A and Dubois, M and Amours, P and Bazin, A and Beuvin, M and Burlot, L and Bussell, X and Fouteau, S and Gautreau, G and Lajus, A and Langlois, J and Planel, R and Roche, D and Rollin, J and Rouy, Z and Sabatet, V and Médigue, C}, title = {MicroScope: an integrated platform for the annotation and exploration of microbial gene functions through genomic, pangenomic and metabolic comparative analysis.}, journal = {Nucleic acids research}, volume = {48}, number = {D1}, pages = {D579-D589}, pmid = {31647104}, issn = {1362-4962}, mesh = {Databases, Genetic ; *Genes, Archaeal ; *Genes, Bacterial ; Genomics/*methods ; Metabolic Networks and Pathways ; Molecular Sequence Annotation/*methods ; *Software ; }, abstract = {Large-scale genome sequencing and the increasingly massive use of high-throughput approaches produce a vast amount of new information that completely transforms our understanding of thousands of microbial species. However, despite the development of powerful bioinformatics approaches, full interpretation of the content of these genomes remains a difficult task. Launched in 2005, the MicroScope platform (https://www.genoscope.cns.fr/agc/microscope) has been under continuous development and provides analysis for prokaryotic genome projects together with metabolic network reconstruction and post-genomic experiments allowing users to improve the understanding of gene functions. Here we present new improvements of the MicroScope user interface for genome selection, navigation and expert gene annotation. 
Automatic functional annotation procedures of the platform have also been updated and we added several new tools for the functional annotation of genes and genomic regions. We finally focus on new tools and pipeline developed to perform comparative analyses on hundreds of genomes based on pangenome graphs. To date, MicroScope contains data for >11 800 microbial genomes, part of which are manually curated and maintained by microbiologists (>4500 personal accounts in September 2019). The platform enables collaborative work in a rich comparative genomic context and improves community-based curation efforts.}, } @article {pmid31647096, year = {2020}, author = {Mende, DR and Letunic, I and Maistrenko, OM and Schmidt, TSB and Milanese, A and Paoli, L and Hernández-Plaza, A and Orakov, AN and Forslund, SK and Sunagawa, S and Zeller, G and Huerta-Cepas, J and Coelho, LP and Bork, P}, title = {proGenomes2: an improved database for accurate and consistent habitat, taxonomic and functional annotations of prokaryotic genomes.}, journal = {Nucleic acids research}, volume = {48}, number = {D1}, pages = {D621-D625}, pmid = {31647096}, issn = {1362-4962}, mesh = {Computational Biology/methods ; *Databases, Genetic ; Ecosystem ; *Genome, Archaeal ; *Genome, Bacterial ; *Genomics ; Internet ; Molecular Sequence Annotation ; Polymorphism, Single Nucleotide ; Prokaryotic Cells ; Reproducibility of Results ; Software ; }, abstract = {Microbiology depends on the availability of annotated microbial genomes for many applications. Comparative genomics approaches have been a major advance, but consistent and accurate annotations of genomes can be hard to obtain. In addition, newer concepts such as the pan-genome concept are still being implemented to help answer biological questions. Hence, we present proGenomes2, which provides 87 920 high-quality genomes in a user-friendly and interactive manner. Genome sequences and annotations can be retrieved individually or by taxonomic clade. 
Every genome in the database has been assigned to a species cluster and most genomes could be accurately assigned to one or multiple habitats. In addition, general functional annotations and specific annotations of antibiotic resistance genes and single nucleotide variants are provided. In short, proGenomes2 provides threefold more genomes, enhanced habitat annotations, updated taxonomic and functional annotation and improved linkage to the NCBI BioSample database. The database is available at http://progenomes.embl.de/.}, }

% Yin et al. 2019: comparative genomics of the Hafnia genus. The genus name is braced so styles that recase titles keep its capital.
@article{pmid31646960,
  author   = {Yin, Z and Yuan, C and Du, Y and Yang, P and Qian, C and Wei, Y and Zhang, S and Huang, D and Liu, B},
  title    = {Comparative genomic analysis of the {Hafnia} genus reveals an explicit evolutionary relationship between the species alvei and paralvei and provides insights into pathogenicity.},
  journal  = {BMC genomics},
  year     = {2019},
  volume   = {20},
  number   = {1},
  pages    = {768},
  pmid     = {31646960},
  issn     = {1471-2164},
  support  = {No. 81471904, 81772148, and 81611530714//National Natural Science Foundation of China/ ; },
  mesh     = {Bacterial Secretion Systems/genetics ; Comparative Genomic Hybridization ; Drug Resistance, Bacterial/genetics ; *Genome, Bacterial ; Genotype ; Hafnia/*classification/*pathogenicity ; Phylogeny ; Species Specificity ; *Virulence ; Virulence Factors/genetics ; },
  abstract = {BACKGROUND: The Hafnia genus is an opportunistic pathogen that has been implicated in both nosocomial and community-acquired infections. Although Hafnia is fairly often isolated from clinical material, its taxonomy has remained an unsolved riddle, and the involvement and importance of Hafnia in human disease is also uncertain. Here, we used comparative genomic analysis to define the taxonomy of Hafnia, identify species-specific genes that may be the result of ecological and pathogenic specialization, and reveal virulence-related genetic profiles that may contribute to pathogenesis.

RESULTS: One complete genome sequence and 19 draft genome sequences for Hafnia strains were generated and combined with 27 publicly available genomes. We provided high-resolution typing methods by constructing phylogeny and population structure based on single-copy core genes in combination with whole genome average nucleotide identity to identify two distant Hafnia species (alvei and paralvei) and one mislabeled strain. The open pan-genome and the presence of numerous mobile genetic elements reveal that Hafnia has undergone massive gene rearrangements. Presence of species-specific core genomes associated with metabolism and transport suggests the putative niche differentiation between alvei and paralvei. We also identified possession of diverse virulence-related profiles in both Hafnia species, including the macromolecular secretion system, virulence, and antimicrobial resistance. In the macromolecular system, T1SS, Flagellum 1, Tad pilus and T6SS-1 were conserved in Hafnia, whereas T4SS, T5SS, and other T6SSs exhibited the evolution of diversity. The virulence factors in Hafnia are related to adherence, toxin, iron uptake, stress adaptation, and efflux pump. The identified resistance genes are associated with aminoglycoside, beta-lactam, bacitracin, cationic antimicrobial peptide, fluoroquinolone, and rifampin. These virulence-related profiles identified at the genomic level provide insights into Hafnia pathogenesis and the differentiation between alvei and paralvei.

CONCLUSIONS: Our research using core genome phylogeny and comparative genomics analysis of a larger collection of strains provides a comprehensive view of the taxonomy and species-specific traits between Hafnia species. Deciphering the genome of Hafnia strains possessing a reservoir of macromolecular secretion systems, virulence factors, and resistance genes related to pathogenicity may provide insights into addressing its numerous infections and devising strategies to combat the pathogen.}, }

% Lu et al. 2020: SilkDB 3.0 silkworm database. The database name is braced to keep its capitals; the page range uses the double-hyphen form.
@article{pmid31642484,
  author   = {Lu, F and Wei, Z and Luo, Y and Guo, H and Zhang, G and Xia, Q and Wang, Y},
  title    = {{SilkDB} 3.0: visualizing and exploring multiple levels of data for silkworm.},
  journal  = {Nucleic acids research},
  year     = {2020},
  volume   = {48},
  number   = {D1},
  pages    = {D749--D755},
  pmid     = {31642484},
  issn     = {1362-4962},
  mesh     = {Animals ; Bayes Theorem ; Bombyx/*genetics ; Chromosome Mapping ; Chromosomes/genetics ; Computational Biology/*methods ; Computer Graphics ; *Databases, Genetic ; Exons ; Gene Expression Profiling ; Genetic Variation ; *Genome, Insect ; Genomics ; Introns ; Phylogeny ; *Transcriptome ; User-Computer Interface ; },
  abstract = {SilkDB is an open-accessibility database and powerful platform that provides comprehensive information on the silkworm (Bombyx mori) genome. Since SilkDB 2.0 was released 10 years ago, vast quantities of data about multiple aspects of the silkworm have been generated, including genome, transcriptome, Hi-C and pangenome. To visualize data at these different biological levels, we present SilkDB 3.0 (https://silkdb.bioinfotoolkits.net), a visual analytic tool for exploring silkworm data through an interactive user interface. The database contains a high-quality chromosome-level assembly of the silkworm genome, and its coding sequences and gene sets are more accurate than those in the previous version.
SilkDB 3.0 provides a view of the information for each gene at the levels of sequence, protein structure, gene family, orthology, synteny, genome organization and gives access to gene expression information, genetic variation and genome interaction map. A set of visualization tools are available to display the abundant information in the above datasets. With an improved interactive user interface for the integration of large data sets, the updated SilkDB 3.0 database will be a valuable resource for the silkworm and insect research community.}, }

% Nanayakkara et al. 2019: growth of Escherichia coli bloom strains. The species name is braced so styles that recase titles keep its capital; the page range uses the double-hyphen form.
@article{pmid31642169,
  author   = {Nanayakkara, BS and O'Brien, CL and Gordon, DM},
  title    = {Phenotypic characteristics contributing to the enhanced growth of {Escherichia coli} bloom strains.},
  journal  = {Environmental microbiology reports},
  year     = {2019},
  volume   = {11},
  number   = {6},
  pages    = {817--824},
  doi      = {10.1111/1758-2229.12801},
  pmid     = {31642169},
  issn     = {1758-2229},
  support  = {LP120100327//Australian Research Council/International ; Project 1101//Water Research Australia/International ; //Fitzroy River Water and Victorian Department of Health/International ; //Sydney Catchment Authority/International ; //South Australian Water Corporation/International ; //Sydney Water Corporation/International ; //City West Water/International ; //Yarra Valley Water/International ; //South East Water/International ; //Hunter Water Corporation/International ; //Queensland Bulk Water Authority/International ; //Water Corporation of Western Australia/International ; //Melbourne Water/International ; },
  mesh     = {Bacterial Capsules/metabolism ; Biological Transport ; Escherichia coli/classification/genetics/*growth & development/isolation & purification ; Ferric Compounds/metabolism ; Phylogeny ; *Water Microbiology ; },
  abstract = {During bloom events, Escherichia coli cell counts increase to between 10,000 and 100,000 cfu/100 ml of water. The strains responsible for bloom events belong to E.
coli phylogenetic groups A and B1, and all have acquired a capsule from Klebsiella. A pan-genome comparison of phylogroup A E. coli revealed that the ferric citrate uptake system (fecIRABCDE) was overrepresented in phylogroup A bloom strains compared with non-bloom E. coli. A series of experiments were carried out to investigate if the capsule together with ferric citrate uptake system could confer a growth rate advantage on E. coli. Capsulated strains had a growth rate advantage regardless of the media composition and the presence/absence of the fec operon, and they had a shorter lag phase compared with capsule-negative strains. The results suggest that the Klebsiella capsule may facilitate nutrient uptake or utilization by a strain. This, together with the protective roles played by the capsule and the shorter lag phase of capsule-positive strains, may explain why it is only capsule-positive strains that produce elevated counts in response to nutrient influx.}, }

% Zhong et al. 2019: pan-genome analysis of Aeromonas. The genus name is braced so styles that recase titles keep its capital. The export carried an empty pages field, which is omitted here.
@article{pmid31641046,
  author   = {Zhong, C and Han, M and Yang, P and Chen, C and Yu, H and Wang, L and Ning, K},
  title    = {Comprehensive Analysis Reveals the Evolution and Pathogenicity of {Aeromonas}, Viewed from Both Single Isolated Species and Microbial Communities.},
  journal  = {mSystems},
  year     = {2019},
  volume   = {4},
  number   = {5},
  pmid     = {31641046},
  issn     = {2379-5077},
  abstract = {The genus Aeromonas is a common gastrointestinal pathogen associated with human and animal infections. Due to the high level of cross-species similarity, their evolutionary dynamics and genetic diversity are still fragmented. Hereby, we investigated the pan-genomes of 29 Aeromonas species, as well as Aeromonas species in microbial communities, to clarify their evolutionary dynamics and genetic diversity, with special focus on virulence factors and horizontal gene transfer events. Our study revealed an open pan-genome of Aeromonas containing 10,144 gene families.
These Aeromonas species exhibited different functional constraints, with the single-copy core genes and most accessory genes experiencing purifying selection. The significant congruence between core genome and pan-genome trees revealed that core genes mainly affected evolutionary divergences of Aeromonas species. Gene gains and losses revealed a high level of genome plasticity, exhibited by hundreds of gene expansions and contractions, horizontally transferred genes, and mobile genetic elements. The selective constraints shaped virulence gene pools of these Aeromonas strains, where genes encoding hemolysin were ubiquitous. Of these strains, Aeromonas aquatica MX16A seemed to be more resistant, as it harbored most resistance genes. Finally, the virulence factors of Aeromonas in microbial communities were quite dynamic in response to environment changes. For example, the virulence diversity of Aeromonas in microbial communities could reach levels that match some of the most virulent Aeromonas species (such as A. hydrophila) in penetrated-air and modified-air packaging. Our work shed some light onto genetic diversity, evolutionary history, and functional features of Aeromonas, which could facilitate the detection and prevention of infections. IMPORTANCE: Aeromonas has long been known as a gastrointestinal pathogen, yet it has many species whose evolutionary dynamics and genetic diversity had been unclear until now. We have conducted pan-genome analysis for 29 Aeromonas species and revealed a high level of genome plasticity exhibited by hundreds of gene expansions and contractions, horizontally transferred genes, and mobile genetic elements. These species also contained many virulence factors both identified from single isolated species and microbial community.
This pan-genome study could elevate the level for detection and prevention of Aeromonas infections.}, }

% Brockhurst et al. 2019: review of pangenome ecology and evolution. The page range uses the double-hyphen form.
@article{pmid31639358,
  author   = {Brockhurst, MA and Harrison, E and Hall, JPJ and Richards, T and McNally, A and MacLean, C},
  title    = {The Ecology and Evolution of Pangenomes.},
  journal  = {Current biology : CB},
  year     = {2019},
  volume   = {29},
  number   = {20},
  pages    = {R1094--R1103},
  doi      = {10.1016/j.cub.2019.08.012},
  pmid     = {31639358},
  issn     = {1879-0445},
  support  = {BB/R006253/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R014884/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R006261/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; 106918/Z/15/Z/WT_/Wellcome Trust/United Kingdom ; },
  mesh     = {*Biological Evolution ; Evolution, Molecular ; *Genome, Bacterial ; *Metagenome ; Phylogeny ; },
  abstract = {Since the first genome-scale comparisons, it has been evident that the genomes of many species are unbound by strict vertical descent: Large differences in gene content can occur among genomes belonging to the same prokaryotic species, with only a fraction of genes being universal to all genomes. These insights gave rise to the pangenome concept. The pangenome is defined as the set of all the genes present in a given species and can be subdivided into the accessory genome, present in only some of the genomes, and the core genome, present in all the genomes. Pangenomes arise due to gene gain by genomes from other species through horizontal gene transfer and differential gene loss among genomes, and have been described in both prokaryotes and eukaryotes. Our current view of pangenome variation is phenomenological and incomplete. In this review, we outline the mechanistic, ecological and evolutionary drivers of and barriers to horizontal gene transfer that are likely to structure pangenomes.
We highlight the key role of conflict between the host chromosome(s) and the mobile genetic elements that mediate gene exchange. We identify shortcomings in our current models of pangenome evolution and suggest directions for future research to allow a more complete understanding of how and why pangenomes evolve.}, }

% Zhao et al. 2020: Cutibacterium acnes Type II strains and acne. Case-sensitive title words are braced so styles that recase titles keep their capitals; the page range uses the double-hyphen form.
@article{pmid31630297,
  author   = {Zhao, S and Ci, J and Xue, J and Wang, Y and Li, X and Hao, L and Tian, L and Guo, H and Xin, C and Zhao, Y},
  title    = {{Cutibacterium acnes} {Type II} strains are associated with acne in {Chinese} patients.},
  journal  = {Antonie van Leeuwenhoek},
  year     = {2020},
  volume   = {113},
  number   = {3},
  pages    = {377--388},
  doi      = {10.1007/s10482-019-01344-x},
  pmid     = {31630297},
  issn     = {1572-9699},
  mesh     = {Acne Vulgaris/*microbiology ; Actinomycetales Infections/*microbiology ; China ; Genome, Bacterial ; Genomics/methods ; Humans ; Multilocus Sequence Typing ; Phylogeny ; Propionibacterium/*classification/genetics ; Whole Genome Sequencing ; },
  abstract = {Acne is a common inflammatory skin disease, especially in adolescents. Certain Cutibacterium acnes subtypes are associated with acne, although more than one subtype of C. acnes strains may simultaneously reside on the surface of the skin of an individual. To better understand the relationship between the genomic characteristics of C. acnes subtypes and acnes, we collected 50 C. acnes strains from the facial skin of 10 people (5 healthy individuals, 5 patients with acne) in Liaoning, China and performed whole genome sequencing of all strains. We demonstrated that the six potential pathogenic C. acnes strains were all Type II subtype, and discovered 90 unique genes of the six strains related to acne using pan-genome analysis. The distribution of 2 of the 90 genes was identified by PCR in bacterial cultures collected from the facial skin of 171 individuals (55 healthy individuals, 52 patients with mild acne and 64 patients with moderate to severe acne).
Both the genes were significantly associated with acne (Chi square test, P < 0.01). We conclude that Type II strains are associated with acne in Chinese patients.}, }

% Mangas et al. 2019: pangenome of Acinetobacter baumannii. The species name and the CRISPR/Cas acronym are braced so styles that recase titles keep their capitals. The export carried an empty pages field, which is omitted here.
@article{pmid31626589,
  author   = {Mangas, EL and Rubio, A and Álvarez-Marín, R and Labrador-Herrera, G and Pachón, J and Pachón-Ibáñez, ME and Divina, F and Pérez-Pulido, AJ},
  title    = {Pangenome of {Acinetobacter baumannii} uncovers two groups of genomes, one of them with genes involved in {CRISPR/Cas} defence systems associated with the absence of plasmids and exclusive genes for biofilm formation.},
  journal  = {Microbial genomics},
  year     = {2019},
  volume   = {5},
  number   = {11},
  pmid     = {31626589},
  issn     = {2057-5858},
  mesh     = {Acinetobacter baumannii/*genetics ; Bacteria/genetics ; Bacterial Proteins/genetics ; Biofilms ; CRISPR-Cas Systems ; Clustered Regularly Interspaced Short Palindromic Repeats ; Genome, Bacterial/genetics ; Genomics ; Phylogeny ; Plasmids/*genetics ; },
  abstract = {Acinetobacter baumannii is an opportunistic bacterium that causes hospital-acquired infections with a high mortality and morbidity, since there are strains resistant to virtually any kind of antibiotic. The chase to find novel strategies to fight against this microbe can be favoured by knowledge of the complete catalogue of genes of the species, and their relationship with the specific characteristics of different isolates. In this work, we performed a genomics analysis of almost 2500 strains. Two different groups of genomes were found based on the number of shared genes. One of these groups rarely has plasmids, and bears clustered regularly interspaced short palindromic repeat (CRISPR) sequences, in addition to CRISPR-associated genes (cas genes) or restriction-modification system genes. This fact strongly supports the lack of plasmids.
Furthermore, the scarce plasmids in this group also bear CRISPR sequences, and specifically contain genes involved in prokaryotic toxin-antitoxin systems that could either act as the still little known CRISPR type IV system or be the precursors of other novel CRISPR/Cas systems. In addition, a limited set of strains present a new cas9-like gene, which may complement the other cas genes in inhibiting the entrance of new plasmids into the bacteria. Finally, this group has exclusive genes involved in biofilm formation, which would connect CRISPR systems to the biogenesis of these bacterial resistance structures.}, }

% Wan 2019: comparative genomics of Flavobacterium akiainvivens IK-1T. The strain name is braced so styles that recase titles keep its capitals. The export carried an empty pages field, which is omitted here.
@article{pmid31623351,
  author   = {Wan, X},
  title    = {Comparative Genome Analyses Reveal the Genomic Traits and Host Plant Adaptations of {Flavobacterium akiainvivens IK-1T}.},
  journal  = {International journal of molecular sciences},
  year     = {2019},
  volume   = {20},
  number   = {19},
  pmid     = {31623351},
  issn     = {1422-0067},
  support  = {63191440//This work was supported by the Fundamental Research Funds for the Central Universities, Nankai University/ ; },
  mesh     = {*Adaptation, Physiological ; Bacterial Physiological Phenomena ; Biological Evolution ; Computational Biology/methods ; Flavobacterium/classification/*physiology ; *Genome, Bacterial ; *Genomics/methods ; *Host-Pathogen Interactions ; Plant Diseases/microbiology ; Plants/*microbiology ; Quorum Sensing ; Synteny ; },
  abstract = {The genus Flavobacterium contains a large group of commensal bacteria identified in diverse terrestrial and aquatic habitats. We compared the genome of a new species Flavobacterium akiainvivens IK-1T to public available genomes of Flavobacterium species to reveal the genomic traits and ecological roles of IK-1T. Principle component analysis (PCA) of carbohydrate-active enzyme classes suggests that IK-1T belongs to a terrestrial clade of Flavobacterium.
In addition, type 2 and type 9 secretion systems involved in bacteria-environment interactions were identified in the IK-1T genome. The IK-1T genome encodes eukaryotic-like domain containing proteins including ankyrin repeats, von Willebrand factor type A domain, and major royal jelly proteins, suggesting that IK-1T may alter plant host physiology by secreting eukaryotic-like proteins that mimic host proteins. A novel two-component system FaRpfC-FaYpdB was identified in the IK-1T genome, which may mediate quorum sensing to regulate global gene expressions. Our findings suggest that comparative genome analyses of Flavobacterium spp. reveal that IK-1T has adapted to a terrestrial niche. Further functional characterizations of IK-1T secreted proteins and their regulation systems will shed light on molecular basis of bacteria-plant interactions in environments.}, }

% Zhang et al. 2020: PADS Arsenal database of prokaryotic defense genes. The database name is braced to keep its capitals; the page range uses the double-hyphen form.
@article{pmid31620779,
  author   = {Zhang, Y and Zhang, Z and Zhang, H and Zhao, Y and Zhang, Z and Xiao, J},
  title    = {{PADS Arsenal}: a database of prokaryotic defense systems related genes.},
  journal  = {Nucleic acids research},
  year     = {2020},
  volume   = {48},
  number   = {D1},
  pages    = {D590--D598},
  pmid     = {31620779},
  issn     = {1362-4962},
  mesh     = {Archaea/*genetics/virology ; Archaeal Viruses/pathogenicity ; Bacteria/*genetics/virology ; Bacteriophages/pathogenicity ; CRISPR-Cas Systems ; DNA Restriction-Modification Enzymes ; *Databases, Genetic ; *Host-Pathogen Interactions ; *Software ; },
  abstract = {Defense systems are vital weapons for prokaryotes to resist heterologous DNA and survive from the constant invasion of viruses, and they are widely used in biochemistry investigation and antimicrobial drug research. So far, numerous types of defense systems have been discovered, but there is no comprehensive defense systems database to organize prokaryotic defense gene datasets.
To fill this gap, we unveil the prokaryotic antiviral defense system (PADS) Arsenal (https://bigd.big.ac.cn/padsarsenal), a public database dedicated to gathering, storing, analyzing and visualizing prokaryotic defense gene datasets. The initial version of PADS Arsenal integrates 18 distinctive categories of defense system with the annotation of 6 600 264 genes retrieved from 63,701 genomes across 33 390 species of archaea and bacteria. PADS Arsenal provides various ways to retrieve defense systems related genes information and visualize them with multifarious function modes. Moreover, an online analysis pipeline is integrated into PADS Arsenal to facilitate annotation and evolutionary analysis of defense genes. PADS Arsenal can also visualize the dynamic variation information of defense genes from pan-genome analysis. Overall, PADS Arsenal is a state-of-the-art open comprehensive resource to accelerate the research of prokaryotic defense systems.}, }

% Li et al. 2019: non-reference sequences missing from the human reference genome.
@article{pmid31619167,
  author   = {Li, R and Tian, X and Yang, P and Fan, Y and Li, M and Zheng, H and Wang, X and Jiang, Y},
  title    = {Recovery of non-reference sequences missing from the human reference genome.},
  journal  = {BMC genomics},
  year     = {2019},
  volume   = {20},
  number   = {1},
  pages    = {746},
  pmid     = {31619167},
  issn     = {1471-2164},
  support  = {31822052//National Natural Science Foundation of China/ ; 31802027//National Natural Science Foundation of China/ ; 2018M631209//Doctoral Program Foundation of Institutions of Higher Education of China/ ; 2452018127//Fundamental Research Funds for the Central Universities/ ; },
  mesh     = {Alleles ; Chromosome Mapping ; Genetic Variation/genetics ; Genome, Human/*genetics ; Humans ; Mutagenesis, Insertional ; Sequence Alignment ; Sequence Analysis, DNA ; Tandem Repeat Sequences ; },
  abstract = {BACKGROUND: The non-reference sequences (NRS) represent structure variations in human genome with potential functional significance.
However, besides the known insertions, it is currently unknown whether other types of structure variations with NRS exist.

RESULTS: Here, we compared 31 human de novo assemblies with the current reference genome to identify the NRS and their location. We resolved the precise location of 6113 NRS adding up to 12.8 Mb. Besides 1571 insertions, we detected 3041 alternate alleles, which were defined as having less than 90% (or none) identity with the reference alleles. These alternate alleles overlapped with 1143 protein-coding genes including a putative novel MHC haplotype. Further, we demonstrated that the alternate alleles and their flanking regions had high content of tandem repeats, indicating that their origin was associated with tandem repeats.

CONCLUSIONS: Our study detected a large number of NRS including many alternate alleles which are previously uncharacterized. We suggested that the origin of alternate alleles was associated with tandem repeats. Our results enriched the spectrum of genetic variations in human genome.}, }

% Hoarfrost et al. 2020: global ecotypes of the marine clade SAR86. The clade name is braced so styles that recase titles keep its capitals; the page range uses the double-hyphen form.
@article{pmid31611653,
  author   = {Hoarfrost, A and Nayfach, S and Ladau, J and Yooseph, S and Arnosti, C and Dupont, CL and Pollard, KS},
  title    = {Global ecotypes in the ubiquitous marine clade {SAR86}.},
  journal  = {The ISME journal},
  year     = {2020},
  volume   = {14},
  number   = {1},
  pages    = {178--188},
  doi      = {10.1038/s41396-019-0516-7},
  pmid     = {31611653},
  issn     = {1751-7370},
  mesh     = {Ecotype ; Gammaproteobacteria/*classification/genetics ; Genes, Bacterial ; Metagenome ; Oceans and Seas ; Phylogeography ; },
  abstract = {SAR86 is an abundant and ubiquitous heterotroph in the surface ocean that plays a central role in the function of marine ecosystems. We hypothesized that despite its ubiquity, different SAR86 subgroups may be endemic to specific ocean regions and functionally specialized for unique marine environments. However, the global biogeographical distributions of SAR86 genes, and the manner in which these distributions correlate with marine environments, have not been investigated. We quantified SAR86 gene content across globally distributed metagenomic samples and modeled these gene distributions as a function of 51 environmental variables. We identified five distinct clusters of genes within the SAR86 pangenome, each with a unique geographic distribution associated with specific environmental characteristics. Gene clusters are characterized by the strong taxonomic enrichment of distinct SAR86 genomes and partial assemblies, as well as differential enrichment of certain functional groups, suggesting differing functional and ecological roles of SAR86 ecotypes.
We then leveraged our models and high-resolution, remote sensing-derived environmental data to predict the distributions of SAR86 gene clusters across the world's oceans, creating global maps of SAR86 ecotype distributions. Our results reveal that SAR86 exhibits previously unknown, complex biogeography, and provide a framework for exploring geographic distributions of genetic diversity from other microbial clades.}, }

% Tralamazza et al. 2019: metabolite gene clusters in the Fusarium graminearum species complex. The species name is braced so styles that recase titles keep its capital; the page range uses the double-hyphen form.
@article{pmid31609418,
  author   = {Tralamazza, SM and Rocha, LO and Oggenfuss, U and Corrêa, B and Croll, D},
  title    = {Complex Evolutionary Origins of Specialized Metabolite Gene Cluster Diversity among the Plant Pathogenic Fungi of the {Fusarium graminearum} Species Complex.},
  journal  = {Genome biology and evolution},
  year     = {2019},
  volume   = {11},
  number   = {11},
  pages    = {3106--3122},
  pmid     = {31609418},
  issn     = {1759-6653},
  mesh     = {DNA Transposable Elements ; Evolution, Molecular ; Fungi/genetics ; Fusariosis/*microbiology ; Fusarium/*genetics ; Gene Transfer, Horizontal ; *Genome, Fungal ; *Multigene Family ; Plant Diseases/microbiology ; Secondary Metabolism/*genetics ; Triticum/microbiology ; },
  abstract = {Fungal genomes encode highly organized gene clusters that underlie the production of specialized (or secondary) metabolites. Gene clusters encode key functions to exploit plant hosts or environmental niches. Promiscuous exchange among species and frequent reconfigurations make gene clusters some of the most dynamic elements of fungal genomes. Despite evidence for high diversity in gene cluster content among closely related strains, the microevolutionary processes driving gene cluster gain, loss, and neofunctionalization are largely unknown. We analyzed the Fusarium graminearum species complex (FGSC) composed of plant pathogens producing potent mycotoxins and causing Fusarium head blight on cereals. We de novo assembled genomes of previously uncharacterized FGSC members (two strains of F. austroamericanum, F. cortaderiae, and F. meridionale).
Our analyses of 8 species of the FGSC in addition to 15 other Fusarium species identified a pangenome of 54 gene clusters within FGSC. We found that multiple independent losses were a key factor generating extant cluster diversity within the FGSC and the Fusarium genus. We identified a modular gene cluster conserved among distantly related fungi, which was likely reconfigured to encode different functions. We also found strong evidence that a rare cluster in FGSC was gained through an ancient horizontal transfer between bacteria and fungi. Chromosomal rearrangements underlying cluster loss were often complex and were likely facilitated by an enrichment in specific transposable elements. Our findings identify important transitory stages in the birth and death process of specialized metabolism gene clusters among very closely related species.}, }

% Tett et al. 2019: the Prevotella copri complex. The ampersand in the journal name is escaped because a bare one is a LaTeX alignment character and fails when the entry is typeset. Case-sensitive title words are braced; the page range uses the double-hyphen form.
@article{pmid31607556,
  author   = {Tett, A and Huang, KD and Asnicar, F and Fehlner-Peach, H and Pasolli, E and Karcher, N and Armanini, F and Manghi, P and Bonham, K and Zolfo, M and De Filippis, F and Magnabosco, C and Bonneau, R and Lusingu, J and Amuasi, J and Reinhard, K and Rattei, T and Boulund, F and Engstrand, L and Zink, A and Collado, MC and Littman, DR and Eibach, D and Ercolini, D and Rota-Stabelli, O and Huttenhower, C and Maixner, F and Segata, N},
  title    = {The {Prevotella copri} Complex Comprises Four Distinct Clades Underrepresented in {Westernized} Populations.},
  journal  = {Cell host \& microbe},
  year     = {2019},
  volume   = {26},
  number   = {5},
  pages    = {666--679.e7},
  doi      = {10.1016/j.chom.2019.08.018},
  pmid     = {31607556},
  issn     = {1934-6069},
  support  = {TL1 TR001447/TR/NCATS NIH HHS/United States ; /ERC_/European Research Council/International ; /HHMI/Howard Hughes Medical Institute/United States ; U54 DE023798/DE/NIDCR NIH HHS/United States ; R01 DK103358/DK/NIDDK NIH HHS/United States ; R01 HG005220/HG/NHGRI NIH HHS/United States ; R24 DK110499/DK/NIDDK NIH HHS/United States ; },
  mesh     = {Diet ; Ethiopia ;
Feces/microbiology ; Fossils/*microbiology ; Gastrointestinal Microbiome/*genetics ; Genetic Variation ; Genome, Bacterial/*genetics ; Ghana ; Humans ; Prevotella/*classification/*genetics/isolation & purification ; Tanzania ; }, abstract = {Prevotella copri is a common human gut microbe that has been both positively and negatively associated with host health. In a cross-continent meta-analysis exploiting >6,500 metagenomes, we obtained >1,000 genomes and explored the genetic and population structure of P. copri. P. copri encompasses four distinct clades (>10% inter-clade genetic divergence) that we propose constitute the P. copri complex, and all clades were confirmed by isolate sequencing. These clades are nearly ubiquitous and co-present in non-Westernized populations. Genomic analysis showed substantial functional diversity in the complex with notable differences in carbohydrate metabolism, suggesting that multi-generational dietary modifications may be driving reduced prevalence in Westernized populations. Analysis of ancient metagenomes highlighted patterns of P. copri presence consistent with modern non-Westernized populations and a clade delineation time pre-dating human migratory waves out of Africa. These findings reveal that P. 
copri exhibits a high diversity that is underrepresented in Western-lifestyle populations.}, }

% Chen et al. 2019: comparative genomics of clinical Elizabethkingia meningoseptica strains. The species name is braced so styles that recase titles keep its capital.
@article{pmid31600234,
  author   = {Chen, S and Soehnlen, M and Blom, J and Terrapon, N and Henrissat, B and Walker, ED},
  title    = {Comparative genomic analyses reveal diverse virulence factors and antimicrobial resistance mechanisms in clinical {Elizabethkingia meningoseptica} strains.},
  journal  = {PloS one},
  year     = {2019},
  volume   = {14},
  number   = {10},
  pages    = {e0222648},
  pmid     = {31600234},
  issn     = {1932-6203},
  support  = {R37 AI021884/AI/NIAID NIH HHS/United States ; },
  mesh     = {Anti-Bacterial Agents/therapeutic use ; Biofilms/growth & development ; Clustered Regularly Interspaced Short Palindromic Repeats/genetics ; Comparative Genomic Hybridization ; DNA-Binding Proteins/genetics ; Drug Resistance, Bacterial/*genetics ; Flavobacteriaceae/*genetics/pathogenicity ; Flavobacteriaceae Infections/drug therapy/epidemiology/*genetics/microbiology ; Genome, Bacterial/*genetics ; Genomics/methods ; Humans ; Phylogeny ; Transcription Factors/genetics ; Virulence Factors/genetics ; },
  abstract = {Three human clinical isolates of bacteria (designated strains Em1, Em2 and Em3) had high average nucleotide identity (ANI) to Elizabethkingia meningoseptica. Their genome sizes (3.89, 4.04 and 4.04 Mb) were comparable to those of other Elizabethkingia species and strains, and exhibited open pan-genome characteristics, with two strains being nearly identical and the third divergent. These strains were susceptible only to trimethoprim/sulfamethoxazole and ciprofloxacin amongst 16 antibiotics in minimum inhibitory tests. The resistome exhibited a high diversity of resistance genes, including 5 different lactamase- and 18 efflux protein- encoding genes. Forty-four genes encoding virulence factors were conserved among the strains. Sialic acid transporters and curli synthesis genes were well conserved in E. meningoseptica but absent in E. anophelis and E. miricola. E.
meningoseptica carried several genes contributing to biofilm formation. 58 glycoside hydrolases (GH) and 25 putative polysaccharide utilization loci (PULs) were found. The strains carried numerous genes encoding two-component system proteins (56), transcription factor proteins (187~191), and DNA-binding proteins (6~7). Several prophages and CRISPR/Cas elements were uniquely present in the genomes.}, } @article {pmid31598686, year = {2019}, author = {Bayliss, SC and Thorpe, HA and Coyle, NM and Sheppard, SK and Feil, EJ}, title = {{PIRATE}: A fast and scalable pangenomics toolbox for clustering diverged orthologues in bacteria.}, journal = {GigaScience}, volume = {8}, number = {10}, pages = {}, pmid = {31598686}, issn = {2047-217X}, support = {BB/M026388/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Cluster Analysis ; *Genome, Bacterial ; Genomics/*methods ; }, abstract = {BACKGROUND: Cataloguing the distribution of genes within natural bacterial populations is essential for understanding evolutionary processes and the genetic basis of adaptation. Advances in whole genome sequencing technologies have led to a vast expansion in the amount of bacterial genomes deposited in public databases. There is a pressing need for software solutions which are able to cluster, catalogue and characterise genes, or other features, in increasingly large genomic datasets.

RESULTS: Here we present a pangenomics toolbox, PIRATE (Pangenome Iterative Refinement and Threshold Evaluation), which identifies and classifies orthologous gene families in bacterial pangenomes over a wide range of sequence similarity thresholds. PIRATE builds upon recent scalable software developments to allow for the rapid interrogation of thousands of isolates. PIRATE clusters genes (or other annotated features) over a wide range of amino acid or nucleotide identity thresholds and uses the clustering information to rapidly identify paralogous gene families and putative fission/fusion events. Furthermore, PIRATE orders the pangenome using a directed graph, provides a measure of allelic variation, and estimates sequence divergence for each gene family.

CONCLUSIONS: We demonstrate that PIRATE scales linearly with both number of samples and computation resources, allowing for analysis of large genomic datasets, and compares favorably to other popular tools. PIRATE provides a robust framework for analysing bacterial pangenomes, from largely clonal to panmictic species.}, } @article {pmid31589296, year = {2019}, author = {John, J and George, S and Nori, SRC and Nelson-Sathi, S}, title = {Phylogenomic Analysis Reveals the Evolutionary Route of Resistant Genes in {Staphylococcus} aureus.}, journal = {Genome biology and evolution}, volume = {11}, number = {10}, pages = {2917--2926}, pmid = {31589296}, issn = {1759-6653}, mesh = {Drug Resistance, Bacterial/*genetics ; *Evolution, Molecular ; Gene Transfer, Horizontal ; Genome, Bacterial ; Phylogeny ; Staphylococcus aureus/classification/drug effects/*genetics ; }, abstract = {Multidrug-resistant Staphylococcus aureus is a leading concern worldwide. Coagulase-Negative Staphylococci are claimed to be the reservoir and source of important resistant elements in S. aureus. However, the origin and evolutionary route of resistant genes in S. aureus are still remaining unknown. Here, we performed a detailed phylogenomic analysis of 152 completely sequenced S. aureus strains in comparison with 7,529 non-Staphylococcus aureus reference bacterial genomes. Our results reveal that S. aureus has a large open pan-genome where 97 (55%) of its known resistant-related genes belonging to its accessory genome. Among these genes, 47 (27%) were located within the Staphylococcal Cassette Chromosome mec (SCCmec), a transposable element responsible for resistance against major classes of antibiotics including beta-lactams, macrolides, and aminoglycosides. However, the physically linked mec-box genes (MecA-MecR-MecI) that are responsible for the maintenance of SCCmec elements is not unique to S. aureus, instead it is widely distributed within Staphylococcaceae family. 
The phyletic patterns of SCCmec-encoded resistant genes in Staphylococcus species are significantly different from that of its core genes indicating frequent exchange of these genes between Staphylococcus species. Our in-depth analysis of SCCmec-resistant gene phylogenies reveals that genes such as blaZ, ble, kmA, and tetK that are responsible for beta-lactam, bleomycin, kanamycin, and tetracycline resistance in S. aureus were laterally transferred from non-Staphylococcus sources. In addition, at least 11 non-SCCmec-encoded resistant genes in S. aureus, were laterally acquired from distantly related species. Our study evidently shows that gene transfers played a crucial role in shaping the evolution of antibiotic resistance in S. aureus.}, } @article {pmid31584869, year = {2020}, author = {Zhao, L and Chen, H and Didelot, X and Li, Z and Li, Y and Chen, M and Du, Y and Zhao, H and Li, J and Hu, Q and Kan, B and Chen, M and Pang, B}, title = {Co-existence of multiple distinct lineages in {Vibrio} parahaemolyticus serotype {O4:K12}.}, journal = {Microbial genomics}, volume = {6}, number = {12}, pages = {}, doi = {10.1099/mgen.0.000287}, pmid = {31584869}, issn = {2057-5858}, support = {MR/R015600/1/MRC_/Medical Research Council/United Kingdom ; }, abstract = {Vibrio parahaemolyticus is an important cause of foodborne gastroenteritis globally. Thermostable direct haemolysin (TDH) and the TDH-related haemolysin are the two key virulence factors in V. parahaemolyticus. Vibrio pathogenicity islands harbour the genes encoding these two haemolysins. The serotyping of V. parahaemolyticus is based on the combination of O and K antigens. Frequent recombination has been observed in V. parahaemolyticus, including in the genomic regions encoding the O and K antigens. V. parahaemolyticus serotype O4:K12 has caused gastroenteritis outbreaks in the USA and Spain. Recently, outbreaks caused by this serotype of V. parahaemolyticus have been reported in China. 
However, the relationships among this serotype of V. parahaemolyticus strains isolated in different regions have not been addressed. Here, we investigated the genome variation of the V. parahaemolyticus serotype O4:K12 using the whole-genome sequences of 29 isolates. We determined five distinct lineages in this strain collection. We observed frequent recombination among different lineages. In contrast, little recombination was observed within each individual lineage. We showed that the lineage of this serotype of V. parahaemolyticus isolated in America was different from those isolated in Asia and identified genes that exclusively existed in the strains isolated in America. Pan-genome analysis showed that strain-specific and cluster-specific genes were mostly located in the genomic islands. Pan-genome analysis also showed that the vast majority of the accessory genes in the O4:K12 serotype of V. parahaemolyticus were acquired from within the genus Vibrio. Hence, we have shown that multiple distinct lineages exist in V. parahaemolyticus serotype O4:K12 and have provided more evidence about the gene segregation found in V. parahaemolyticus isolated in different continents.}, } @article {pmid31584649, year = {2019}, author = {Li, G and Ji, B and Nielsen, J}, title = {The pan-genome of {Saccharomyces} cerevisiae.}, journal = {FEMS yeast research}, volume = {19}, number = {7}, pages = {}, doi = {10.1093/femsyr/foz064}, pmid = {31584649}, issn = {1567-1364}, mesh = {Gene Expression Regulation, Fungal ; *Genome, Fungal ; Genotype ; High-Throughput Nucleotide Sequencing ; *Machine Learning ; Phenotype ; Saccharomyces cerevisiae/*genetics ; }, abstract = {Understanding genotype-phenotype relationship is fundamental in biology. With the benefit from next-generation sequencing and high-throughput phenotyping methodologies, there have been generated much genome and phenome data for Saccharomyces cerevisiae. 
This makes it an excellent model system to understand the genotype-phenotype relationship. In this paper, we presented the reconstruction and application of the yeast pan-genome in resolving genotype-phenotype relationship by a machine learning-assisted approach.}, } @article {pmid31584605, year = {2020}, author = {Ferrés, I and Fresia, P and Iraola, G}, title = {simurg: simulate bacterial pangenomes in {R}.}, journal = {Bioinformatics (Oxford, England)}, volume = {36}, number = {4}, pages = {1273--1274}, doi = {10.1093/bioinformatics/btz735}, pmid = {31584605}, issn = {1367-4811}, mesh = {Bacteria ; Biological Evolution ; *Genome ; *Software ; }, abstract = {MOTIVATION: The pangenome concept describes genetic variability as the union of genes shared in a set of genomes and constitutes the current paradigm for comparative analysis of bacterial populations. However, there is a lack of tools to simulate pangenome variability and structure using defined evolutionary models.

RESULTS: We developed simurg, an R package that allows to simulate bacterial pangenomes using different combinations of evolutionary constraints such as gene gain, gene loss and mutation rates. Our tool allows the straightforward and reproducible simulation of bacterial pangenomes using real sequence data, providing a valuable tool for benchmarking of pangenome software or comparing evolutionary hypotheses.

The simurg package is released under the GPL-3 license, and is freely available for download from GitHub (https://github.com/iferres/simurg).

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid31579561, year = {2019}, author = {Sabbagh, CRR and Carrere, S and Lonjon, F and Vailleau, F and Macho, AP and Genin, S and Peeters, N}, title = {Pangenomic type {III} effector database of the plant pathogenic {Ralstonia} spp.}, journal = {PeerJ}, volume = {7}, number = {}, pages = {e7346}, pmid = {31579561}, issn = {2167-8359}, abstract = {Background: The bacterial plant pathogenic Ralstonia species belong to the beta-proteobacteria class and are soil-borne pathogens causing vascular bacterial wilt disease, affecting a wide range of plant hosts. These bacteria form a heterogeneous group considered as a "species complex" gathering three newly defined species. Like many other Gram negative plant pathogens, Ralstonia pathogenicity relies on a type III secretion system, enabling bacteria to secrete/inject a large repertoire of type III effectors into their plant host cells. Type III-secreted effectors (T3Es) are thought to participate in generating a favorable environment for the pathogen (countering plant immunity and modifying the host metabolism and physiology).

Methods: Expert genome annotation, followed by specific type III-dependent secretion, allowed us to improve our Hidden-Markov-Model and Blast profiles for the prediction of type III effectors.

Results: We curated the T3E repertoires of 12 plant pathogenic Ralstonia strains, representing a total of 12 strains spread over the different groups of the species complex. This generated a pangenome repertoire of 102 T3E genes and 16 hypothetical T3E genes. Using this database, we scanned for the presence of T3Es in the 155 available genomes representing 140 distinct plant pathogenic Ralstonia strains isolated from different host plants in different areas of the globe. All this information is presented in a searchable database. A presence/absence analysis, modulated by a strain sequence/gene annotation quality score, enabled us to redefine core and accessory T3E repertoires.}, } @article {pmid31574156, year = {2019}, author = {Song, B and Song, Y and Fu, Y and Kizito, EB and Kamenya, SN and Kabod, PN and Liu, H and Muthemba, S and Kariba, R and Njuguna, J and Maina, S and Stomeo, F and Djikeng, A and Hendre, PS and Chen, X and Chen, W and Li, X and Sun, W and Wang, S and Cheng, S and Muchugi, A and Jamnadass, R and Shapiro, HY and Van Deynze, A and Yang, H and Wang, J and Xu, X and Odeny, DA and Liu, X}, title = {Draft genome sequence of {Solanum} aethiopicum provides insights into disease resistance, drought tolerance, and the evolution of the genome.}, journal = {GigaScience}, volume = {8}, number = {10}, pages = {}, pmid = {31574156}, issn = {2047-217X}, mesh = {Acclimatization/genetics ; Disease Resistance/genetics ; Droughts ; Evolution, Molecular ; *Genome, Plant ; Phylogeny ; Polymorphism, Single Nucleotide ; Retroelements ; Solanum/*genetics ; Terminal Repeat Sequences ; }, abstract = {BACKGROUND: The African eggplant (Solanum aethiopicum) is a nutritious traditional vegetable used in many African countries, including Uganda and Nigeria. It is thought to have been domesticated in Africa from its wild relative, Solanum anguivi. S. 
aethiopicum has been routinely used as a source of disease resistance genes for several Solanaceae crops, including Solanum melongena. A lack of genomic resources has meant that breeding of S. aethiopicum has lagged behind other vegetable crops.

RESULTS: We assembled a 1.02-Gb draft genome of S. aethiopicum, which contained predominantly repetitive sequences (78.9%). We annotated 37,681 gene models, including 34,906 protein-coding genes. Expansion of disease resistance genes was observed via 2 rounds of amplification of long terminal repeat retrotransposons, which may have occurred ∼1.25 and 3.5 million years ago, respectively. By resequencing 65 S. aethiopicum and S. anguivi genotypes, 18,614,838 single-nucleotide polymorphisms were identified, of which 34,171 were located within disease resistance genes. Analysis of domestication and demographic history revealed active selection for genes involved in drought tolerance in both "Gilo" and "Shum" groups. A pan-genome of S. aethiopicum was assembled, containing 51,351 protein-coding genes; 7,069 of these genes were missing from the reference genome.

CONCLUSIONS: The genome sequence of S. aethiopicum enhances our understanding of its biotic and abiotic resistance. The single-nucleotide polymorphisms identified are immediately available for use by breeders. The information provided here will accelerate selection and breeding of the African eggplant, as well as other crops within the Solanaceae family.}, } @article {pmid31572328, year = {2019}, author = {Wang, D and Gao, F}, title = {Comprehensive Analysis of Replication Origins in {Saccharomyces} cerevisiae Genomes.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2122}, pmid = {31572328}, issn = {1664-302X}, abstract = {DNA replication initiates from multiple replication origins (ORIs) in eukaryotes. Discovery and characterization of replication origins are essential for a better understanding of the molecular mechanism of DNA replication. In this study, the features of autonomously replicating sequences (ARSs) in Saccharomyces cerevisiae have been comprehensively analyzed as follows. Firstly, we carried out the analysis of the ARSs available in S. cerevisiae S288C. By evaluating the sequence similarity of experimentally established ARSs, we found that 94.32% of ARSs are unique across the whole genome of S. cerevisiae S288C and those with high sequence similarity are prone to locate in subtelomeres. Subsequently, we built a non-redundant dataset with a total of 520 ARSs, which are based on ARSs annotation of S. cerevisiae S288C from SGD and then supplemented with those from OriDB and DeOri databases. We conducted a large-scale comparison of ORIs among the diverse budding yeast strains from a population genomics perspective. We found that 82.7% of ARSs are not only conserved in genomic sequence but also relatively conserved in chromosomal position. The non-conserved ARSs tend to distribute in the subtelomeric regions. We also conducted a pan-genome analysis of ARSs among the S. 
cerevisiae strains, and a total of 183 core ARSs existing in all yeast strains were determined. We extracted the genes adjacent to replication origins among the 104 yeast strains to examine whether there are differences in their gene functions. The result showed that the genes involved in the initiation of DNA replication, such as orc3, mcm2, mcm4, mcm6, and cdc45, are conservatively located adjacent to the replication origins. Furthermore, we found the genes adjacent to conserved ARSs are significantly enriched in DNA binding, enzyme activity, transportation, and energy, whereas for the genes adjacent to non-conserved ARSs are significantly enriched in response to environmental stress, metabolites biosynthetic process and biosynthesis of antibiotics. In general, we characterized the replication origins from the genome-wide and population genomics perspectives, which would provide new insights into the replication mechanism of S. cerevisiae and facilitate the design of algorithms to identify genome-wide replication origins in yeast.}, } @article {pmid31553100, year = {2020}, author = {Dolatabadian, A and Bayer, PE and Tirnaz, S and Hurgobin, B and Edwards, D and Batley, J}, title = {Characterization of disease resistance genes in the {Brassica} napus pangenome reveals significant structural variation.}, journal = {Plant biotechnology journal}, volume = {18}, number = {4}, pages = {969--982}, pmid = {31553100}, issn = {1467-7652}, mesh = {Brassica napus/*genetics ; DNA Copy Number Variations ; Disease Resistance/*genetics ; *Genes, Plant ; Polymorphism, Single Nucleotide ; Quantitative Trait Loci ; }, abstract = {Methods based on single nucleotide polymorphism (SNP), copy number variation (CNV) and presence/absence variation (PAV) discovery provide a valuable resource to study gene structure and evolution. However, as a result of these structural variations, a single reference genome is unable to cover the entire gene content of a species. 
Therefore, pangenomics analysis is needed to ensure that the genomic diversity within a species is fully represented. Brassica napus is one of the most important oilseed crops in the world and exhibits variability in its resistance genes across different cultivars. Here, we characterized resistance gene distribution across 50 B. napus lines. We identified a total of 1749 resistance gene analogs (RGAs), of which 996 are core and 753 are variable, 368 of which are not present in the reference genome (cv. Darmor-bzh). In addition, a total of 15 318 SNPs were predicted within 1030 of the RGAs. The results showed that core R-genes harbour more SNPs than variable genes. More nucleotide binding site-leucine-rich repeat (NBS-LRR) genes were located in clusters than as singletons, with variable genes more likely to be found in clusters. We identified 106 RGA candidates linked to blackleg resistance quantitative trait locus (QTL). This study provides a better understanding of resistance genes to target for genomics-based improvement and improved disease resistance.}, } @article {pmid31552103, year = {2019}, author = {Zhang, W and Wang, J and Zhang, D and Liu, H and Wang, S and Wang, Y and Ji, H}, title = {Complete Genome Sequencing and Comparative Genome Characterization of {Lactobacillus} johnsonii {ZLJ010}, a Potential Probiotic With Health-Promoting Properties.}, journal = {Frontiers in genetics}, volume = {10}, number = {}, pages = {812}, pmid = {31552103}, issn = {1664-8021}, abstract = {Lactobacillus johnsonii ZLJ010 is a probiotic strain isolated from the feces of a healthy sow and has putative health-promoting properties. To determine the molecular basis underlying the probiotic potential of ZLJ010 and the genes involved in the same, complete genome sequencing and comparative genome analysis with L. johnsonii ZLJ010 were performed. 
The ZLJ010 genome was found to contain a single circular chromosome of 1,999,879 bp with a guanine-cytosine (GC) content of 34.91% and encoded 18 ribosomal RNA (rRNA) genes and 77 transfer RNA (tRNA) genes. From among the 1,959 protein coding sequences (CDSs), genes known to confer probiotic properties were identified, including genes related to stress adaptation, biosynthesis, metabolism, transport of amino acid, secretion, and the defense machinery. ZLJ010 lacked complete or partial biosynthetic pathways for amino acids but was predicted to compensate for this with an enhanced transport system and some unique amino acid permeases and peptidases that allow it to acquire amino acids and other precursors exogenously. The comparative genomic analysis of L. johnsonii ZLP001 and seven other available L. johnsonii strains, including L. johnsonii NCC533, FI9785, DPC6026, N6.2, BS15, UMNLJ22, and PF01, revealed 2,732 pan-genome orthologous gene clusters and 1,324 core-genome orthologous gene clusters. Phylogenomic analysis based on 1,288 single copy genes showed that ZLJ010 had a closer relationship with the BS15 from yogurt and DPC6026 from the porcine intestinal tract but was located on a relatively standalone branch. The number of clusters of unique, strain-specific genes ranged from 42 to 185. A total of 219 unique genes present in the genome of L. johnsonii ZLJ010 primarily encoded proteins that are putatively involved in replication, recombination and repair, defense mechanisms, transcription, amino acid transport and metabolism, and carbohydrate transport and metabolism. Two unique prophages were predicted in the ZLJ010 genome. The present study helps us understand the ability of L. 
johnsonii ZLJ010 to better adapt to the gut environment and also its probiotic functionalities.}, } @article {pmid31552006, year = {2019}, author = {Pain, M and Hjerde, E and Klingenberg, C and Cavanagh, JP}, title = {Comparative Genomic Analysis of {Staphylococcus} haemolyticus Reveals Key to Hospital Adaptation and Pathogenicity.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {2096}, pmid = {31552006}, issn = {1664-302X}, abstract = {Staphylococcus haemolyticus is a skin commensal gaining increased attention as an emerging pathogen of nosocomial infections. However, knowledge about the transition from a commensal to an invasive lifestyle remains sparse and there is a paucity of studies comparing pathogenicity traits between commensal and clinical isolates. In this study, we used a pan-genomic approach to identify factors important for infection and hospital adaptation by exploring the genomic variability of 123 clinical isolates and 46 commensal S. haemolyticus isolates. Phylogenetic reconstruction grouped the 169 isolates into six clades with a distinct distribution of clinical and commensal isolates in the different clades. Phenotypically, multi-drug antibiotic resistance was detected in 108/123 (88%) of the clinical isolates and 5/46 (11%) of the commensal isolates (p < 0.05). In the clinical isolates, we commonly identified a homolog of the serine-rich repeat glycoproteins sraP. Additionally, three novel capsular polysaccharide operons were detected, with a potential role in S. haemolyticus virulence. Clinical S. haemolyticus isolates showed specific signatures associated with successful hospital adaption. Biofilm forming S. haemolyticus isolates that are resistant to oxacillin (mecA) and aminoglycosides (aacA-aphD) are most likely invasive isolates whereas absence of these traits strongly indicates a commensal isolate. 
We conclude that our data show a clear segregation of isolates of commensal origin, and specific genetic signatures distinguishing the clinical isolates from the commensal isolates. The widespread use of antimicrobial agents has probably promoted the development of successful hospital adapted clones of S. haemolyticus clones through acquisition of mobile genetic elements or beneficial point mutations and rearrangements in surface associated genes.}, } @article {pmid31546297, year = {2019}, author = {Heo, S and Lee, J and Lee, JH and Jeong, DW}, title = {Genomic Insight into the Salt Tolerance of {Enterococcus} faecium, {Enterococcus} faecalis and {Tetragenococcus} halophilus.}, journal = {Journal of microbiology and biotechnology}, volume = {29}, number = {10}, pages = {1591--1602}, doi = {10.4014/jmb.1908.08015}, pmid = {31546297}, issn = {1738-8872}, mesh = {Bacterial Proteins/genetics ; Enterococcaceae/genetics/physiology ; Enterococcus faecalis/genetics/*physiology ; Enterococcus faecium/genetics/physiology ; Genome, Bacterial/*genetics ; Membrane Transport Proteins ; Salt Tolerance/*genetics ; Species Specificity ; }, abstract = {To shed light on the genetic basis of salt tolerance in Enterococcus faecium, Enterococcus faecalis, and Tetragenococcus halophilus, we performed comparative genome analysis of 10 E. faecalis, 11 E. faecium, and three T. halophilus strains. Factors involved in salt tolerance that could be used to distinguish the species were identified. Overall, T. halophilus contained a greater number of potassium transport and osmoprotectant synthesis genes compared with the other two species. In particular, our findings suggested that T. halophilus may be the only one among the three species capable of synthesizing glycine betaine from choline, cardiolipin from glycerol and proline from citrate. These molecules are well-known osmoprotectants; thus, we propose that these genes confer the salt-tolerance of T. 
halophilus.}, } @article {pmid31544971, year = {2019}, author = {Hatje, K and Mühlhausen, S and Simm, D and Kollmar, M}, title = {The Protein-Coding Human Genome: Annotating High-Hanging Fruits.}, journal = {BioEssays : news and reviews in molecular, cellular and developmental biology}, volume = {41}, number = {11}, pages = {e1900066}, doi = {10.1002/bies.201900066}, pmid = {31544971}, issn = {1521-1878}, mesh = {Algorithms ; Alternative Splicing/genetics ; Animals ; Exons/genetics ; Genome, Human/*genetics ; Genomics/methods ; Humans ; Proteins/*genetics ; RNA Splicing/genetics ; Transcriptome/genetics ; }, abstract = {The major transcript variants of human protein-coding genes are annotated to a certain degree of accuracy combining manual curation, transcript data, and proteomics evidence. However, there is considerable disagreement on the annotation of about 2000 genes-they can be protein-coding, noncoding, or pseudogenes-and on the annotation of most of the predicted alternative transcripts. Pure transcriptome mapping approaches seem to be limited in discriminating functional expression from noise. These limitations have partially been overcome by dedicated algorithms to detect alternative spliced micro-exons and wobble splice variants. Recently, knowledge about splice mechanism and protein structure are incorporated into an algorithm to predict neighboring homologous exons, often spliced in a mutually exclusive manner. Predicted exons are evaluated by transcript data, structural compatibility, and evolutionary conservation, revealing hundreds of novel coding exons and splice mechanism re-assignments. 
The emerging human pan-genome is necessitating distinctive annotations incorporating differences between individuals and between populations.}, } @article {pmid31529373, year = {2019}, author = {Erwin, DH}, title = {Tempos and modes of collectivity in the history of life.}, journal = {Theory in biosciences = Theorie in den Biowissenschaften}, volume = {}, number = {}, pages = {}, doi = {10.1007/s12064-019-00303-4}, pmid = {31529373}, issn = {1611-7530}, support = {NNA13AA90A//NASA Astrobiology Institute/ ; }, abstract = {Collective integration and processing of information have increased through the history of life, through both the formation of aggregates in which the entities may have very different properties and which jointly coarse-grained environmental variables (ranging from widely varying metabolism in microbial consortia to the ecological diversity of species on reefs) and through collectives of similar entities (such as cells within an organism or social groups). Such increases have been implicated in significant transitions in the history of life, including aspects of the origin of life, the generation of pangenomes among microbes and microbial communities such as stromatolites, multicellularity and social insects. 
This contribution provides a preliminary overview of the dominant modes of collective information processing in the history of life, their phylogenetic distribution and extent of convergence, and the effects of new modes for integrating and acting upon information on the tempo of evolutionary change.}, } @article {pmid31510914, year = {2019}, author = {Sigalova, OM and Chaplin, AV and Bochkareva, OO and Shelyakin, PV and Filaretov, VA and Akkuratov, EE and Burskaia, V and Gelfand, MS}, title = {Chlamydia pan-genomic analysis reveals balance between host adaptation and selective pressure to genome reduction.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {710}, pmid = {31510914}, issn = {1471-2164}, mesh = {Adaptation, Physiological/*genetics ; Chlamydia/*genetics/*physiology ; Evolution, Molecular ; Genome, Bacterial/genetics ; *Genomics ; Host-Pathogen Interactions/*genetics ; Molecular Sequence Annotation ; *Selection, Genetic ; }, abstract = {BACKGROUND: Chlamydia are ancient intracellular pathogens with reduced, though strikingly conserved genome. Despite their parasitic lifestyle and isolated intracellular environment, these bacteria managed to avoid accumulation of deleterious mutations leading to subsequent genome degradation characteristic for many parasitic bacteria.

RESULTS: We report pan-genomic analysis of sixteen species from genus Chlamydia including identification and functional annotation of orthologous genes, and characterization of gene gains, losses, and rearrangements. We demonstrate the overall genome stability of these bacteria as indicated by a large fraction of common genes with conserved genomic locations. On the other hand, extreme evolvability is confined to several paralogous gene families such as polymorphic membrane proteins and phospholipase D, and likely is caused by the pressure from the host immune system.

CONCLUSIONS: This combination of a large, conserved core genome and a small, evolvable periphery likely reflect the balance between the selective pressure towards genome reduction and the need to adapt to escape from the host immunity.}, } @article {pmid31510650, year = {2019}, author = {Ghaffaari, A and Marschall, T}, title = {Fully-sensitive seed finding in sequence graphs using a hybrid index.}, journal = {Bioinformatics (Oxford, England)}, volume = {35}, number = {14}, pages = {i81--i89}, pmid = {31510650}, issn = {1367-4811}, mesh = {*Algorithms ; Alleles ; Diploidy ; *Genome, Human ; Humans ; Sequence Analysis, DNA ; *Software ; }, abstract = {MOTIVATION: Sequence graphs are versatile data structures that are, for instance, able to represent the genetic variation found in a population and to facilitate genome assembly. Read mapping to sequence graphs constitutes an important step for many applications and is usually done by first finding exact seed matches, which are then extended by alignment. Existing methods for finding seed hits prune the graph in complex regions, leading to a loss of information especially in highly polymorphic regions of the genome. While such complex graph structures can indeed lead to a combinatorial explosion of possible alleles, the query set of reads from a diploid individual realizes only two alleles per locus-a property that is not exploited by extant methods.

RESULTS: We present the Pan-genome Seed Index (PSI), a fully-sensitive hybrid method for seed finding, which takes full advantage of this property by combining an index over selected paths in the graph with an index over the query reads. This enables PSI to find all seeds while eliminating the need to prune the graph. We demonstrate its performance with different parameter settings on both simulated data and on a whole human genome graph constructed from variants in the 1000 Genome Project dataset. On this graph, PSI outperforms GCSA2 in terms of index size, query time and sensitivity.

The C++ implementation is publicly available at: https://github.com/cartoonist/psi.}, } @article {pmid31507574, year = {2019}, author = {Espadinha, D and Sobral, RG and Mendes, CI and Méric, G and Sheppard, SK and Carriço, JA and de Lencastre, H and Miragaia, M}, title = {Distinct Phenotypic and Genomic Signatures Underlie Contrasting Pathogenic Potential of Staphylococcus epidermidis Clonal Lineages.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {1971}, pmid = {31507574}, issn = {1664-302X}, support = {MR/L015080/1/MRC_/Medical Research Council/United Kingdom ; }, abstract = {Background: Staphylococcus epidermidis is a common skin commensal that has emerged as a pathogen in hospitals, mainly related to medical devices-associated infections. Noteworthy, infection rates by S. epidermidis have the tendency to rise steeply in next decades together with medical devices use and immunocompromized population growth. Staphylococcus epidermidis population structure includes two major clonal lineages (A/C and B) that present contrasting pathogenic potentials. To address this distinction and explore the basis of increased pathogenicity of A/C lineage, we performed a detailed comparative analysis using phylogenetic and integrated pangenome-wide-association study (panGWAS) approaches and compared the lineages' phenotypes in in vitro conditions mimicking carriage and infection. Results: Each S. epidermidis lineage had distinct phenotypic signatures in skin and infection conditions and differed in genomic content. Combination of phenotypic and genotypic data revealed that both lineages were well adapted to skin environmental cues. 
However, they appear to occupy different skin niches, perform distinct biological functions in the skin and use different mechanisms to complete the same function: lineage B strains showed evidence of specialization to survival in microaerobic and lipid rich environment, characteristic of hair follicle and sebaceous glands; lineage A/C strains showed evidence for adaption to diverse osmotic and pH conditions, potentially allowing them to occupy a broader and more superficial skin niche. In infection conditions, A/C strains had an advantage, having the potential to bind blood-associated host matrix proteins, form biofilms at blood pH, resist antibiotics and macrophage acidity and to produce proteases. These features were observed to be rare in the lineage B strains. PanGWAS analysis produced a catalog of putative S. epidermidis virulence factors and identified an epidemiological molecular marker for the more pathogenic lineage. Conclusion: The prevalence of A/C lineage in infection is probably related to a higher metabolic and genomic versatility that allows rapid adaptation during transition from a commensal to a pathogenic lifestyle. The putative virulence and phenotypic factors associated to A/C lineage constitute a reliable framework for future studies on S. epidermidis pathogenesis and the finding of an epidemiological marker for the more pathogenic lineage is an asset for the management of S. 
epidermidis infections.}, } @article {pmid31506467, year = {2019}, author = {Fariq, A and Blazier, JC and Yasmin, A and Gentry, TJ and Deng, Y}, title = {Whole genome sequence analysis reveals high genetic variation of newly isolated Acidithiobacillus ferrooxidans IO-2C.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {13049}, doi = {10.1038/s41598-019-49213-x}, pmid = {31506467}, issn = {2045-2322}, mesh = {Acidithiobacillus/*genetics ; Environmental Microbiology ; *Genetic Variation ; *Genome, Bacterial ; *Genomics/methods ; Iron/metabolism ; Oxidation-Reduction ; Phylogeny ; Sequence Analysis ; Whole Genome Sequencing ; }, abstract = {Acidithiobacillus ferrooxidans, a chemolithoautotrophic bacterium, is well known for its mineral oxidizing properties. The current study combines experimental and whole genome sequencing approaches to investigate an iron oxidizing, extreme acidophilic bacterium, A. ferrooxidans isolate (IO-2C) from an acid seep area near Carlos, TX, USA. Strain IO-2C was capable of oxidizing iron i.e. iron sulphate and iron ammonium sulphate yielding schwertmannite and jarosite minerals. Further, the bacterium's genome was sequenced, assembled and annotated to study its general features, structure and functions. To determine genetic heterogeneity, it was compared with the genomes of other published A. ferrooxidans strains. Pan-genome analysis displayed low gene conservation and significant genetic diversity in A. ferrooxidans species comprising of 6926 protein coding sequences with 23.04% (1596) core genes, 46.13% (3195) unique and 30.82% (2135) accessory genes. Variant analysis showed >75,000 variants, 287 of them with a predicted high impact, in A. ferrooxidans IO-2C genome compared to the reference strain, resulting in abandonment of some important functional key genes. 
The genome contains numerous functional genes for iron and sulphur metabolism, nitrogen fixation, secondary metabolites, degradation of aromatic compounds, and multidrug and heavy metal resistance. This study demonstrated the bio-oxidation of iron by newly isolated A. ferrooxidans IO-2C under acidic conditions, which was further supported by genomic analysis. Genomic analysis of this strain provided valuable information about the complement of genes responsible for the utilization of iron and tolerance of other metals.}, } @article {pmid31500174, year = {2019}, author = {Kaminski, MA and Sobczak, A and Dziembowski, A and Lipinski, L}, title = {Genomic Analysis of γ-Hexachlorocyclohexane-Degrading Sphingopyxis lindanitolerans WS5A3p Strain in the Context of the Pangenome of Sphingopyxis.}, journal = {Genes}, volume = {10}, number = {9}, pages = {}, pmid = {31500174}, issn = {2073-4425}, mesh = {Bacterial Proteins/genetics/metabolism ; Biodegradation, Environmental ; *Genome, Bacterial ; Hexachlorocyclohexane/*metabolism ; Pesticides/*metabolism ; Sphingomonadaceae/enzymology/*genetics/metabolism ; }, abstract = {Sphingopyxis inhabit diverse environmental niches, including marine, freshwater, oceans, soil and anthropogenic sites. The genus includes 20 phylogenetically distinct, valid species, but only a few with a sequenced genome. In this work, we analyzed the nearly complete genome of the newly described species, Sphingopyxis lindanitolerans, and compared it to the other available Sphingopyxis genomes. The genome included 4.3 Mbp in total and consists of a circular chromosome, and two putative plasmids. Among the identified set of lin genes responsible for γ-hexachlorocyclohexane pesticide degradation, we discovered a gene coding for a new isoform of the LinA protein. 
The significant potential of this species in the remediation of contaminated soil is also correlated with the fact that its genome encodes a higher number of enzymes potentially involved in aromatic compound degradation than for most other Sphingopyxis strains. Additional analysis of 44 Sphingopyxis representatives provides insights into the pangenome of Sphingopyxis and revealed a core of 734 protein clusters and between four and 1667 unique proteins per genome.}, } @article {pmid31484962, year = {2019}, author = {Safari, M and Yakhchali, B and Shariati J, V}, title = {Comprehensive genomic analysis of an indigenous Pseudomonas pseudoalcaligenes degrading phenolic compounds.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {12736}, doi = {10.1038/s41598-019-49048-6}, pmid = {31484962}, issn = {2045-2322}, mesh = {Anti-Bacterial Agents/pharmacology ; Biodegradation, Environmental ; Drug Resistance, Bacterial ; Genome, Bacterial ; Genomics ; Iran ; Phenols/chemistry/*metabolism ; Phylogeny ; Pseudomonas pseudoalcaligenes/drug effects/*genetics/isolation & purification/*metabolism ; Soil Pollutants/chemistry/metabolism ; }, abstract = {Environmental contamination with aromatic compounds is a universal challenge. Aromatic-degrading microorganisms isolated from the same or similar polluted environments seem to be more suitable for bioremediation. Moreover, microorganisms adapted to contaminated environments are able to use toxic compounds as the sole sources of carbon and energy. An indigenous strain of Pseudomonas, isolated from the Mahshahr Petrochemical plant in the Khuzestan province, southwest of Iran, was studied genetically. It was characterized as a novel Gram-negative, aerobic, halotolerant, rod-shaped bacterium designated Pseudomonas YKJ, which was resistant to chloramphenicol and ampicillin. Genome of the strain was completely sequenced using Illumina technology to identify its genetic characteristics. 
MLST analysis revealed that the YKJ strain belongs to the genus Pseudomonas indicating the highest sequence similarity with Pseudomonas pseudoalcaligenes strain CECT 5344 (99% identity). Core- and pan-genome analysis indicated that P. pseudoalcaligenes contains 1,671 core and 3,935 unique genes for coding DNA sequences. The metabolic and degradation pathways for aromatic pollutants were investigated using the NCBI and KEGG databases. Genomic and experimental analyses showed that the YKJ strain is able to degrade certain aromatic compounds including bisphenol A, phenol, benzoate, styrene, xylene, benzene and chlorobenzene. Moreover, antibiotic resistance and chemotaxis properties of the YKJ strain were found to be controlled by two-component regulatory systems.}, } @article {pmid31481382, year = {2019}, author = {Tidjani, AR and Lorenzi, JN and Toussaint, M and van Dijk, E and Naquin, D and Lespinet, O and Bontemps, C and Leblond, P}, title = {Massive Gene Flux Drives Genome Diversity between Sympatric Streptomyces Conspecifics.}, journal = {mBio}, volume = {10}, number = {5}, pages = {}, pmid = {31481382}, issn = {2150-7511}, mesh = {Actinobacteria/genetics ; Biosynthetic Pathways/genetics ; Chromosomes, Bacterial ; Conjugation, Genetic ; DNA, Bacterial/genetics ; *Gene Transfer, Horizontal ; Genes, Bacterial/*genetics ; *Genetic Variation ; Genome, Bacterial ; Multigene Family ; Multilocus Sequence Typing ; Phylogeny ; Plasmids ; Streptomyces/*genetics ; }, abstract = {In this work, by comparing genomes of closely related individuals of Streptomyces isolated at a spatial microscale (millimeters or centimeters), we investigated the extent and impact of horizontal gene transfer in the diversification of a natural Streptomyces population. We show that despite these conspecific strains sharing a recent common ancestor, all harbored significantly different gene contents, implying massive and rapid gene flux. 
The accessory genome of the strains was distributed across insertion/deletion events (indels) ranging from one to several hundreds of genes. Indels were preferentially located in the arms of the linear chromosomes (ca. 12 Mb) and appeared to form recombination hot spots. Some of them harbored biosynthetic gene clusters (BGCs) whose products confer an inhibitory capacity and may constitute public goods that can favor the cohesiveness of the bacterial population. Moreover, a significant proportion of these variable genes were either plasmid borne or harbored signatures of actinomycete integrative and conjugative elements (AICEs). We propose that conjugation is the main driver for the indel flux and diversity in Streptomyces populations. IMPORTANCE: Horizontal gene transfer is a rapid and efficient way to diversify bacterial gene pools. Currently, little is known about this gene flux within natural soil populations. Using comparative genomics of Streptomyces strains belonging to the same species and isolated at microscale, we reveal frequent transfer of a significant fraction of the pangenome. We show that it occurs at a time scale enabling the population to diversify and to cope with its changing environment, notably, through the production of public goods.}, } @article {pmid31474554, year = {2019}, author = {Fernie, AR and Aharoni, A}, title = {Pan-Genomic Illumination of Tomato Identifies Novel Gene-Trait Interactions.}, journal = {Trends in plant science}, volume = {24}, number = {10}, pages = {882--884}, doi = {10.1016/j.tplants.2019.08.001}, pmid = {31474554}, issn = {1878-4372}, mesh = {Alleles ; Fruit ; Genome, Plant ; Genomics ; Lighting ; Lycopersicon esculentum/*genetics ; }, abstract = {A recent study by Gao et al., (Nat. Genet., 2019) presents a tomato pan-genome that was constructed using genome sequences of 725 phylogenetically and geographically representative accessions. 
The study revealed 4873 genes that are absent from the reference genome, including important genes associated with both disease resistance and flavor, thereby providing an important breeding resource.}, } @article {pmid31462565, year = {2019}, author = {Zeng, C and Gilcrease, EB and Hendrix, RW and Xie, Y and Jalfon, MJ and Gill, JJ and Casjens, SR}, title = {DNA Packaging and Genomics of the Salmonella 9NA-Like Phages.}, journal = {Journal of virology}, volume = {93}, number = {22}, pages = {}, pmid = {31462565}, issn = {1098-5514}, support = {R01 GM051975/GM/NIGMS NIH HHS/United States ; R01 GM114817/GM/NIGMS NIH HHS/United States ; }, mesh = {DNA Packaging/*genetics/physiology ; DNA Replication ; DNA, Viral/genetics ; Genome/genetics ; Genome, Viral/genetics ; Genomics/methods ; Phylogeny ; Salmonella/genetics/metabolism/*virology ; Salmonella Phages/*genetics ; Siphoviridae/genetics/metabolism ; Viral Proteins/genetics ; Virion/genetics ; }, abstract = {We present the genome sequences of Salmonella enterica tailed phages Sasha, Sergei, and Solent. These phages, along with Salmonella phages 9NA, FSL_SP-062, and FSL_SP-069 and the more distantly related Proteus phage PmiS-Isfahan, have similarly sized genomes of between 52 and 57 kbp in length that are largely syntenic. Their genomes also show substantial genome mosaicism relative to one another, which is common within tailed phage clusters. Their gene content ranges from 80 to 99 predicted genes, of which 40 are common to all seven and form the core genome, which includes all identifiable virion assembly and DNA replication genes. The total number of gene types (pangenome) in the seven phages is 176, and 59 of these are unique to individual phages. 
Their core genomes are much more closely related to one another than to the genome of any other known phage, and they comprise a well-defined cluster within the family Siphoviridae. To begin to characterize this group of phages in more experimental detail, we identified the genes that encode the major virion proteins and examined the DNA packaging of the prototypic member, phage 9NA. We show that it uses a pac site-directed headful packaging mechanism that results in virion chromosomes that are circularly permuted and about 13% terminally redundant. We also show that its packaging series initiates with double-stranded DNA cleavages that are scattered across a 170-bp region and that its headful measuring device has a precision of ±1.8%. IMPORTANCE: The 9NA-like phages are clearly highly related to each other but are not closely related to any other known phage type. This work describes the genomes of three new 9NA-like phages and the results of experimental analysis of the proteome of the 9NA virion and DNA packaging into the 9NA phage head. There is increasing interest in the biology of phages because of their potential for use as antibacterial agents and for their ecological roles in bacterial communities. 9NA-like phages that infect two bacterial genera have been identified to date, and related phages infecting additional Gram-negative bacterial hosts are likely to be found in the future. 
This work provides a foundation for the study of these phages, which will facilitate their study and potential use.}, } @article {pmid31461668, year = {2020}, author = {Lee, K and Kim, MS and Lee, JS and Bae, DN and Jeong, N and Yang, K and Lee, JD and Park, JH and Moon, JK and Jeong, SC}, title = {Chromosomal features revealed by comparison of genetic maps of Glycine max and Glycine soja.}, journal = {Genomics}, volume = {112}, number = {2}, pages = {1481--1489}, doi = {10.1016/j.ygeno.2019.08.019}, pmid = {31461668}, issn = {1089-8646}, abstract = {Recombination is a crucial component of evolution and breeding. New combinations of variation on chromosomes are shaped by recombination. Recombination is also involved in chromosomal rearrangements. However, recombination rates vary tremendously among chromosome segments. Genome-wide genetic maps are one of the best tools to study variation of recombination. Here, we describe high density genetic maps of Glycine max and Glycine soja constructed from four segregating populations. The maps were used to identify chromosomal rearrangements and find the highly predictable pattern of cross-overs on the broad scale in soybean. Markers on these genetic maps were used to evaluate assembly quality of the current soybean reference genome sequence. We find a strong inversion candidate larger than 3 Mb based on patterns of cross-overs. We also identify quantitative trait loci (QTL) that control number of cross-overs. 
This study provides fundamental insights relevant to practical strategy for breeding programs and for pan-genome researches.}, } @article {pmid31455646, year = {2019}, author = {Seif, Y and Monk, JM and Machado, H and Kavvas, E and Palsson, BO}, title = {Systems Biology and Pangenome of Salmonella O-Antigens.}, journal = {mBio}, volume = {10}, number = {4}, pages = {}, pmid = {31455646}, issn = {2150-7511}, support = {U01 AI124316/AI/NIAID NIH HHS/United States ; }, mesh = {Genetic Variation ; Genome, Bacterial/*genetics ; Lipopolysaccharides/*immunology ; Metabolic Networks and Pathways ; O Antigens/biosynthesis/*genetics/immunology ; Salmonella/genetics/*immunology ; Serogroup ; Serotyping ; *Systems Biology ; }, abstract = {O-antigens are glycopolymers in lipopolysaccharides expressed on the cell surface of Gram-negative bacteria. Variability in the O-antigen structure constitutes the basis for the establishment of the serotyping schema. We pursued a two-pronged approach to define the basis for O-antigen structural diversity. First, we developed a bottom-up systems biology approach to O-antigen metabolism by building a reconstruction of Salmonella O-antigen biosynthesis and used it to (i) update 410 existing Salmonella strain-specific metabolic models, (ii) predict a strain's serogroup and its O-antigen glycan synthesis capability (yielding 98% agreement with experimental data), and (iii) extend our workflow to more than 1,400 Gram-negative strains. Second, we used a top-down pangenome analysis to elucidate the genetic basis for intraserogroup O-antigen structural variations. 
We assembled a database of O-antigen gene islands from over 11,000 sequenced Salmonella strains, revealing (i) that gene duplication, pseudogene formation, gene deletion, and bacteriophage insertion elements occur ubiquitously across serogroups; (ii) novel serotypes in the group O:4 B2 variant, as well as an additional genotype variant for group O:4, and (iii) two novel O-antigen gene islands in understudied subspecies. We thus comprehensively defined the genetic basis for O-antigen diversity. IMPORTANCE: Lipopolysaccharides are a major component of the outer membrane in Gram-negative bacteria. They are composed of a conserved lipid structure that is embedded in the outer leaflet of the outer membrane and a polysaccharide known as the O-antigen. O-antigens are highly variable in structure across strains of a species and are crucial to a bacterium's interactions with its environment. They constitute the first line of defense against both the immune system and bacteriophage infections and have been shown to mediate antimicrobial resistance. The significance of our research is in identifying the metabolic and genetic differences within and across O-antigen groups in Salmonella strains. Our effort constitutes a first step toward characterizing the O-antigen metabolic network across Gram-negative organisms and a comprehensive overview of genetic variations in Salmonella.}, } @article {pmid31409021, year = {2019}, author = {Dar, HA and Zaheer, T and Shehroz, M and Ullah, N and Naz, K and Muhammad, SA and Zhang, T and Ali, A}, title = {Immunoinformatics-Aided Design and Evaluation of a Potential Multi-Epitope Vaccine against Klebsiella Pneumoniae.}, journal = {Vaccines}, volume = {7}, number = {3}, pages = {}, pmid = {31409021}, issn = {2076-393X}, abstract = {Klebsiella pneumoniae is an opportunistic gram-negative bacterium that causes nosocomial infection in healthcare settings. 
Despite the high morbidity and mortality rate associated with these bacterial infections, no effective vaccine is available to counter the pathogen. In this study, the pangenome of a total of 222 available complete genomes of K. pneumoniae was explored to obtain the core proteome. A reverse vaccinology strategy was applied to the core proteins to identify four antigenic proteins. These proteins were then subjected to epitope mapping and prioritization steps to shortlist nine B-cell derived T-cell epitopes which were linked together using GPGPG linkers. An adjuvant (Cholera Toxin B) was also added at the N-terminal of the vaccine construct to improve its immunogenicity and a stabilized multi-epitope protein structure was obtained using molecular dynamics simulation. The designed vaccine exhibited sustainable and strong bonding interactions with Toll-like receptor 2 and Toll-like receptor 4. In silico reverse translation and codon optimization also confirmed its high expression in E. coli K12 strain. The computer-aided analyses performed in this study imply that the designed multi-epitope vaccine can elicit specific immune responses against K. pneumoniae. However, wet lab validation is necessary to further verify the effectiveness of this proposed vaccine candidate.}, } @article {pmid31399846, year = {2019}, author = {Xing, J and Li, X and Sun, Y and Zhao, J and Miao, S and Xiong, Q and Zhang, Y and Zhang, G}, title = {Comparative genomic and functional analysis of Akkermansia muciniphila and closely related species.}, journal = {Genes & genomics}, volume = {41}, number = {11}, pages = {1253--1264}, pmid = {31399846}, issn = {2092-9293}, mesh = {Akkermansia ; *Genome, Bacterial ; *Phylogeny ; *Polymorphism, Single Nucleotide ; Verrucomicrobia/classification/*genetics ; }, abstract = {BACKGROUND: Akkermansia muciniphila is an important bacterium that resides on the mucus layer of the intestinal tract. 
Akkermansia muciniphila has a high abundance in human feces and plays an important role in human health.

OBJECTIVE: In this article, 23 whole genome sequences of the Akkermansia genus were comparatively studied.

METHODS: Phylogenetic trees were constructed with three methods: All amino acid sequences of each strain were used to construct the first phylogenetic tree using the web server of Composition Vector Tree Version 3. The matrix of Genome-to-Genome Distances which were obtained from GGDC 2.0 was used to construct the second phylogenetic tree using FastME. The concatenated single-copy core gene-based phylogenetic tree was generated through MEGA. The single-copy genes were obtained using OrthoMCL. Population structure was assessed by STRUCTURE 2.3.4 using the SNPs in core genes. PROKKA and Roary were used to do pan-genome analyses. The biosynthetic gene clusters were predicted using antiSMASH 4.0. IslandViewer 4 was used to detect the genomic islands.

RESULTS: The results of comparative genomic analysis revealed that: (1) The 23 Akkermansia strains formed 4 clades in phylogenetic trees. The A. muciniphila strains isolated from different geographic regions and ecological niches, formed a closely related clade. (2) The 23 Akkermansia strains were divided into 4 species based on digital DNA-DNA hybridization (dDDH) values. (3) Pan-genome of A. muciniphila is in an open state and increases with addition of new sequenced genomes. (4) SNPs were not evenly distributed throughout the A. muciniphila genomes. The genes in regions with high SNP density are related to metabolism and cell wall/membrane envelope biogenesis. (5) The thermostable outer-membrane protein, Amuc_1100, was conserved in the Akkermansia genus, except for Akkermansia glycaniphila PytT.

CONCLUSION: Overall, applying comparative genomic and pan-genomic analyses, we classified and illuminated the phylogenetic relationship of the 23 Akkermansia strains. Insights of the evolutionary, population structure, gene clusters and genome islands of Akkermansia provided more information about the possible physiological and probiotic mechanisms of the Akkermansia strains, and gave some instructions for the in-depth researches about the use of Akkermansia as a gut probiotic in the future.}, } @article {pmid31392469, year = {2019}, author = {Khan, AMAM and Mendoza, C and Hauk, VJ and Blumer-Schuette, SE}, title = {Genomic and physiological analyses reveal that extremely thermophilic Caldicellulosiruptor changbaiensis deploys uncommon cellulose attachment mechanisms.}, journal = {Journal of industrial microbiology & biotechnology}, volume = {46}, number = {9-10}, pages = {1251--1263}, pmid = {31392469}, issn = {1476-5535}, mesh = {Bacterial Adhesion ; Biomass ; Cellulose/*metabolism ; Clostridiales/genetics/*metabolism ; Genome, Bacterial ; Genomics ; }, abstract = {The genus Caldicellulosiruptor is comprised of extremely thermophilic, heterotrophic anaerobes that degrade plant biomass using modular, multifunctional enzymes. Prior pangenome analyses determined that this genus is genetically diverse, with the current pangenome remaining open, meaning that new genes are expected with each additional genome sequence added. Given the high biodiversity observed among the genus Caldicellulosiruptor, we have sequenced and added a 14th species, Caldicellulosiruptor changbaiensis, to the pangenome. The pangenome now includes 3791 ortholog clusters, 120 of which are unique to C. changbaiensis and may be involved in plant biomass degradation. Comparisons between C. 
changbaiensis and Caldicellulosiruptor bescii on the basis of growth kinetics, cellulose solubilization and cell attachment to polysaccharides highlighted physiological differences between the two species which are supported by their respective gene inventories. Most significantly, these comparisons indicated that C. changbaiensis possesses uncommon cellulose attachment mechanisms not observed among the other strongly cellulolytic members of the genus Caldicellulosiruptor.}, } @article {pmid31375706, year = {2019}, author = {Chapeton-Montes, D and Plourde, L and Bouchier, C and Ma, L and Diancourt, L and Criscuolo, A and Popoff, MR and Brüggemann, H}, title = {The population structure of Clostridium tetani deduced from its pan-genome.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {11220}, doi = {10.1038/s41598-019-47551-4}, pmid = {31375706}, issn = {2045-2322}, mesh = {Clostridium tetani/*genetics/pathogenicity ; Collagenases/genetics ; Conserved Sequence ; Genome, Bacterial/*genetics ; Neurotoxins/genetics ; Phylogeny ; Species Specificity ; Tetanus Toxin/genetics ; Virulence Factors/genetics ; }, abstract = {Clostridium tetani produces a potent neurotoxin, the tetanus neurotoxin (TeNT) that is responsible for the worldwide neurological disease tetanus, but which can be efficiently prevented by vaccination with tetanus toxoid. Until now only one type of TeNT has been characterized and very little information exists about the heterogeneity among C. tetani strains. We report here the genome sequences of 26 C. tetani strains, isolated between 1949 and 2017 and obtained from different locations. Genome analyses revealed that the C. tetani population is distributed in two phylogenetic clades, a major and a minor one, with no evidence for clade separation based on geographical origin or time of isolation. The chromosome of C. tetani is highly conserved; in contrast, the TeNT-encoding plasmid shows substantial heterogeneity. 
TeNT itself is highly conserved among all strains; the most relevant difference is an insertion of four amino acids in the C-terminal receptor-binding domain in four strains that might impact on receptor-binding properties. Other putative virulence factors, including tetanolysin and collagenase, are encoded in all genomes. This study highlights the population structure of C. tetani and suggests that tetanus-causing strains did not undergo extensive evolutionary diversification, as judged from the high conservation of its main virulence factors.}, } @article {pmid31371776, year = {2019}, author = {Saad, J and Phelippeau, M and Khoder, M and Lévy, M and Musso, D and Drancourt, M}, title = {"Mycobacterium mephinesia", a Mycobacterium terrae complex species of clinical interest isolated in French Polynesia.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {11169}, doi = {10.1038/s41598-019-47674-8}, pmid = {31371776}, issn = {2045-2322}, mesh = {Bronchoalveolar Lavage ; Genome, Bacterial ; Humans ; Lung/microbiology ; Male ; Middle Aged ; Mycobacterium/genetics/*isolation & purification ; Nontuberculous Mycobacteria/*isolation & purification ; Polynesia ; RNA, Ribosomal, 16S ; Sequence Analysis, DNA ; }, abstract = {A 59-year-old tobacco smoker male with chronic bronchitis living in Taravao, French Polynesia, Pacific, presented with a two-year growing nodule in the middle lobe of the right lung. A guided bronchoalveolar lavage inoculated onto Löwenstein-Jensen medium yielded colonies of a rapidly-growing non-chromogenic mycobacterium designed as isolate P7213. The isolate could not be identified using routine matrix-assisted laser desorption ionization-time of flight-mass spectrometry and phenotypic and probe-hybridization techniques and yielded 100% and 97% sequence similarity with the respective 16S rRNA and rpoB gene sequences of Mycobacterium virginiense in the Mycobacterium terrae complex. 
Electron microscopy showed a 1.15 µm long and 0.38 µm large bacillus which was in vitro susceptible to rifampicin, rifabutin, ethambutol, isoniazid, doxycycline and kanamycin. Its 4,511,948-bp draft genome exhibited a 67.6% G + C content with 4,153 coding-protein genes and 87 predicted RNA genes. Genome sequence-derived DNA-DNA hybridization, OrthoANI and pangenome analysis confirmed isolate P7213 was representative of a new species in the M. terrae complex. We named this species "Mycobacterium mephinesia".}, } @article {pmid31371382, year = {2019}, author = {O'Connor, E and McGowan, J and McCarthy, CGP and Amini, A and Grogan, H and Fitzpatrick, DA}, title = {Whole Genome Sequence of the Commercially Relevant Mushroom Strain Agaricus bisporus var. bisporus ARP23.}, journal = {G3 (Bethesda, Md.)}, volume = {9}, number = {10}, pages = {3057--3066}, pmid = {31371382}, issn = {2160-1836}, mesh = {Agaricales/*classification/*genetics ; Agaricus/*classification/*genetics ; Computational Biology/methods ; *Genome, Fungal ; Molecular Sequence Annotation ; *Molecular Typing ; Phylogeny ; Quantitative Trait Loci ; *Whole Genome Sequencing ; }, abstract = {Agaricus bisporus is an extensively cultivated edible mushroom. Demand for cultivation is continuously growing and difficulties associated with breeding programs now means strains are effectively considered monoculture. While commercial growing practices are highly efficient and tightly controlled, the over-use of a single strain has led to a variety of disease outbreaks from a range of pathogens including bacteria, fungi and viruses. To address this, the Agaricus Resource Program (ARP) was set up to collect wild isolates from diverse geographical locations through a bounty-driven scheme to create a repository of wild Agaricus germplasm. One of the strains collected, Agaricus bisporus var. 
bisporus ARP23, has been crossed extensively with white commercial varieties leading to the generation of a novel hybrid with a dark brown pileus commonly referred to as 'Heirloom'. Heirloom has been successfully implemented into commercial mushroom cultivation. In this study the whole genome of Agaricus bisporus var. bisporus ARP23 was sequenced and assembled with Illumina and PacBio sequencing technology. The final genome was found to be 33.49 Mb in length and have significant levels of synteny to other sequenced Agaricus bisporus strains. Overall, 13,030 putative protein coding genes were located and annotated. Relative to the other A. bisporus genomes that are currently available, Agaricus bisporus var. bisporus ARP23 is the largest A. bisporus strain in terms of gene number and genetic content sequenced to date. Comparative genomic analysis shows that the A. bisporus mating loci is unifactorial and unsurprisingly highly conserved between strains. The lignocellulolytic gene content of all A. bisporus strains compared is also very similar. Our results show that the pangenome structure of A. bisporus is quite diverse with between 60-70% of the total protein coding genes per strain considered as being orthologous and syntenically conserved. These analyses and the genome sequence described herein are the starting point for more detailed molecular analyses into the growth and phenotypical responses of Agaricus bisporus var. 
bisporus ARP23 when challenged with economically important mycoviruses.}, } @article {pmid31366358, year = {2019}, author = {Duan, Z and Qiao, Y and Lu, J and Lu, H and Zhang, W and Yan, F and Sun, C and Hu, Z and Zhang, Z and Li, G and Chen, H and Xiang, Z and Zhu, Z and Zhao, H and Yu, Y and Wei, C}, title = {{HUPAN}: a pan-genome analysis pipeline for human genomes.}, journal = {Genome biology}, volume = {20}, number = {1}, pages = {149}, pmid = {31366358}, issn = {1474-760X}, mesh = {African Continental Ancestry Group/genetics ; Asian Continental Ancestry Group/genetics ; *Genome, Human ; High-Throughput Nucleotide Sequencing ; Humans ; Proteins/genetics ; Sequence Analysis, DNA ; *Software ; }, abstract = {The human reference genome is still incomplete, especially for those population-specific or individual-specific regions, which may have important functions. Here, we developed a HUman Pan-genome ANalysis (HUPAN) system to build the human pan-genome. We applied it to 185 deep sequencing and 90 assembled Han Chinese genomes and detected 29.5 Mb novel genomic sequences and at least 188 novel protein-coding genes missing in the human reference genome (GRCh38). It can be an important resource for the human genome-related biomedical studies, such as cancer genome analysis. 
HUPAN is freely available at http://cgm.sjtu.edu.cn/hupan/ and https://github.com/SJTU-CGM/HUPAN .}, } @article {pmid31350563, year = {2019}, author = {Richards, VP and Velsko, IM and Alam, T and Zadoks, RN and Manning, SD and Pavinski Bitar, PD and Hasler, HB and Crestani, C and Springer, G and Probert, B and Town, CD and Stanhope, MJ}, title = {Population gene introgression and high genome plasticity for the zoonotic pathogen {Streptococcus} agalactiae.}, journal = {Molecular biology and evolution}, pmid = {31350563}, issn = {1537-1719}, abstract = {The influence that bacterial adaptation (or niche partitioning) within species has on gene spillover and transmission among bacteria populations occupying different niches is not well understood. Streptococcus agalactiae is an important bacterial pathogen that has a taxonomically diverse host range making it an excellent model system to study these processes. Here we analyze a global set of 901 genome sequences from nine diverse host species to advance our understanding of these processes. Bayesian clustering analysis delineated twelve major populations that closely aligned with niches. Comparative genomics revealed extensive gene gain/loss among populations and a large pan-genome of 9,527 genes, which remained open and was strongly partitioned among niches. As a result, the biochemical characteristics of eleven populations were highly distinctive (significantly enriched). Positive selection was detected and biochemical characteristics of the dispensable genes under selection were enriched in ten populations. Despite the strong gene partitioning, phylogenomics detected gene spillover. 
In particular, tetracycline resistance (which likely evolved in the human-associated population) from humans to bovine, canines, seals, and fish, demonstrating how a gene selected in one host can ultimately be transmitted into another, and biased transmission from humans to bovines was confirmed with a Bayesian migration analysis. Our findings show high bacterial genome plasticity acting in balance with selection pressure from distinct functional requirements of niches that is associated with an extensive and highly partitioned dispensable genome, likely facilitating continued and expansive adaptation.}, } @article {pmid31333599, year = {2019}, author = {Naidenov, B and Lim, A and Willyerd, K and Torres, NJ and Johnson, WL and Hwang, HJ and Hoyt, P and Gustafson, JE and Chen, C}, title = {Pan-Genomic and Polymorphic Driven Prediction of Antibiotic Resistance in {Elizabethkingia}.}, journal = {Frontiers in microbiology}, volume = {10}, pages = {1446}, pmid = {31333599}, issn = {1664-302X}, abstract = {The Elizabethkingia are a genetically diverse genus of emerging pathogens that exhibit multidrug resistance to a range of common antibiotics. Two representative species, Elizabethkingia bruuniana and E. meningoseptica, were phenotypically tested to determine minimum inhibitory concentrations (MICs) for five antibiotics. Ultra-long read sequencing with Oxford Nanopore Technologies (ONT) and subsequent de novo assembly produced complete, gapless circular genomes for each strain. Alignment based annotation with Prokka identified 5,480 features in E. bruuniana and 5,203 features in E. meningoseptica, where none of these identified genes or gene combinations corresponded to observed phenotypic resistance values. Pan-genomic analysis, performed with an additional 19 Elizabethkingia strains, identified a core-genome size of 2,658,537 bp, 32 uniquely identifiable intrinsic chromosomal antibiotic resistance core-genes and 77 antibiotic resistance pan-genes. 
Using core-SNPs and pan-genes in combination with six machine learning (ML) algorithms, binary classification of clindamycin and vancomycin resistance achieved f1 scores of 0.94 and 0.84, respectively. Performance on the more challenging multiclass problem for fusidic acid, rifampin and ciprofloxacin resulted in f1 scores of 0.70, 0.75, and 0.54, respectively. By producing two sets of quality biological predictors, pan-genome genes and core-genome SNPs, from long-read sequence data and applying an ensemble of ML techniques, our results demonstrated that accurate phenotypic inference, at multiple AMR resolutions, can be achieved.}, } @article {pmid31329231, year = {2019}, author = {Fenske, GJ and Thachil, A and McDonough, PL and Glaser, A and Scaria, J}, title = {Geography Shapes the Population Genomics of {Salmonella} enterica {Dublin}.}, journal = {Genome biology and evolution}, volume = {11}, number = {8}, pages = {2220--2231}, pmid = {31329231}, issn = {1759-6653}, mesh = {Animals ; Bacterial Proteins/*genetics ; Cattle ; *Evolution, Molecular ; Gene Expression Regulation, Bacterial ; *Genome, Bacterial ; *Metagenomics ; *Phylogeography ; Salmonella enterica/classification/*genetics ; Serogroup ; Transcriptome ; Virulence ; Virulence Factors/*genetics ; }, abstract = {Salmonella enterica serotype Dublin (S. Dublin) is a bovine-adapted serotype that can cause serious systemic infections in humans. Despite the increasing prevalence of human infections and the negative impact on agricultural processes, little is known about the population structure of the serotype. To this end, we compiled a manually curated data set comprising of 880 S. Dublin genomes. Core genome phylogeny and ancestral state reconstruction revealed that region-specific clades dominate the global population structure of S. Dublin. Strains of S. Dublin in the UK are genomically distinct from US, Brazilian, and African strains. 
The geographical partitioning impacts the composition of the core genome as well as the ancillary genome. Antibiotic resistance genes are almost exclusively found in US genomes and are mediated by an IncA/C2 plasmid. Phage content and the S. Dublin virulence plasmid were strongly conserved in the serotype. Comparison of S. Dublin to a closely related serotype, S. enterica serotype Enteritidis, revealed that S. Dublin contains 82 serotype specific genes that are not found in S. Enteritidis. Said genes encode metabolic functions involved in the uptake and catabolism of carbohydrates and virulence genes associated with type VI secretion systems and fimbria assembly respectively.}, } @article {pmid31319017, year = {2019}, author = {Passarelli-Araujo, H and Palmeiro, JK and Moharana, KC and Pedrosa-Silva, F and Dalla-Costa, LM and Venancio, TM}, title = {Genomic analysis unveils important aspects of population structure, virulence, and antimicrobial resistance in {Klebsiella} aerogenes.}, journal = {The FEBS journal}, volume = {286}, number = {19}, pages = {3797--3810}, doi = {10.1111/febs.15005}, pmid = {31319017}, issn = {1742-4658}, support = {//Fundação Carlos Chagas Filho de Amparo à Pesquisa do Estado do Rio de Janeiro/International ; //Conselho Nacional de Desenvolvimento Científico e Tecnológico/International ; //Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/International ; //UENF/International ; }, mesh = {Anti-Bacterial Agents/*pharmacology ; Bacteriophages/isolation & purification ; Drug Resistance, Bacterial/*genetics ; Enterobacter aerogenes/drug effects/*genetics/*pathogenicity ; *Genome, Bacterial ; Plasmids ; Virulence/*genetics ; }, abstract = {Klebsiella aerogenes is an important pathogen in healthcare-associated infections. Nevertheless, in comparison to other clinically important pathogens, K. aerogenes population structure, genetic diversity, and pathogenicity remain poorly understood. Here, we elucidate K. 
aerogenes clonal complexes (CCs) and genomic features associated with resistance and virulence. We present a detailed description of the population structure of K. aerogenes based on 97 publicly available genomes by using both multilocus sequence typing and single-nucleotide polymorphisms extracted from the core genome. We also assessed virulence and resistance profiles using Virulence Finder Database and Comprehensive Antibiotic Resistance Database, respectively. We show that K. aerogenes has an open pangenome and a large effective population size, which account for its high genomic diversity and support that negative selection prevents fixation of most deleterious alleles. The population is structured in at least 10 CCs, including two novel ones identified here, CC9 and CC10. The repertoires of resistance genes comprise a high number of antibiotic efflux proteins as well as narrow- and extended-spectrum β-lactamases. Regarding the population structure, we identified two clusters based on virulence profiles because of the presence of the toxin-encoding clb operon and the siderophore production genes, irp and ybt. Notably, CC3 comprises the majority of K. aerogenes isolates associated with hospital outbreaks, emphasizing the importance of constant monitoring of this pathogen. Collectively, our results may provide a foundation for the development of new therapeutic and surveillance strategies worldwide.}, } @article {pmid31316488, year = {2019}, author = {Chen, SL}, title = {Genomic Insights Into the Distribution and Evolution of Group {B} {Streptococcus}.}, journal = {Frontiers in microbiology}, volume = {10}, pages = {1447}, pmid = {31316488}, issn = {1664-302X}, abstract = {Streptococcus agalactiae, also known as Group B Streptococcus (GBS), is a bacteria with truly protean biology. It infects a variety of hosts, among which the most commonly studied are humans, cattle, and fish. 
GBS holds a singular position in the history of bacterial genomics, as it was the substrate used to describe one of the first major conceptual advances of comparative genomics, the idea of the pan-genome. In this review, I describe a brief history of GBS and the major contributions of genomics to understanding its genome plasticity and evolution as well as its molecular epidemiology, focusing on the three hosts mentioned above. I also discuss one of the major recent paradigm shifts in our understanding of GBS evolution and disease burden: foodborne GBS can cause invasive infections in humans.}, } @article {pmid31316072, year = {2019}, author = {Xia, Q and Pan, L and Zhang, R and Ni, X and Wang, Y and Dong, X and Gao, Y and Zhang, Z and Kui, L and Li, Y and Wang, W and Yang, H and Chen, C and Miao, J and Chen, W and Dong, Y}, title = {The genome assembly of asparagus bean, {Vigna} unguiculata ssp. sesquipedialis.}, journal = {Scientific data}, volume = {6}, number = {1}, pages = {124}, doi = {10.1038/s41597-019-0130-6}, pmid = {31316072}, issn = {2052-4463}, support = {NO.31501369//National Natural Science Foundation of China (National Science Foundation of China)/International ; }, mesh = {Chromosome Mapping ; DNA Transposable Elements ; Genetic Linkage ; *Genome, Plant ; Genomics ; Molecular Sequence Annotation ; Vigna/*genetics ; Whole Genome Sequencing ; }, abstract = {Asparagus bean (Vigna unguiculata ssp. sesquipedialis), known for its very long and tender green pods, is an important vegetable crop broadly grown in the developing Asian countries. In this study, we reported a 632.8 Mb assembly (549.81 Mb non-N size) of asparagus bean based on the whole genome shotgun sequencing strategy. We also generated a linkage map for asparagus bean, which helped anchor 94.42% of the scaffolds into 11 pseudo-chromosomes. A total of 42,609 protein-coding genes and 3,579 non-protein-coding genes were predicted from the assembly. 
Taken together, these genomic resources of asparagus bean will help develop a pan-genome of V. unguiculata and facilitate the investigation of economically valuable traits in this species, so that the cultivation of this plant would help combat the protein and energy malnutrition in the developing world.}, } @article {pmid31310202, year = {2019}, author = {Yahara, K and Lehours, P and Vale, FF}, title = {Analysis of genetic recombination and the pan-genome of a highly recombinogenic bacteriophage species.}, journal = {Microbial genomics}, volume = {5}, number = {8}, pmid = {31310202}, issn = {2057-5858}, mesh = {Bacteriophages/genetics ; Genetic Variation/genetics ; Genome, Bacterial/genetics ; Genome, Viral/genetics ; Helicobacter pylori/*genetics/virology ; Phylogeny ; Prophages/*genetics ; Recombination, Genetic/genetics ; Sequence Analysis, DNA/methods ; }, abstract = {Bacteriophages are the most prevalent biological entities impacting on the ecosystem and are characterized by their extensive diversity. However, there are two aspects of phages that have remained largely unexplored: genetic flux by recombination between phage populations and characterization of specific phages in terms of the pan-genome. Here, we examined the recombination and pan-genome in Helicobacter pylori prophages at both the genome and gene level. In the genome-level analysis, we applied, for the first time, chromosome painting and fineSTRUCTURE algorithms to a phage species, and showed novel trends in inter-population genetic flux. Notably, hpEastAsia is a phage population that imported a higher proportion of DNA fragments from other phages, whereas the hpSWEurope phages showed weaker signatures of inter-population recombination, suggesting genetic isolation. The gene-level analysis showed that, after parameter tuning of the prokaryote pan-genome analysis program, H. 
pylori phages have a pan-genome consisting of 75 genes and a soft-core genome of 10 genes, which includes genes involved in the lytic and lysogenic life cycles. Quantitative analysis of recombination events of the soft-core genes showed no substantial variation in the intensity of recombination across the genes, but rather equally frequent recombination among housekeeping genes that were previously reported to be less prone to recombination. The signature of frequent recombination appears to reflect the host-phage evolutionary arms race, either by contributing to escape from bacterial immunity or by protecting the host by producing defective phages.}, } @article {pmid31302711, year = {2019}, author = {Paterson, ML and Ranasinghe, D and Blom, J and Dover, LG and Sutcliffe, IC and Lopes, B and Sangal, V}, title = {Genomic analysis of a novel {Rhodococcus} ({Prescottella}) equi isolate from a bovine host.}, journal = {Archives of microbiology}, volume = {201}, number = {9}, pages = {1317--1321}, pmid = {31302711}, issn = {1432-072X}, mesh = {Actinomycetales Infections/microbiology/*veterinary ; Amino Acid Sequence/genetics ; Animals ; Bacterial Proteins/genetics ; Cattle ; Genome, Bacterial/*genetics ; Genomic Islands/genetics ; Genomics ; Horses ; Humans ; Multigene Family/genetics ; Plasmids/genetics ; Pneumonia, Bacterial/microbiology/*veterinary ; Rhodococcus equi/*genetics/isolation & purification ; Vesicular Transport Proteins/genetics ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {Rhodococcus (Prescottella) equi causes pneumonia-like infections in foals with high mortality rates and can also infect a number of other animals. R. equi is also emerging as an opportunistic human pathogen. In this study, we have sequenced the genome of a novel R. equi isolate, B0269, isolated from the faeces of a bovine host. Comparative genomic analyses with seven other published R. 
equi genomes, including those from equine or human sources, revealed a pangenome comprising of 6876 genes with 4141 genes in the core genome. Two hundred and 75 genes were specific to the bovine isolate, mostly encoding hypothetical proteins of unknown function. However, these genes include four copies of terA and five copies of terD genes that may be involved in responding to chemical stress. Virulence characteristics in R. equi are associated with the presence of large plasmids carrying a pathogenicity island, including genes from the vap multigene family. A BLAST search of the protein sequences from known virulence-associated plasmids (pVAPA, pVAPB and pVAPN) revealed a similar plasmid backbone on two contigs in bovine isolate B0269; however, no homologues of the main virulence-associated genes, vapA, vapB or vapN, were identified. In summary, this study confirms that R. equi genomes are highly conserved and reports the presence of an apparently novel plasmid in the bovine isolate B0269 that needs further characterisation to understand its potential involvement in virulence properties.}, } @article {pmid31300285, year = {2019}, author = {Kingstad-Bakke, BA and Chandrasekar, SS and Phanse, Y and Ross, KA and Hatta, M and Suresh, M and Kawaoka, Y and Osorio, JE and Narasimhan, B and Talaat, AM}, title = {Effective mosaic-based nanovaccines against avian influenza in poultry.}, journal = {Vaccine}, volume = {37}, number = {35}, pages = {5051--5058}, doi = {10.1016/j.vaccine.2019.06.077}, pmid = {31300285}, issn = {1873-2518}, mesh = {Animals ; Antibodies, Viral/*blood ; Chickens/immunology ; Hemagglutinin Glycoproteins, Influenza Virus/chemistry/immunology ; Immunity, Cellular ; Immunity, Humoral ; Influenza A Virus, H1N1 Subtype ; Influenza A Virus, H5N1 Subtype ; Influenza A Virus, H5N2 Subtype ; Influenza Vaccines/administration & dosage/*immunology ; Influenza in Birds/*prevention & control ; Nanoparticles/*administration & dosage/chemistry ; 
Vaccination/*veterinary ; }, abstract = {Avian influenza virus (AIV) is an extraordinarily diverse pathogen that causes significant morbidity in domesticated poultry populations and threatens human life with looming pandemic potential. Controlling avian influenza in susceptible populations requires highly effective, economical and broadly reactive vaccines. Several AIV vaccines have proven insufficient despite their wide use, and better technologies are needed to improve their immunogenicity and broaden effectiveness. Previously, we developed a "mosaic" H5 subtype hemagglutinin (HA) AIV vaccine and demonstrated its broad protection against diverse highly pathogenic H5N1 and seasonal H1N1 virus strains in mouse and non-human primate models. There is a significant interest in developing effective and safe vaccines against AIV that cannot contribute to the emergence of new strains of the virus once circulating in poultry. Here, we report on the development of an H5 mosaic (H5M) vaccine antigen formulated with polyanhydride nanoparticles (PAN) that provide sustained release of encapsulated antigens. H5M vaccine constructs were immunogenic whether delivered by the modified virus Ankara (MVA) strain or encapsulated within PAN. Both humoral and cellular immune responses were generated in both specific-pathogen free (SPF) and commercial chicks. Importantly, chicks vaccinated by H5M constructs were protected in terms of viral shedding from divergent challenge with a low pathogenicity avian influenza (LPAI) strain at 8 weeks post-vaccination. In addition, protective levels of humoral immunity were generated against highly pathogenic avian influenza (HPAI) of the similar H5N1 and genetically dissimilar H5N2 viruses. Overall, the developed platform technologies (MVA vector and PAN encapsulation) were safe and provided high levels of sustained protection against AIV in chickens. 
Such approaches could be used to design more efficacious vaccines against other important poultry infections.}, } @article {pmid31295964, year = {2019}, author = {McCarthy, CGP and Fitzpatrick, DA}, title = {Pangloss: A Tool for Pan-Genome Analysis of Microbial Eukaryotes.}, journal = {Genes}, volume = {10}, number = {7}, pmid = {31295964}, issn = {2073-4425}, mesh = {Aspergillus fumigatus/*genetics ; *Genome, Fungal ; Genomics ; *Software ; Yarrowia/*genetics ; }, abstract = {Although the pan-genome concept originated in prokaryote genomics, an increasing number of eukaryote species pan-genomes have also been analysed. However, there is a relative lack of software intended for eukaryote pan-genome analysis compared to that available for prokaryotes. In a previous study, we analysed the pan-genomes of four model fungi with a computational pipeline that constructed pan-genomes using the synteny-dependent Pan-genome Ortholog Clustering Tool (PanOCT) approach. Here, we present a modified and improved version of that pipeline which we have called Pangloss. Pangloss can perform gene prediction for a set of genomes from a given species that the user provides, constructs and optionally refines a species pan-genome from that set using PanOCT, and can perform various functional characterisation and visualisation analyses of species pan-genome data. To demonstrate Pangloss's capabilities, we constructed and analysed a species pan-genome for the oleaginous yeast Yarrowia lipolytica and also reconstructed a previously-published species pan-genome for the opportunistic respiratory pathogen Aspergillus fumigatus. 
Pangloss is implemented in Python, Perl and R and is freely available under an open source GPLv3 licence via GitHub.}, } @article {pmid31293547, year = {2019}, author = {Passera, A and Compant, S and Casati, P and Maturo, MG and Battelli, G and Quaglino, F and Antonielli, L and Salerno, D and Brasca, M and Toffolatti, SL and Mantegazza, F and Delledonne, M and Mitter, B}, title = {Not Just a Pathogen? {Description} of a Plant-Beneficial {Pseudomonas} syringae Strain.}, journal = {Frontiers in microbiology}, volume = {10}, pages = {1409}, pmid = {31293547}, issn = {1664-302X}, abstract = {Plants develop in a microbe-rich environment and must interact with a plethora of microorganisms, both pathogenic and beneficial. Indeed, such is the case of Pseudomonas, and its model organisms P. fluorescens and P. syringae, a bacterial genus that has received particular attention because of its beneficial effect on plants and its pathogenic strains. The present study aims to compare plant-beneficial and pathogenic strains belonging to the P. syringae species to get new insights into the distinction between the two types of plant-microbe interactions. In assays carried out under greenhouse conditions, P. syringae pv. syringae strain 260-02 was shown to promote plant-growth and to exert biocontrol of P. syringae pv. tomato strain DC3000, against the Botrytis cinerea fungus and the Cymbidium Ringspot Virus. This P. syringae strain also had a distinct volatile emission profile, as well as a different plant-colonization pattern, visualized by confocal microscopy and gfp labeled strains, compared to strain DC3000. Despite the different behavior, the P. syringae strain 260-02 showed great similarity to pathogenic strains at a genomic level. However, genome analyses highlighted a few differences that form the basis for the following hypotheses regarding strain 260-02. P. 
syringae strain 260-02: (i) possesses non-functional virulence genes, like the mangotoxin-producing operon Mbo; (ii) has different regulation pathways, suggested by the difference in the autoinducer system and the lack of a virulence activator gene; (iii) has genes encoding DNA methylases different from those found in other P. syringae strains, suggested by the presence of horizontal-gene-transfer-obtained methylases that could affect gene expression.}, } @article {pmid31293536, year = {2019}, author = {Fontana, A and Falasconi, I and Molinari, P and Treu, L and Basile, A and Vezzi, A and Campanaro, S and Morelli, L}, title = {Genomic Comparison of {Lactobacillus} helveticus Strains Highlights Probiotic Potential.}, journal = {Frontiers in microbiology}, volume = {10}, pages = {1380}, pmid = {31293536}, issn = {1664-302X}, abstract = {Lactobacillus helveticus belongs to the large group of lactic acid bacteria (LAB), which are the major players in the fermentation of a wide range of foods. LAB are also present in the human gut, which has often been exploited as a reservoir of potential novel probiotic strains, but several parameters need to be assessed before establishing their safety and potential use for human consumption. In the present study, six L. helveticus strains isolated from natural whey cultures were analyzed for their phenotype and genotype in exopolysaccharide (EPS) production, low pH and bile salt tolerance, bile salt hydrolase (BSH) activity, and antibiotic resistance profile. In addition, a comparative genomic investigation was performed between the six newly sequenced strains and the 51 publicly available genomes of L. helveticus to define the pangenome structure. 
The results indicate that the newly sequenced strain UC1267 and the deposited strain DSM 20075 can be considered good candidates for gut-adapted strains due to their ability to survive in the presence of 0.2% glycocholic acid (GCA) and 1% taurocholic and taurodeoxycholic acid (TDCA). Moreover, these strains had the highest bile salt deconjugation activity among the tested L. helveticus strains. Considering the safety profile, none of these strains presented antibiotic resistance phenotypically and/or at the genome level. The pangenome analysis revealed genes specific to the new isolates, such as enzymes related to folate biosynthesis in strains UC1266 and UC1267 and an integrated phage in strain UC1035. Finally, the presence of maltose-degrading enzymes and multiple copies of 6-phospho-β-glucosidase genes in our strains indicates the capability to metabolize sugars other than lactose, which is related solely to dairy niches.}, } @article {pmid31290097, year = {2020}, author = {Tian, X and Li, R and Fu, W and Li, Y and Wang, X and Li, M and Du, D and Tang, Q and Cai, Y and Long, Y and Zhao, Y and Li, M and Jiang, Y}, title = {Building a sequence map of the pig pan-genome from multiple de novo assemblies and {Hi-C} data.}, journal = {Science China. Life sciences}, volume = {63}, number = {5}, pages = {750--763}, doi = {10.1007/s11427-019-9551-7}, pmid = {31290097}, issn = {1869-1889}, mesh = {Animals ; Base Sequence ; Chromatin/*genetics ; Chromosome Mapping ; Female ; Genome/*genetics ; High-Throughput Nucleotide Sequencing ; Liver ; Mutation ; Sequence Alignment ; *Sequence Analysis, DNA ; Swine/*genetics ; }, abstract = {Pigs were domesticated independently in the Near East and China, indicating that a single reference genome from one individual is unable to represent the full spectrum of divergent sequences in pigs worldwide. Therefore, 12 de novo pig assemblies from Eurasia were compared in this study to identify the missing sequences from the reference genome. 
As a result, 72.5 Mb of non-redundant sequences (∼3% of the genome) were found to be absent from the reference genome (Sscrofa11.1) and were defined as pan-sequences. Of the pan-sequences, 9.0 Mb were dominant in Chinese pigs, in contrast with their low frequency in European pigs. One sequence dominant in Chinese pigs contained the complete genic region of the tazarotene-induced gene 3 (TIG3) gene which is involved in fatty acid metabolism. Using flanking sequences and Hi-C based methods, 27.7% of the sequences could be anchored to the reference genome. The supplementation of these sequences could contribute to the accurate interpretation of the 3D chromatin structure. A web-based pan-genome database was further provided to serve as a primary resource for exploration of genetic diversity and promote pig breeding and biomedical research.}, } @article {pmid31284652, year = {2019}, author = {Piligrimova, EG and Kazantseva, OA and Nikulin, NA and Shadrin, AM}, title = {Bacillus Phage {vB\_BtS\_B83} Previously Designated as a Plasmid May Represent a New {Siphoviridae} Genus.}, journal = {Viruses}, volume = {11}, number = {7}, pmid = {31284652}, issn = {1999-4915}, mesh = {Bacillus Phages/*classification/*genetics/isolation & purification/ultrastructure ; Bacillus thuringiensis/virology ; Base Sequence ; DNA, Viral/genetics ; Genes, Viral/genetics ; Genome, Viral ; Genomics ; Host Specificity ; Microscopy, Electron, Transmission ; *Phylogeny ; *Plasmids ; Prophages/genetics ; Sequence Analysis, DNA ; Siphoviridae/*classification/*genetics/ultrastructure ; }, abstract = {The Bacillus cereus group of bacteria includes, inter alia, the species known to be associated with human diseases and food poisoning. Here, we describe the Bacillus phage vB_BtS_B83 (abbreviated as B83) infecting the species of this group. Transmission electron microscopy (TEM) micrographs indicate that B83 belongs to the Siphoviridae family. 
B83 is a temperate phage using an arbitrium system for the regulation of the lysis-lysogeny switch, and is probably capable of forming a circular plasmid prophage. Comparative analysis shows that it has been previously sequenced, but was mistaken for a plasmid. B83 shares common genome organization and >46% of proteins with the other Bacillus phage, BMBtp14. Phylograms constructed using large terminase subunits and a pan-genome presence-absence matrix show that these phages form a clade distinct from the closest viruses. Based on the above, we propose the creation of a new genus named Bembunaquatrovirus that includes B83 and BMBtp14.}, } @article {pmid31281302, year = {2019}, author = {Machado, KCT and Fortuin, S and Tomazella, GG and Fonseca, AF and Warren, RM and Wiker, HG and de Souza, SJ and de Souza, GA}, title = {On the Impact of the Pangenome and Annotation Discrepancies While Building Protein Sequence Databases for Bacteria Proteogenomics.}, journal = {Frontiers in microbiology}, volume = {10}, pages = {1410}, pmid = {31281302}, issn = {1664-302X}, abstract = {In proteomics, peptide information within mass spectrometry (MS) data from a specific organism sample is routinely matched against a protein sequence database that best represent such organism. However, if the species/strain in the sample is unknown or genetically poorly characterized, it becomes challenging to determine a database which can represent such sample. Building customized protein sequence databases merging multiple strains for a given species has become a strategy to overcome such restrictions. However, as more genetic information is publicly available and interesting genetic features such as the existence of pan- and core genes within a species are revealed, we questioned how efficient such merging strategies are to report relevant information. To test this assumption, we constructed databases containing conserved and unique sequences for 10 different species. 
Features that are relevant for probabilistic-based protein identification by proteomics were then monitored. As expected, increase in database complexity correlates with pangenomic complexity. However, Mycobacterium tuberculosis and Bordetella pertussis generated very complex databases even having low pangenomic complexity. We further tested database performance by using MS data from eight clinical strains from M. tuberculosis, and from two published datasets from Staphylococcus aureus. We show that by using an approach where database size is controlled by removing repeated identical tryptic sequences across strains/species, computational time can be reduced drastically as database complexity increases.}, } @article {pmid31279974, year = {2019}, author = {Nielsen, MR and Wollenberg, RD and Westphal, KR and Sondergaard, TE and Wimmer, R and Gardiner, DM and Sørensen, JL}, title = {Heterologous expression of intact biosynthetic gene clusters in {Fusarium} graminearum.}, journal = {Fungal genetics and biology : FG \& B}, volume = {132}, pages = {103248}, doi = {10.1016/j.fgb.2019.103248}, pmid = {31279974}, issn = {1096-0937}, mesh = {Biosynthetic Pathways/*genetics ; Fungal Proteins/genetics ; Fusarium/enzymology/*genetics ; *Gene Expression Regulation, Fungal ; Genome, Fungal ; *Multigene Family ; Peptide Synthases/genetics ; Polyketide Synthases/genetics ; Recombination, Genetic ; }, abstract = {Filamentous fungi such as species from the genus Fusarium are capable of producing a wide palette of interesting metabolites relevant to health, agriculture and biotechnology. Secondary metabolites are formed from large synthase/synthetase enzymes often encoded in gene clusters containing additional enzymes cooperating in the metabolite's biosynthesis. The true potential of fungal metabolomes remain untapped as the majority of secondary metabolite gene clusters are silent under standard laboratory growth conditions. 
One way to achieve expression of biosynthetic pathways is to clone the responsible genes and express them in a well-suited heterologous host, which poses a challenge since Fusarium polyketide synthase and non-ribosomal peptide synthetase gene clusters can be large (e.g. as large as 80 kb) and comprise several genes necessary for product formation. The major challenge associated with heterologous expression of fungal biosynthesis pathways is thus handling and cloning large DNA sequences. In this paper we present the successful workflow for cloning, reconstruction and heterologous production of two previously characterized Fusarium pseudograminearum natural product pathways in Fusarium graminearum. In vivo yeast recombination enabled rapid assembly of the W493 (NRPS32-PKS40) and the Fusarium Cytokinin gene clusters. F. graminearum transformants were obtained through protoplast-mediated and Agrobacterium tumefaciens-mediated transformation. Whole genome sequencing revealed isolation of transformants carrying intact copies of the gene clusters was possible. Known Fusarium cytokinin metabolites; fusatin, 8-oxo-fusatin, 8-oxo-isopentenyladenine, fusatinic acid together with cis- and trans-zeatin were detected by liquid chromatography and mass spectrometry, which confirmed gene functionality in F. graminearum. In addition the non-ribosomal lipopeptide products W493 A and B were heterologously produced in similar amounts to that observed in the F. pseudograminearum donor. The Fusarium pan-genome comprises more than 60 uncharacterized putative secondary metabolite gene clusters. We nominate the well-characterized F. graminearum as a heterologous expression platform for Fusarium secondary metabolite gene clusters, and present our experience cloning and introducing gene clusters into this species. 
We expect the presented methods will inspire future endeavours in heterologous production of Fusarium metabolites and potentially aid the production and characterization of novel natural products.}, } @article {pmid31279858, year = {2020}, author = {Matteoli, FP and Passarelli-Araujo, H and Pedrosa-Silva, F and Olivares, FL and Venancio, TM}, title = {Population structure and pangenome analysis of Enterobacter bugandensis uncover the presence of blaCTX-M-55, blaNDM-5 and blaIMI-1, along with sophisticated iron acquisition strategies.}, journal = {Genomics}, volume = {112}, number = {2}, pages = {1182-1191}, doi = {10.1016/j.ygeno.2019.07.003}, pmid = {31279858}, issn = {1089-8646}, abstract = {Enterobacter bugandensis is a recently described species that has been largely associated with nosocomial infections. We report the genome of a non-clinical E. bugandensis strain, which was integrated with publicly available genomes to study the pangenome and general population structure of E. bugandensis. Core- and whole-genome multilocus sequence typing allowed the detection of five E. bugandensis phylogroups (PG-A to E), which contain important antimicrobial resistance and virulence determinants. We uncovered several extended-spectrum β-lactamases, including blaCTX-M-55 and blaNDM-5, present in an IncX replicon type plasmid, described here for the first time in E. bugandensis. Genetic context analysis of blaNDM-5 revealed the resemblance of this plasmid with other IncX plasmids from other bacteria from the same country. Three distinctive siderophore producing operons were found in E. bugandensis: enterobactin (ent), aerobactin (iuc/iut), and salmochelin (iro). Our findings provide novel insights on the lifestyle, physiology, antimicrobial, and virulence profiles of E. 
bugandensis.}, } @article {pmid31273387, year = {2019}, author = {Kopejtka, K and Lin, Y and Jakubovičová, M and Koblížek, M and Tomasch, J}, title = {Clustered Core- and Pan-Genome Content on Rhodobacteraceae Chromosomes.}, journal = {Genome biology and evolution}, volume = {11}, number = {8}, pages = {2208-2217}, pmid = {31273387}, issn = {1759-6653}, mesh = {Bacterial Proteins/*genetics ; Chromosomes, Bacterial/*genetics ; DNA Replication ; Gene Expression Regulation, Bacterial ; *Genome, Bacterial ; *Phylogeny ; Rhodobacteraceae/*genetics ; }, abstract = {In Bacteria, chromosome replication starts at a single origin of replication and proceeds on both replichores. Due to its asymmetric nature, replication influences chromosome structure and gene organization, mutation rate, and expression. To date, little is known about the distribution of highly conserved genes over the bacterial chromosome. Here, we used a set of 101 fully sequenced Rhodobacteraceae representatives to analyze the relationship between conservation of genes within this family and their distance from the origin of replication. Twenty-two of the analyzed species had core genes clustered significantly closer to the origin of replication with representatives of the genus Celeribacter being the most apparent example. Interestingly, there were also eight species with the opposite organization. In particular, Rhodobaca barguzinensis and Loktanella vestfoldensis showed a significant increase of core genes with distance from the origin of replication. The uneven distribution of low-conserved regions is in particular pronounced for genomes in which the halves of one replichore differ in their conserved gene content. Phage integration and horizontal gene transfer partially explain the scattered nature of Rhodobacteraceae genomes. 
Our findings lay the foundation for a better understanding of bacterial genome evolution and the role of replication therein.}, } @article {pmid31253105, year = {2019}, author = {Lima, NCB and Tanmoy, AM and Westeel, E and de Almeida, LGP and Rajoharison, A and Islam, M and Endtz, HP and Saha, SK and de Vasconcelos, ATR and Komurian-Pradel, F}, title = {Analysis of isolates from Bangladesh highlights multiple ways to carry resistance genes in Salmonella Typhi.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {530}, pmid = {31253105}, issn = {1471-2164}, support = {23038.010041/2013-13//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; Finance code 001//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; 643476//European COMPARE/ ; Allocations de Recherche pour une Thèse au Sud (ARTS) scholarship//Institut de Recherche pour le Développement/ ; //Wellcome Trust/United Kingdom ; }, mesh = {Bangladesh ; Chromosomes, Bacterial/genetics ; Drug Resistance, Bacterial/*genetics ; Genes, Bacterial/*genetics ; Genomic Islands/genetics ; *Genomics ; Genotype ; Humans ; Molecular Sequence Annotation ; Phenotype ; Plasmids/genetics ; Salmonella typhi/drug effects/*genetics/isolation & purification ; }, abstract = {BACKGROUND: Typhoid fever, caused by Salmonella Typhi, follows a fecal-oral transmission route and is a major global public health concern, especially in developing countries like Bangladesh. Increasing emergence of antimicrobial resistance (AMR) is a serious issue; the list of treatments for typhoid fever is ever-decreasing. In addition to IncHI1-type plasmids, Salmonella genomic island (SGI) 11 has been reported to carry AMR genes. Although reports suggest a recent reduction in multidrug resistance (MDR) in the Indian subcontinent, the corresponding genomic changes in the background are unknown.

RESULTS: Here, we assembled and annotated complete closed chromosomes and plasmids for 73 S. Typhi isolates using short-length Illumina reads. S. Typhi had an open pan-genome, and the core genome was smaller than previously reported. Considering AMR genes, we identified five variants of SGI11, including the previously reported reference sequence. Five plasmids were identified, including the new plasmids pK91 and pK43; pK43 and pHCM2 were not related to AMR. The pHCM1, pPRJEB21992 and pK91 plasmids carried AMR genes and, along with the SGI11 variants, were responsible for resistance phenotypes. pK91 also contained qnr genes, conferred high ciprofloxacin resistance and was related to the H58-sublineage Bdq, which shows the same phenotype. The presence of plasmids (pHCM1 and pK91) and SGI11 were linked to two H58-lineages, Ia and Bd. Loss of plasmids and integration of resistance genes in genomic islands could contribute to the fitness advantage of lineage Ia isolates.

CONCLUSIONS: Such events may explain why lineage Ia is globally widespread, while the Bd lineage is locally restricted. Further studies are required to understand how these S. Typhi AMR elements spread and generate new variants. Preventive measures such as vaccination programs should also be considered in endemic countries; such initiatives could potentially reduce the spread of AMR.}, } @article {pmid31244798, year = {2019}, author = {Levesque, S and de Melo, AG and Labrie, SJ and Moineau, S}, title = {Mobilome of Brevibacterium aurantiacum Sheds Light on Its Genetic Diversity and Its Adaptation to Smear-Ripened Cheeses.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {1270}, pmid = {31244798}, issn = {1664-302X}, abstract = {Brevibacterium aurantiacum is an actinobacterium that confers key organoleptic properties to washed-rind cheeses during the ripening process. Although this industrially relevant species has been gaining an increasing attention in the past years, its genome plasticity is still understudied due to the unavailability of complete genomic sequences. To add insights on the mobilome of this group, we sequenced the complete genomes of five dairy Brevibacterium strains and one non-dairy strain using PacBio RSII. We performed phylogenetic and pan-genome analyses, including comparisons with other publicly available Brevibacterium genomic sequences. Our phylogenetic analysis revealed that these five dairy strains, previously identified as Brevibacterium linens, belong instead to the B. aurantiacum species. A high number of transposases and integrases were observed in the Brevibacterium spp. strains. In addition, we identified 14 and 12 new insertion sequences (IS) in B. aurantiacum and B. linens genomes, respectively. Several stretches of homologous DNA sequences were also found between B. aurantiacum and other cheese rind actinobacteria, suggesting horizontal gene transfer (HGT). 
A HGT region from an iRon Uptake/Siderophore Transport Island (RUSTI) and an iron uptake composite transposon were found in five B. aurantiacum genomes. These findings suggest that low iron availability in milk is a driving force in the adaptation of this bacterial species to this niche. Moreover, the exchange of iron uptake systems suggests cooperative evolution between cheese rind actinobacteria. We also demonstrated that the integrative and conjugative element BreLI (Brevibacterium Lanthipeptide Island) can excise from B. aurantiacum SMQ-1417 chromosome. Our comparative genomic analysis suggests that mobile genetic elements played an important role into the adaptation of B. aurantiacum to cheese ecosystems.}, } @article {pmid31240253, year = {2019}, author = {Zhang, B and Zhu, W and Diao, S and Wu, X and Lu, J and Ding, C and Su, X}, title = {The poplar pangenome provides insights into the evolutionary history of the genus.}, journal = {Communications biology}, volume = {2}, number = {}, pages = {215}, pmid = {31240253}, issn = {2399-3642}, mesh = {*Evolution, Molecular ; Gene Ontology ; *Genome, Plant ; Phylogeny ; *Polymorphism, Single Nucleotide ; Populus/classification/*genetics ; }, abstract = {The genus Populus comprises a complex amalgam of ancient and modern species that has become a prime model for evolutionary and taxonomic studies. Here we sequenced the genomes of 10 species from five sections of the genus Populus, identified 71 million genomic variations, and observed new correlations between the single-nucleotide polymorphism-structural variation (SNP-SV) density and indel-SV density to complement the SNP-indel density correlation reported in mammals. Disease resistance genes (R genes) with heterozygous loss-of-function (LOF) were significantly enriched in the 10 species, which increased the diversity of poplar R genes during evolution. 
Heterozygous LOF mutations in the self-incompatibility genes were closely related to the self-fertilization of poplar, suggestive of genomic control of self-fertilization in dioecious plants. The phylogenetic genome-wide SNPs tree also showed possible ancient hybridization among species in sections Tacamahaca, Aigeiros, and Leucoides. The pangenome resource also provided information for poplar genetics and breeding.}, } @article {pmid31238973, year = {2019}, author = {Zhang, AN and Mao, Y and Wang, Y and Zhang, T}, title = {Mining traits for the enrichment and isolation of not-yet-cultured populations.}, journal = {Microbiome}, volume = {7}, number = {1}, pages = {96}, pmid = {31238973}, issn = {2049-2618}, mesh = {Betaproteobacteria/*genetics/*isolation & purification/metabolism ; Gene Expression Profiling ; *Genome, Bacterial ; Genomics/*methods ; Metabolic Networks and Pathways/genetics ; Phylogeny ; }, abstract = {BACKGROUND: The lack of pure cultures limits our understanding into 99% of bacteria. Proper interpretation of the genetic and the transcriptional datasets can reveal clues for the enrichment and even isolation of the not-yet-cultured populations. Unraveling such information requires a proper mining method.

RESULTS: Here, we present a method to infer the hidden traits for the enrichment of not-yet-cultured populations. We demonstrate this method using Candidatus Accumulibacter. Our method constructs a whole picture of the carbon, electron, and energy flows in the not-yet-cultured populations from the genomic datasets. Then, it decodes the coordination across three flows from the transcriptional datasets. Based on it, our method diagnoses the status of the not-yet-cultured populations and provides strategy to optimize the enrichment systems.

CONCLUSION: Our method could shed light to the exploration into the bacterial dark matter in the environments.}, } @article {pmid31231937, year = {2019}, author = {Québatte, M and Dehio, C}, title = {Bartonella gene transfer agent: Evolution, function, and proposed role in host adaptation.}, journal = {Cellular microbiology}, volume = {21}, number = {11}, pages = {e13068}, pmid = {31231937}, issn = {1462-5822}, support = {31003A_173119/SNSF_/Swiss National Science Foundation/Switzerland ; }, mesh = {Adaptation, Physiological/genetics ; Animals ; Bacterial Proteins/genetics ; Bartonella/*genetics/growth & development/metabolism/*pathogenicity ; Evolution, Molecular ; Gene Transfer, Horizontal/*genetics/physiology ; Host Microbial Interactions ; Mutation ; Recombination, Genetic/genetics ; Replication Origin/genetics ; Type IV Secretion Systems/genetics/metabolism ; }, abstract = {The processes underlying host adaptation by bacterial pathogens remain a fundamental question with relevant clinical, ecological, and evolutionary implications. Zoonotic pathogens of the genus Bartonella constitute an exceptional model to study these aspects. Bartonellae have undergone a spectacular diversification into multiple species resulting from adaptive radiation. Specific adaptations of a complex facultative intracellular lifestyle have enabled the colonisation of distinct mammalian reservoir hosts. This remarkable host adaptability has a multifactorial basis and is thought to be driven by horizontal gene transfer (HGT) and recombination among a limited genus-specific pan genome. Recent functional and evolutionary studies revealed that the conserved Bartonella gene transfer agent (BaGTA) mediates highly efficient HGT and could thus drive this evolution. Here, we review the recent progress made towards understanding BaGTA evolution, function, and its role in the evolution and pathogenesis of Bartonella spp. 
We notably discuss how BaGTA could have contributed to genome diversification through recombination of beneficial traits that underlie host adaptability. We further address how BaGTA may counter the accumulation of deleterious mutations in clonal populations (Muller's ratchet), which are expected to occur through the recurrent transmission bottlenecks during the complex infection cycle of these pathogens in their mammalian reservoir hosts and arthropod vectors.}, } @article {pmid31231616, year = {2019}, author = {Minnullina, L and Pudova, D and Shagimardanova, E and Shigapova, L and Sharipova, M and Mardanova, A}, title = {Comparative Genome Analysis of Uropathogenic Morganella morganii Strains.}, journal = {Frontiers in cellular and infection microbiology}, volume = {9}, number = {}, pages = {167}, pmid = {31231616}, issn = {2235-2988}, mesh = {Adult ; Bacterial Proteins/biosynthesis/genetics ; Bacterial Toxins/genetics ; Base Composition ; Carcinoma ; Cell Line, Tumor ; Child, Preschool ; Female ; Genes, Bacterial/*genetics ; Genome Size ; *Genome, Bacterial ; Genomic Islands ; *Genomics ; Hemolysin Proteins/genetics ; Humans ; Male ; Middle Aged ; Molecular Sequence Annotation ; Morganella morganii/*genetics/isolation & purification ; Multigene Family ; Prophages/genetics ; Russia ; Salmonella Phages/genetics ; Urease/genetics/metabolism ; Urinary Bladder Neoplasms ; Urinary Tract Infections/*microbiology ; Virulence/genetics ; }, abstract = {Morganella morganii is an opportunistic bacterial pathogen shown to cause a wide range of clinical and community-acquired infections. This study was aimed at sequencing and comparing the genomes of three M. morganii strains isolated from the urine samples of patients with community-acquired urinary tract infections. Draft genome sequencing was conducted using the Illumina HiSeq platform. The genomes of MM 1, MM 4, and MM 190 strains have a size of 3.82-3.97 Mb and a GC content of 50.9-51%. 
Protein-coding sequences (CDS) represent 96.1% of the genomes, RNAs are encoded by 2.7% of genes and pseudogenes account for 1.2% of the genomes. The pan-genome contains 4,038 CDS, of which 3,279 represent core genes. Six to ten prophages and 21-33 genomic islands were identified in the genomes of MM 1, MM 4, and MM 190. More than 30 genes encode capsular biosynthesis proteins, an average of 60 genes encode motility and chemotaxis proteins, and about 70 genes are associated with fimbrial biogenesis and adhesion. We determined that all strains contained urease gene cluster ureABCEFGD and had a urease activity. Both MM 4 and MM 190 strains are capable of hemolysis and their activity correlates well with a cytotoxicity level on T-24 bladder carcinoma cells. These activities were associated with expression of RTX toxin gene hlyA, which was introduced into the genomes by a phage similar to Salmonella phage 118970_sal4.}, } @article {pmid31210272, year = {2019}, author = {Blake, VC and Woodhouse, MR and Lazo, GR and Odell, SG and Wight, CP and Tinker, NA and Wang, Y and Gu, YQ and Birkett, CL and Jannink, JL and Matthews, DE and Hane, DL and Michel, SL and Yao, E and Sen, TZ}, title = {GrainGenes: centralized small grain resources and digital platform for geneticists and breeders.}, journal = {Database : the journal of biological databases and curation}, volume = {2019}, number = {}, pages = {}, pmid = {31210272}, issn = {1758-0463}, mesh = {*Databases, Genetic ; Edible Grain/*genetics ; *Genome, Plant ; *Plant Breeding ; Poaceae/*genetics ; *Quantitative Trait Loci ; }, abstract = {GrainGenes (https://wheat.pw.usda.gov or https://graingenes.org) is an international centralized repository for curated, peer-reviewed datasets useful to researchers working on wheat, barley, rye and oat. GrainGenes manages genomic, genetic, germplasm and phenotypic datasets through a dynamically generated web interface for facilitated data discovery. 
Since 1992, GrainGenes has served geneticists and breeders in both the public and private sectors on six continents. Recently, several new datasets were curated into the database along with new tools for analysis. The GrainGenes homepage was enhanced by making it more visually intuitive and by adding links to commonly used pages. Several genome assemblies and genomic tracks are displayed through the genome browsers at GrainGenes, including the Triticum aestivum (bread wheat) cv. 'Chinese Spring' IWGSC RefSeq v1.0 genome assembly, the Aegilops tauschii (D genome progenitor) Aet v4.0 genome assembly, the Triticum turgidum ssp. dicoccoides (wild emmer wheat) cv. 'Zavitan' WEWSeq v.1.0 genome assembly, a T. aestivum (bread wheat) pangenome, the Hordeum vulgare (barley) cv. 'Morex' IBSC genome assembly, the Secale cereale (rye) select 'Lo7' assembly, a partial hexaploid Avena sativa (oat) assembly and the Triticum durum cv. 'Svevo' (durum wheat) RefSeq Release 1.0 assembly. New genetic maps and markers were added and can be displayed through CMAP. Quantitative trait loci, genetic maps and genes from the Wheat Gene Catalogue are indexed and linked through the Wheat Information System (WheatIS) portal. Training videos were created to help users query and reach the data they need. GSP (Genome Specific Primers) and PIECE2 (Plant Intron Exon Comparison and Evolution) tools were implemented and are available to use. 
As more small grains reference sequences become available, GrainGenes will play an increasingly vital role in helping researchers improve crops.}, } @article {pmid31202417, year = {2019}, author = {Chun, BH and Han, DM and Kim, KH and Jeong, SE and Park, D and Jeon, CO}, title = {Genomic and metabolic features of Tetragenococcus halophilus as revealed by pan-genome and transcriptome analyses.}, journal = {Food microbiology}, volume = {83}, number = {}, pages = {36-47}, doi = {10.1016/j.fm.2019.04.009}, pmid = {31202417}, issn = {1095-9998}, mesh = {Biogenic Amines/metabolism ; Enterococcaceae/*genetics/*metabolism ; *Gene Expression Profiling ; *Genome, Bacterial ; Genomics ; *Metabolic Networks and Pathways ; Osmotic Pressure ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Salinity ; }, abstract = {The genomic and metabolic diversity and features of Tetragenococcus halophilus, a moderately halophilic lactic acid bacterium, were investigated by pan-genome, transcriptome, and metabolite analyses. Phylogenetic analyses based on the 16S rRNA gene and genome sequences of 15 T. halophilus strains revealed their phylogenetic distinctness from other Tetragenococcus species. Pan-genome analysis of the T. halophilus strains showed that their carbohydrate metabolic capabilities were diverse and strain dependent. Aside from one histidine decarboxylase gene in one strain, no decarboxylase gene associated with biogenic amine production was identified from the genomes. However, T. halophilus DSM 20339T produced tyramine without a biogenic amine-producing decarboxylase gene, suggesting the presence of an unidentified tyramine-producing gene. Our reconstruction of the metabolic pathways of these strains showed that T. halophilus harbors a facultative lactic acid fermentation pathway to produce l-lactate, ethanol, acetate, and CO2 from various carbohydrates. The transcriptomic analysis of strain DSM 20339T suggested that T. 
halophilus may produce more acetate via the heterolactic pathway (including d-ribose metabolism) at high salt conditions. Although genes associated with the metabolism of glycine betaine, proline, glutamate, glutamine, choline, and citrulline were identified from the T. halophilus genomes, the transcriptome and metabolite analyses suggested that glycine betaine was the main compatible solute responding to high salt concentration and that citrulline may play an important role in the coping mechanism against high salinity-induced osmotic stresses. Our results will provide a better understanding of the genome and metabolic features of T. halophilus, which has implications for the food fermentation industry.}, } @article {pmid31191477, year = {2019}, author = {Kröber, E and Schäfer, H}, title = {Identification of Proteins and Genes Expressed by Methylophaga thiooxydans During Growth on Dimethylsulfide and Their Presence in Other Members of the Genus.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {1132}, pmid = {31191477}, issn = {1664-302X}, abstract = {Dimethylsulfide is a volatile organic sulfur compound that provides the largest input of biogenic sulfur from the oceans to the atmosphere, and thence back to land, constituting an important link in the global sulfur cycle. Microorganisms degrading DMS affect fluxes of DMS in the environment, but the underlying metabolic pathways are still poorly understood. Methylophaga thiooxydans is a marine methylotrophic bacterium capable of growth on DMS as sole source of carbon and energy. Using proteomics and transcriptomics we identified genes expressed during growth on dimethylsulfide and methanol to refine our knowledge of the metabolic pathways that are involved in DMS and methanol degradation in this strain. Amongst the most highly expressed genes on DMS were the two methanethiol oxidases driving the oxidation of this reactive and toxic intermediate of DMS metabolism. 
Growth on DMS also increased expression of the enzymes of the tetrahydrofolate linked pathway of formaldehyde oxidation, in addition to the tetrahydromethanopterin linked pathway. Key enzymes of the inorganic sulfur oxidation pathway included flavocytochrome c sulfide dehydrogenase, sulfide quinone oxidoreductase, and persulfide dioxygenases. A sulP permease was also expressed during growth on DMS. Proteomics and transcriptomics also identified a number of highly expressed proteins and gene products whose function is currently not understood. As the identity of some enzymes of organic and inorganic sulfur metabolism previously detected in Methylophaga has not been characterized at the genetic level yet, highly expressed uncharacterized genes provide new targets for further biochemical and genetic analysis. A pan-genome analysis of six available Methylophaga genomes showed that only two of the six investigated strains, M. thiooxydans and M. sulfidovorans have the gene encoding methanethiol oxidase, suggesting that growth on methylated sulfur compounds of M. aminisulfidivorans is likely to involve different enzymes and metabolic intermediates. 
Hence, the pathways of DMS-utilization and subsequent C1 and sulfur oxidation are not conserved across Methylophaga isolates that degrade methylated sulfur compounds.}, } @article {pmid31188829, year = {2019}, author = {Guyeux, C and Charr, JC and Tran, HTM and Furtado, A and Henry, RJ and Crouzillat, D and Guyot, R and Hamon, P}, title = {Evaluation of chloroplast genome annotation tools and application to analysis of the evolution of coffee species.}, journal = {PloS one}, volume = {14}, number = {6}, pages = {e0216347}, pmid = {31188829}, issn = {1932-6203}, mesh = {Coffee/*genetics ; Evolution, Molecular ; Genes, Plant ; Genome, Chloroplast/*genetics ; Molecular Sequence Annotation/*methods/standards ; *Phylogeny ; Sequence Analysis, DNA ; }, abstract = {Chloroplast sequences are widely used for phylogenetic analysis due to their high degree of conservation in plants. Whole chloroplast genomes can now be readily obtained for plant species using new sequencing methods, giving invaluable data for plant evolution. However, new annotation methods are required for the efficient analysis of this data to deliver high quality phylogenetic analyses. In this study, the two main tools for chloroplast genome annotation were compared. More consistent detection and annotation of genes were produced with GeSeq when compared to the currently used Dogma. This suggests that the annotation of most of the previously annotated chloroplast genomes should now be updated. GeSeq was applied to species related to coffee, including 16 species of the Coffea and Psilanthus genera to reconstruct the ancestral chloroplast genomes and to evaluate their phylogenetic relationships. Eight genes in the plant chloroplast pan genome (consisting of 92 genes) were always absent in the coffee species analyzed. Notably, the two main cultivated coffee species (i.e. Arabica and Robusta) did not group into the same clade and differ in their pattern of gene evolution. 
While Arabica coffee (Coffea arabica) belongs to the Coffea genus, Robusta coffee (Coffea canephora) is associated with the Psilanthus genus. A more extensive survey of related species is required to determine if this is a unique attribute of Robusta coffee or a more widespread feature of coffee tree species.}, } @article {pmid31169073, year = {2019}, author = {Hsu, T and Gemmell, MR and Franzosa, EA and Berry, S and Mukhopadhya, I and Hansen, R and Michaud, M and Nielsen, H and Miller, WG and Nielsen, H and Bajaj-Elliott, M and Huttenhower, C and Garrett, WS and Hold, GL}, title = {Comparative genomics and genome biology of Campylobacter showae.}, journal = {Emerging microbes & infections}, volume = {8}, number = {1}, pages = {827-840}, pmid = {31169073}, issn = {2222-1751}, support = {R24 DK110499/DK/NIDDK NIH HHS/United States ; }, mesh = {Bacterial Proteins/genetics ; Campylobacter/classification/*genetics/isolation & purification/pathogenicity ; Campylobacter Infections/*microbiology ; Crohn Disease/microbiology ; Gastroenteritis/microbiology ; *Genome, Bacterial ; Genomics ; Humans ; Phenotype ; Phylogeny ; Virulence ; Virulence Factors/genetics ; }, abstract = {Campylobacter showae, a bacterium historically linked to gingivitis and periodontitis, has recently been associated with inflammatory bowel disease and colorectal cancer. Our aim was to generate genome sequences for new clinical C. showae strains and identify functional properties explaining their pathogenic potential. Eight C. showae genomes were assessed, four strains isolated from inflamed gut tissues from paediatric Crohn's disease patients, three strains from colonic adenomas, and one from a gastroenteritis patient stool. Genome assemblies were analyzed alongside the only 3 deposited C. showae genomes. 
The pangenome from these 11 strains consisted of 4686 unique protein families, and the core genome size was estimated at 1050 ± 15 genes with each new genome contributing an additional 206 ± 16 genes. Functional assays indicated that colonic strains segregated into 2 groups: adherent/invasive vs. non-adherent/non-invasive strains. The former possessed Type IV secretion machinery and S-layer proteins, while the latter contained Cas genes and other CRISPR associated proteins. Comparison of gene profiles with strains in Human Microbiome Project metagenomes showed that gut-derived isolates share genes specific to tongue dorsum and supragingival plaque counterparts. Our findings indicate that C. showae strains are phenotypically and genetically diverse and suggest that secretion systems may play an important role in virulence potential.}, } @article {pmid31164106, year = {2019}, author = {Hemsley, CM and O'Neill, PA and Essex-Lopresti, A and Norville, IH and Atkins, TP and Titball, RW}, title = {Extensive genome analysis of Coxiella burnetii reveals limited evolution within genomic groups.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {441}, pmid = {31164106}, issn = {1471-2164}, support = {//Wellcome Trust/United Kingdom ; DSTLX-1000068994//Defence Science and Technology Laboratory/ ; }, mesh = {Animals ; Cattle ; Coxiella burnetii/classification/*genetics/isolation & purification ; Evolution, Molecular ; *Genome, Bacterial ; Genome-Wide Association Study ; Genomics ; Genotyping Techniques ; Phylogeny ; United Kingdom ; }, abstract = {BACKGROUND: Coxiella burnetii is a zoonotic pathogen that resides in wild and domesticated animals across the globe and causes a febrile illness, Q fever, in humans. An improved understanding of the genetic diversity of C. burnetii is essential for the development of diagnostics, vaccines and therapeutics, but genotyping data is lacking from many parts of the world. 
Sporadic outbreaks of Q fever have occurred in the United Kingdom, but the local genetic make-up of C. burnetii has not been studied in detail.

RESULTS: Here, we report whole genome data for nine C. burnetii sequences obtained in the UK. All four genomes of C. burnetii from cattle, as well as one sheep sample, belonged to Multi-spacer sequence type (MST) 20, whereas the goat samples were MST33 (three genomes) and MST32 (one genome), two genotypes that have not been described to be present in the UK to date. We established the phylogenetic relationship between the UK genomes and 67 publically available genomes based on single nucleotide polymorphisms (SNPs) in the core genome, which confirmed tight clustering of strains within genomic groups, but also indicated that sub-groups exist within those groups. Variation is mainly achieved through SNPs, many of which are non-synonymous, thereby confirming that evolution of C. burnetii is based on modification of existing genes. Finally, we discovered genomic-group specific genome content, which supports a model of clonal expansion of previously established genotypes, with large scale dissemination of some of these genotypes across continents being observed.

CONCLUSIONS: The genetic make-up of C. burnetii in the UK is similar to the one in neighboring European countries. As a species, C. burnetii has been considered a clonal pathogen with low genetic diversity at the nucleotide level. Here, we present evidence for significant variation at the protein level between isolates of different genomic groups, which mainly affects secreted and membrane-associated proteins. Our results thereby increase our understanding of the global genetic diversity of C. burnetii and provide new insights into the evolution of this emerging zoonotic pathogen.}, } @article {pmid31162871, year = {2019}, author = {León-Sampedro, R and Del Campo, R and Rodriguez-Baños, M and Lanza, VF and Pozuelo, MJ and Francés-Cuesta, C and Tedim, AP and Freitas, AR and Novais, C and Peixe, L and Willems, RJL and Corander, J and González Candelas, F and Baquero, F and Coque, TM}, title = {Phylogenomics of Enterococcus faecalis from wild birds: new insights into host-associated differences in core and accessory genomes of the species.}, journal = {Environmental microbiology}, volume = {21}, number = {8}, pages = {3046-3062}, doi = {10.1111/1462-2920.14702}, pmid = {31162871}, issn = {1462-2920}, support = {//Instituto de Salud Carlos III of Spain/Ministry of Economy and Competitiveness/International ; //European Development Regional Fund 'A way to achieve Europe' (ERDF)/International ; PI15-0512//Spanish R&D National Plan Estatal de I + D + i 2013-2016/International ; JPIAMR2016-AC16/00039//Joint Programming Initiative in Antimicrobial Resistance (JPIAMR)/International ; //Sociedad Española de Enfermedades Infecciosas y Microbiología Clínica (SEIMC)/International ; }, mesh = {Animals ; Animals, Wild ; Birds/*microbiology ; Enterococcus faecalis/*genetics ; Gene Expression Regulation, Bacterial ; Gene Transfer, Horizontal ; Genome, Bacterial ; Host Specificity ; *Phylogeny ; }, abstract = {Wild birds have been suggested to be reservoirs of antimicrobial resistant 
and/or pathogenic Enterococcus faecalis (Efs) strains, but the scarcity of studies and available sequences limit our understanding of the population structure of the species in these hosts. Here, we analysed the clonal and plasmid diversity of 97 Efs isolates from wild migratory birds. We found a high diversity, with most sequence types (STs) being firstly described here, while others were found in other hosts including some predominant in poultry. We found that pheromone-responsive plasmids predominate in wild bird Efs while 35% of the isolates entirely lack plasmids. Then, to better understand the ecology of the species, the whole genome of five strains with known STs (ST82, ST170, ST16 and ST55) were sequenced and compared with all the Efs genomes available in public databases. Using several methods to analyse core and accessory genomes (AccNET, PLACNET, hierBAPS and PANINI), we detected differences in the accessory genome of some lineages (e.g. ST82) demonstrating specific associations with birds. Conversely, the genomes of other Efs lineages exhibited divergence in core and accessory genomes, reflecting different adaptive trajectories in various hosts. 
This pangenome divergence, horizontal gene transfer events and occasional epidemic peaks could explain the population structure of the species.}, } @article {pmid31149898, year = {2019}, author = {Rossoni, AW and Price, DC and Seger, M and Lyska, D and Lammers, P and Bhattacharya, D and Weber, AP}, title = {The genomes of polyextremophilic cyanidiales contain 1% horizontally transferred genes with diverse adaptive functions.}, journal = {eLife}, volume = {8}, number = {}, pages = {}, pmid = {31149898}, issn = {2050-084X}, support = {EXC 1028//Deutsche Forschungsgemeinschaft/International ; WE 2231/21-1//Deutsche Forschungsgemeinschaft/International ; }, mesh = {*Adaptation, Biological ; Algal Proteins/genetics ; DNA, Algal/genetics ; *Evolution, Molecular ; *Gene Transfer, Horizontal ; Rhodophyta/*genetics ; }, abstract = {The role and extent of horizontal gene transfer (HGT) in eukaryotes are hotly disputed topics that impact our understanding of the origin of metabolic processes and the role of organelles in cellular evolution. We addressed this issue by analyzing 10 novel Cyanidiales genomes and determined that 1% of their gene inventory is HGT-derived. Numerous HGT candidates share a close phylogenetic relationship with prokaryotes that live in similar habitats as the Cyanidiales and encode functions related to polyextremophily. HGT candidates differ from native genes in GC-content, number of splice sites, and gene expression. HGT candidates are more prone to loss, which may explain the absence of a eukaryotic pan-genome. Therefore, the lack of a pan-genome and cumulative effects fail to provide substantive arguments against our hypothesis of recurring HGT followed by differential loss in eukaryotes. 
The maintenance of 1% HGTs, even under selection for genome reduction, underlines the importance of non-endosymbiosis related foreign gene acquisition.}, } @article {pmid31134015, year = {2019}, author = {Singh, PK and Mahato, AK and Jain, P and Rathour, R and Sharma, V and Sharma, TR}, title = {Comparative Genomics Reveals the High Copy Number Variation of a Retro Transposon in Different Magnaporthe Isolates.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {966}, pmid = {31134015}, issn = {1664-302X}, abstract = {Magnaporthe oryzae is one of the fungal pathogens of rice which results in heavy yield losses worldwide. Understanding the genomic structure of M. oryzae is essential for appropriate deployment of the blast resistance in rice crop improvement programs. In this study we sequenced two M. oryzae isolates, RML-29 (avirulent) and RP-2421 (highly virulent) and performed comparative study along with three publically available genomes of 70-15, P131, and Y34. We identified several candidate effectors (>600) and isolate specific sequences from RML-29 and RP-2421, while a core set of 10013 single copy orthologs were found among the isolates. Pan-genome analysis showed extensive presence and absence variations (PAVs). We identified isolate-specific genes across 12 isolates using the pan-genome information. Repeat analysis was separately performed for each of the 15 isolates. This analysis revealed ∼25 times higher copy number of short interspersed nuclear elements (SINE) in virulent than avirulent isolate. We conclude that the extensive PAVs and occurrence of SINE throughout the genome could be one of the major mechanisms by which pathogenic variability is emerging in M. oryzae isolates. 
The knowledge gained in this comparative genome study can provide understandings about the fungal genome variations in different hosts and environmental conditions, and it will provide resources to effectively manage this important disease of rice.}, } @article {pmid31131017, year = {2019}, author = {Norri, T and Cazaux, B and Kosolobov, D and Mäkinen, V}, title = {Linear time minimum segmentation enables scalable founder reconstruction.}, journal = {Algorithms for molecular biology : AMB}, volume = {14}, number = {}, pages = {12}, pmid = {31131017}, issn = {1748-7188}, abstract = {Background: We study a preprocessing routine relevant in pan-genomic analyses: consider a set of aligned haplotype sequences of complete human chromosomes. Due to the enormous size of such data, one would like to represent this input set with a few founder sequences that retain as well as possible the contiguities of the original sequences. Such a smaller set gives a scalable way to exploit pan-genomic information in further analyses (e.g. read alignment and variant calling). Optimizing the founder set is an NP-hard problem, but there is a segmentation formulation that can be solved in polynomial time, defined as follows. Given a threshold L and a set $R = \{R_1, \ldots, R_m\}$ of m strings (haplotype sequences), each having length n, the minimum segmentation problem for founder reconstruction is to partition [1, n] into set P of disjoint segments such that each segment $[a, b] \in P$ has length at least L and the number $d(a, b) = |\{R_i[a, b] : 1 \le i \le m\}|$ of distinct substrings at segment [a, b] is minimized over $[a, b] \in P$. The distinct substrings in the segments represent founder blocks that can be concatenated to form $\max\{d(a, b) : [a, b] \in P\}$ founder sequences representing the original R such that crossovers happen only at segment boundaries.

Results: We give an $O(mn)$ time (i.e. linear time in the input size) algorithm to solve the minimum segmentation problem for founder reconstruction, improving over an earlier $O(mn^2)$.

Conclusions: Our improvement enables to apply the formulation on an input of thousands of complete human chromosomes. We implemented the new algorithm and give experimental evidence on its practicality. The implementation is available in https://github.com/tsnorri/founder-sequences.}, } @article {pmid31126314, year = {2019}, author = {Yang, X and Lee, WP and Ye, K and Lee, C}, title = {One reference genome is not enough.}, journal = {Genome biology}, volume = {20}, number = {1}, pages = {104}, pmid = {31126314}, issn = {1474-760X}, support = {U41HG007497/NH/NIH HHS/United States ; }, mesh = {*Genome, Human ; Genomic Structural Variation ; Genomics/standards ; Humans ; Reference Standards ; }, abstract = {A recent study on human structural variation indicates insufficiencies and errors in the human reference genome, GRCh38, and argues for the construction of a human pan-genome.}, } @article {pmid31122208, year = {2019}, author = {Feyereisen, M and Mahony, J and Kelleher, P and Roberts, RJ and O'Sullivan, T and Geertman, JA and van Sinderen, D}, title = {Comparative genome analysis of the Lactobacillus brevis species.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {416}, pmid = {31122208}, issn = {1471-2164}, support = {EPSPG/2015/7//Irish Research Council/ ; 450 13/IA/1953//Science Foundation Ireland/Ireland ; 15/SIRG/3430//Science Foundation Ireland/Ireland ; }, mesh = {Beer/microbiology ; Evolution, Molecular ; Genes, Bacterial ; *Genome, Bacterial ; Lactobacillus brevis/classification/*genetics/isolation & purification ; Phylogeny ; Plasmids ; }, abstract = {BACKGROUND: Lactobacillus brevis is a member of the lactic acid bacteria (LAB), and strains of L. brevis have been isolated from silage, as well as from fermented cabbage and other fermented foods. However, this bacterium is also commonly associated with bacterial spoilage of beer.

RESULTS: In the current study, complete genome sequences of six isolated L. brevis strains were determined. Five of these L. brevis strains were isolated from beer (three isolates) or the brewing environment (two isolates), and were characterized as beer-spoilers or non-beer spoilers, respectively, while the sixth isolate had previously been isolated from silage. The genomic features of 19 L. brevis strains, encompassing the six L. brevis strains described in this study and thirteen L. brevis strains for which complete genome sequences were available in public databases, were analyzed with particular attention to evolutionary aspects and adaptation to beer.

CONCLUSIONS: Comparative genomic analysis highlighted evolution of the taxon allowing niche colonization, notably adaptation to the beer environment, with approximately 50 chromosomal genes acquired by L. brevis beer-spoiler strains representing approximately 2% of their total chromosomal genetic content. These genes primarily encode proteins that are putatively involved in oxidation-reduction reactions, transcription regulation or membrane transport, functions that may be crucial to survive the harsh conditions associated with beer. The study emphasized the role of plasmids in beer spoilage with a number of unique genes identified among L. brevis beer-spoiler strains.}, } @article {pmid31120895, year = {2019}, author = {Vincent, AT and Schiettekatte, O and Goarant, C and Neela, VK and Bernet, E and Thibeaux, R and Ismail, N and Mohd Khalid, MKN and Amran, F and Masuzawa, T and Nakao, R and Amara Korba, A and Bourhy, P and Veyrier, FJ and Picardeau, M}, title = {Revisiting the taxonomy and evolution of pathogenicity of the genus Leptospira through the prism of genomics.}, journal = {PLoS neglected tropical diseases}, volume = {13}, number = {5}, pages = {e0007270}, pmid = {31120895}, issn = {1935-2735}, mesh = {Animals ; Asia ; *Evolution, Molecular ; *Genome, Bacterial ; Genomics ; Humans ; Leptospira/*classification/genetics/isolation & purification/*pathogenicity ; Leptospirosis/*microbiology ; Phylogeny ; Virulence ; Zoonoses/microbiology ; }, abstract = {The causative agents of leptospirosis are responsible for an emerging zoonotic disease worldwide. One of the major routes of transmission for leptospirosis is the natural environment contaminated with the urine of a wide range of reservoir animals. Soils and surface waters also host a high diversity of non-pathogenic Leptospira and species for which the virulence status is not clearly established. 
The genus Leptospira is currently divided into 35 species classified into three phylogenetic clusters, which supposedly correlate with the virulence of the bacteria. In this study, a total of 90 Leptospira strains isolated from different environments worldwide including Japan, Malaysia, New Caledonia, Algeria, mainland France, and the island of Mayotte in the Indian Ocean were sequenced. A comparison of average nucleotide identity (ANI) values of genomes of the 90 isolates and representative genomes of known species revealed 30 new Leptospira species. These data also supported the existence of two clades and 4 subclades. To avoid classification that strongly implies assumption on the virulence status of the lineages, we called them P1, P2, S1, S2. One of these subclades has not yet been described and is composed of Leptospira idonii and 4 novel species that are phylogenetically related to the saprophytes. We then investigated genome diversity and evolutionary relationships among members of the genus Leptospira by studying the pangenome and core gene sets. Our data enable the identification of genome features, genes and domains that are important for each subclade, thereby laying the foundation for refining the classification of this complex bacterial genus. We also shed light on atypical genomic features of a group of species that includes the species often associated with human infection, suggesting a specific and ongoing evolution of this group of species that will require more attention. In conclusion, we have uncovered a massive species diversity and revealed a novel subclade in environmental samples collected worldwide and we have redefined the classification of species in the genus. 
The implication of several new potentially infectious Leptospira species for human and animal health remains to be determined but our data also provide new insights into the emergence of virulence in the pathogenic species.}, } @article {pmid31114559, year = {2019}, author = {González, V and Santamaría, RI and Bustos, P and Pérez-Carrascal, OM and Vinuesa, P and Juárez, S and Martínez-Flores, I and Cevallos, MÁ and Brom, S and Martínez-Romero, E and Romero, D}, title = {Phylogenomic Rhizobium Species Are Structured by a Continuum of Diversity and Genomic Clusters.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {910}, pmid = {31114559}, issn = {1664-302X}, abstract = {The bacterial genus Rhizobium comprises diverse symbiotic nitrogen-fixing species associated with the roots of plants in the Leguminosae family. Multiple genomic clusters defined by whole genome comparisons occur within Rhizobium, but their equivalence to species is controversial. In this study we investigated such genomic clusters to ascertain their significance in a species phylogeny context. Phylogenomic inferences based on complete sets of ribosomal proteins and stringent core genome markers revealed the main lineages of Rhizobium. The clades corresponding to R. etli and R. leguminosarum species show several genomic clusters with average genomic nucleotide identities (ANI > 95%), and a continuum of divergent strains, respectively. They were found to be inversely correlated with the genetic distance estimated from concatenated ribosomal proteins. We uncovered evidence of a Rhizobium pangenome that was greatly expanded, both in its chromosomes and plasmids. Despite the variability of extra-chromosomal elements, our genomic comparisons revealed only a few chromid and plasmid families. The presence/absence profile of genes in the complete Rhizobium genomes agreed with the phylogenomic pattern of species divergence. 
Symbiotic genes were distributed according to the principal phylogenomic Rhizobium clades but did not resolve genome clusters within the clades. We distinguished some types of symbiotic plasmids within Rhizobium that displayed different rates of synonymous nucleotide substitutions in comparison to chromosomal genes. Symbiotic plasmids may have been repeatedly transferred horizontally between strains and species, in the process displacing and substituting pre-existing symbiotic plasmids. In summary, the results indicate that Rhizobium genomic clusters, as defined by whole genomic identities, might be part of a continuous process of evolutionary divergence that includes the core and the extrachromosomal elements leading to species formation.}, } @article {pmid31112551, year = {2019}, author = {Pucker, B and Holtgräwe, D and Stadermann, KB and Frey, K and Huettel, B and Reinhardt, R and Weisshaar, B}, title = {A chromosome-level sequence assembly reveals the structure of the Arabidopsis thaliana Nd-1 genome and its gene set.}, journal = {PloS one}, volume = {14}, number = {5}, pages = {e0216233}, pmid = {31112551}, issn = {1932-6203}, mesh = {Arabidopsis/genetics ; Chromosome Aberrations ; Chromosomes/genetics ; Genome, Plant/*genetics ; Genotype ; Sequence Analysis, DNA ; }, abstract = {In addition to the BAC-based reference sequence of the accession Columbia-0 from the year 2000, several short read assemblies of THE plant model organism Arabidopsis thaliana were published during the last years. Also, a SMRT-based assembly of Landsberg erecta has been generated that identified translocation and inversion polymorphisms between two genotypes of the species. Here we provide a chromosome-arm level assembly of the A. thaliana accession Niederzenz-1 (AthNd-1_v2c) based on SMRT sequencing data. The best assembly comprises 69 nucleome sequences and displays a contig length of up to 16 Mbp. 
Compared to an earlier Illumina short read-based NGS assembly (AthNd-1_v1), a 75 fold increase in contiguity was observed for AthNd-1_v2c. To assign contig locations independent from the Col-0 gold standard reference sequence, we used genetic anchoring to generate a de novo assembly. In addition, we assembled the chondrome and plastome sequences. Detailed analyses of AthNd-1_v2c allowed reliable identification of large genomic rearrangements between A. thaliana accessions contributing to differences in the gene sets that distinguish the genotypes. One of the differences detected identified a gene that is lacking from the Col-0 gold standard sequence. This de novo assembly extends the known proportion of the A. thaliana pan-genome.}, } @article {pmid31100356, year = {2019}, author = {Galata, V and Laczny, CC and Backes, C and Hemmrich-Stanisak, G and Schmolke, S and Franke, A and Meese, E and Herrmann, M and von Müller, L and Plum, A and Müller, R and Stähler, C and Posch, AE and Keller, A}, title = {Integrating Culture-based Antibiotic Resistance Profiles with Whole-genome Sequencing Data for 11,087 Clinical Isolates.}, journal = {Genomics, proteomics & bioinformatics}, volume = {17}, number = {2}, pages = {169-182}, doi = {10.1016/j.gpb.2018.11.002}, pmid = {31100356}, issn = {2210-3244}, mesh = {Acinetobacter baumannii/genetics/isolation & purification ; Bacteria/*genetics/*isolation & purification ; Cell Culture Techniques/*methods ; Drug Resistance, Microbial/*genetics ; Escherichia coli/genetics/isolation & purification ; Genome, Bacterial ; Genotype ; Humans ; Internet ; Microbial Sensitivity Tests ; Phenotype ; *Whole Genome Sequencing ; }, abstract = {Emerging antibiotic resistance is a major global health threat. The analysis of nucleic acid sequences linked to susceptibility phenotypes facilitates the study of genetic antibiotic resistance determinants to inform molecular diagnostics and drug development. 
We collected genetic data (11,087 newly-sequenced whole genomes) and culture-based resistance profiles (10,991 out of the 11,087 isolates comprehensively tested against 22 antibiotics in total) of clinical isolates including 18 main species spanning a time period of 30 years. Species and drug specific resistance patterns were observed including increased resistance rates for Acinetobacter baumannii to carbapenems and for Escherichia coli to fluoroquinolones. Species-level pan-genomes were constructed to reflect the genetic repertoire of the respective species, including conserved essential genes and known resistance factors. Integrating phenotypes and genotypes through species-level pan-genomes allowed to infer gene-drug resistance associations using statistical testing. The isolate collection and the analysis results have been integrated into GEAR-base, a resource available for academic research use free of charge at https://gear-base.com.}, } @article {pmid31086351, year = {2019}, author = {Gao, L and Gonda, I and Sun, H and Ma, Q and Bao, K and Tieman, DM and Burzynski-Chang, EA and Fish, TL and Stromberg, KA and Sacks, GL and Thannhauser, TW and Foolad, MR and Diez, MJ and Blanca, J and Canizares, J and Xu, Y and van der Knaap, E and Huang, S and Klee, HJ and Giovannoni, JJ and Fei, Z}, title = {The tomato pan-genome uncovers new genes and a rare allele regulating fruit flavor.}, journal = {Nature genetics}, volume = {51}, number = {6}, pages = {1044-1051}, doi = {10.1038/s41588-019-0410-2}, pmid = {31086351}, issn = {1546-1718}, mesh = {*Alleles ; Computational Biology/methods ; Domestication ; Fruit/*genetics ; *Genetic Association Studies ; *Genome, Plant ; *Genomics/methods ; Humans ; Lycopersicon esculentum/*genetics ; Open Reading Frames ; Plant Breeding ; Promoter Regions, Genetic ; *Quantitative Trait, Heritable ; Selection, Genetic ; }, abstract = {Modern tomatoes have narrow genetic diversity limiting their improvement potential. 
We present a tomato pan-genome constructed using genome sequences of 725 phylogenetically and geographically representative accessions, revealing 4,873 genes absent from the reference genome. Presence/absence variation analyses reveal substantial gene loss and intense negative selection of genes and promoters during tomato domestication and improvement. Lost or negatively selected genes are enriched for important traits, especially disease resistance. We identify a rare allele in the TomLoxC promoter selected against during domestication. Quantitative trait locus mapping and analysis of transgenic plants reveal a role for TomLoxC in apocarotenoid production, which contributes to desirable tomato flavor. In orange-stage fruit, accessions harboring both the rare and common TomLoxC alleles (heterozygotes) have higher TomLoxC expression than those homozygous for either and are resurgent in modern tomatoes. The tomato pan-genome adds depth and completeness to the reference genome, and is useful for future biological discovery and breeding.}, } @article {pmid31076745, year = {2019}, author = {Cruz-Morales, P and Orellana, CA and Moutafis, G and Moonen, G and Rincon, G and Nielsen, LK and Marcellin, E}, title = {Revisiting the Evolution and Taxonomy of Clostridia, a Phylogenomic Update.}, journal = {Genome biology and evolution}, volume = {11}, number = {7}, pages = {2035-2044}, pmid = {31076745}, issn = {1759-6653}, mesh = {Clostridium/*genetics ; DNA, Bacterial/*genetics ; Phylogeny ; }, abstract = {Clostridium is a large genus of obligate anaerobes belonging to the Firmicutes phylum of bacteria, most of which have a Gram-positive cell wall structure. The genus includes significant human and animal pathogens, causative of potentially deadly diseases such as tetanus and botulism. Despite their relevance and many studies suggesting that they are not a monophyletic group, the taxonomy of the group has largely been neglected. 
Currently, species belonging to the genus are placed in the unnatural order defined as Clostridiales, which includes the class Clostridia. Here, we used genomic data from 779 strains to study the taxonomy and evolution of the group. This analysis allowed us to 1) confirm that the group is composed of more than one genus, 2) detect major differences between pathogens classified as a single species within the group of authentic Clostridium spp. (sensu stricto), 3) identify inconsistencies between taxonomy and toxin evolution that reflect on the pervasive misclassification of strains, and 4) identify differential traits within central metabolism of members of what has been defined earlier and confirmed by us as cluster I. Our analysis shows that the current taxonomic classification of Clostridium species hinders the prediction of functions and traits, suggests a new classification for this fascinating class of bacteria, and highlights the importance of phylogenomics for taxonomic studies.}, } @article {pmid31068915, year = {2019}, author = {Park, SC and Lee, K and Kim, YO and Won, S and Chun, J}, title = {Large-Scale Genomics Reveals the Genetic Characteristics of Seven Species and Importance of Phylogenetic Distance for Estimating Pan-Genome Size.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {834}, pmid = {31068915}, issn = {1664-302X}, abstract = {For more than a decade, pan-genome analysis has been applied as an effective method for explaining the genetic contents variation of prokaryotic species. However, genomic characteristics and detailed structures of gene pools have not been fully clarified, because most studies have used a small number of genomes. Here, we constructed pan-genomes of seven species in order to elucidate variations in the genetic contents of >27,000 genomes belonging to Streptococcus pneumoniae, Staphylococcus aureus subsp. aureus, Salmonella enterica subsp. 
enterica, Escherichia coli and Shigella spp., Mycobacterium tuberculosis complex, Pseudomonas aeruginosa, and Acinetobacter baumannii. This work showed the pan-genomes of all seven species has open property. Additionally, systematic evaluation of the characteristics of their pan-genome revealed that phylogenetic distance provided valuable information for estimating the parameters for pan-genome size among several models including Heaps' law. Our results provide a better understanding of the species and a solution to minimize sampling biases associated with genome-sequencing preferences for pathogenic strains.}, } @article {pmid31053324, year = {2019}, author = {van der Nest, MA and Steenkamp, ET and Roodt, D and Soal, NC and Palmer, M and Chan, WY and Wilken, PM and Duong, TA and Naidoo, K and Santana, QC and Trollip, C and De Vos, L and van Wyk, S and McTaggart, AR and Wingfield, MJ and Wingfield, BD}, title = {Genomic analysis of the aggressive tree pathogen Ceratocystis albifundus.}, journal = {Fungal biology}, volume = {123}, number = {5}, pages = {351-363}, doi = {10.1016/j.funbio.2019.02.002}, pmid = {31053324}, issn = {1878-6146}, mesh = {Africa ; Ascomycota/*genetics ; Computational Biology ; Evolution, Molecular ; Gene Order ; Genetic Variation ; *Genome, Fungal ; Genomics ; High-Throughput Nucleotide Sequencing ; Interspersed Repetitive Sequences ; Plant Diseases/*microbiology ; Synteny ; Trees/*microbiology ; }, abstract = {The overall goal of this study was to determine whether the genome of an important plant pathogen in Africa, Ceratocystis albifundus, is structured into subgenomic compartments, and if so, to establish how these compartments are distributed across the genome. For this purpose, the publicly available genome of C. albifundus was complemented with the genome sequences for four additional isolates using the Illumina HiSeq platform. 
In addition, a reference genome for one of the individuals was assembled using both PacBio and Illumina HiSeq technologies. Our results showed a high degree of synteny between the five genomes, although several regions lacked detectable long-range synteny. These regions were associated with the presence of accessory genes, lower genetic similarity, variation in read-map depth, as well as transposable elements and genes associated with host-pathogen interactions (e.g. effectors and CAZymes). Such patterns are regarded as hallmarks of accelerated evolution, particularly of accessory subgenomic compartments in fungal pathogens. Our findings thus showed that the genome of C. albifundus is made-up of core and accessory subgenomic compartments, which is an important step towards characterizing its pangenome. This study also highlights the value of comparative genomics for understanding mechanisms that may underly and influence the biology and evolution of pathogens.}, } @article {pmid31046679, year = {2019}, author = {Lorentzen, MP and Campbell-Sills, H and Jorgensen, TS and Nielsen, TK and Coton, M and Coton, E and Hansen, L and Lucas, PM}, title = {Expanding the biodiversity of Oenococcus oeni through comparative genomics of apple cider and kombucha strains.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {330}, pmid = {31046679}, issn = {1471-2164}, support = {643063//H2020 Marie Skłodowska-Curie Actions, ITN MICROWINE/ ; 8960//Villum Fonden (DK), Project AMPHICOP/ ; }, mesh = {*Biodiversity ; *Genome, Bacterial ; *Kombucha Tea ; Malus/*chemistry ; Oenococcus/*classification/*genetics ; Phylogeny ; Whole Genome Sequencing ; *Wine ; }, abstract = {BACKGROUND: Oenococcus oeni is a lactic acid bacteria species adapted to the low pH, ethanol-rich environments of wine and cider fermentation, where it performs the crucial role of malolactic fermentation. 
It has a small genome and has lost the mutS-mutL DNA mismatch repair genes, making it a hypermutable and highly specialized species. Two main lineages of strains, named groups A and B, have been described to date, as well as other subgroups correlated to different types of wines or regions. A third group "C" has also been hypothesized based on sequence analysis, but it remains controversial. In this study we have elucidated the species population structure by sequencing 14 genomes of new strains isolated from cider and kombucha and performing comparative genomics analyses.

RESULTS: Sequence-based phylogenetic trees confirmed a population structure of 4 clades: The previously identified A and B, a third group "C" consisting of the new cider strains and a small subgroup of wine strains previously attributed to group B, and a fourth group "D" exclusively represented by kombucha strains. A pair of complete genomes from group C and D were compared to the circularized O. oeni PSU-1 strain reference genome and no genomic rearrangements were found. Phylogenetic trees, K-means clustering and pangenome gene clusters evidenced the existence of smaller, specialized subgroups of strains. Using the pangenome, genomic differences in stress resistance and biosynthetic pathways were found to uniquely distinguish the C and D clades.

CONCLUSIONS: The obtained results, including the additional cider and kombucha strains, firmly established the O. oeni population structure. Group C does not appear as fully domesticated as group A to wine, but showed several unique patterns which may be due to ongoing specialization to the cider environment. Group D was shown to be the most divergent member of O. oeni to date, appearing as the closest to a pre-domestication state of the species.}, } @article {pmid31030454, year = {2019}, author = {Wang, J and Xing, J and Lu, J and Sun, Y and Zhao, J and Miao, S and Xiong, Q and Zhang, Y and Zhang, G}, title = {Complete Genome Sequencing of {Bacillus} velezensis {WRN014}, and Comparison with Genome Sequences of other {Bacillus} velezensis Strains.}, journal = {Journal of microbiology and biotechnology}, volume = {29}, number = {5}, pages = {794--808}, doi = {10.4014/jmb.1901.01040}, pmid = {31030454}, issn = {1738-8872}, mesh = {Bacillus/classification/*genetics/isolation & purification ; Base Sequence ; China ; Chromosome Mapping ; DNA, Bacterial/analysis/genetics ; Genes, Bacterial/*genetics ; Genetic Variation ; Genome, Bacterial ; Multigene Family ; Musa/microbiology ; Mutation ; Phylogeny ; Plant Development ; Plant Diseases/microbiology ; Polymorphism, Single Nucleotide ; Secondary Metabolism/genetics ; Sequence Analysis, DNA ; Whole Genome Sequencing/*methods ; }, abstract = {Bacillus velezensis strain WRN014 was isolated from banana fields in Hainan, China. Bacillus velezensis is an important member of the plant growth-promoting rhizobacteria (PGPR) which can enhance plant growth and control soil-borne disease. The complete genome of Bacillus velezensis WRN014 was sequenced by combining Illumina Hiseq 2500 system and Pacific Biosciences SMRT high-throughput sequencing technologies. Then, the genome of Bacillus velezensis WRN014, together with 45 other completed genome sequences of the Bacillus velezensis strains, were comparatively studied. 
The genome of Bacillus velezensis WRN014 was 4,063,541bp in length and contained 4,062 coding sequences, 9 genomic islands and 13 gene clusters. The results of comparative genomic analysis provide evidence that (i) The 46 Bacillus velezensis strains formed 2 obviously closely related clades in phylogenetic trees. (ii) The pangenome in this study is open and is increasing with the addition of new sequenced genomes. (iii) Analysis of single nucleotide polymorphisms (SNPs) revealed local diversification of the 46 Bacillus velezensis genomes. Surprisingly, SNPs were not evenly distributed throughout the whole genome. (iv) Analysis of gene clusters revealed that rich gene clusters spread over Bacillus velezensis strains and some gene clusters are conserved in different strains. This study reveals that the strain WRN014 and other Bacillus velezensis strains have potential to be used as PGPR and biopesticide.}, } @article {pmid31024592, year = {2019}, author = {Dillon, MM and Almeida, RND and Laflamme, B and Martel, A and Weir, BS and Desveaux, D and Guttman, DS}, title = {Molecular Evolution of {Pseudomonas} syringae Type {III} Secreted Effector Proteins.}, journal = {Frontiers in plant science}, volume = {10}, pages = {418}, pmid = {31024592}, issn = {1664-462X}, abstract = {Diverse Gram-negative pathogens like Pseudomonas syringae employ type III secreted effector (T3SE) proteins as primary virulence factors that combat host immunity and promote disease. T3SEs can also be recognized by plant hosts and activate an effector triggered immune (ETI) response that shifts the interaction back toward plant immunity. Consequently, T3SEs are pivotal in determining the virulence potential of individual P. syringae strains, and ultimately help to restrict P. syringae pathogens to a subset of potential hosts that are unable to recognize their repertoires of T3SEs. While a number of effector families are known to be present in the P. 
syringae species complex, one of the most persistent challenges has been documenting the complex variation in T3SE contents across a diverse collection of strains. Using the entire pan-genome of 494 P. syringae strains isolated from more than 100 hosts, we conducted a global analysis of all known and putative T3SEs. We identified a total of 14,613 putative T3SEs, 4,636 of which were unique at the amino acid level, and show that T3SE repertoires of different P. syringae strains vary dramatically, even among strains isolated from the same hosts. We also find substantial diversification within many T3SE families, and in many cases find strong signatures of positive selection. Furthermore, we identify multiple gene gain and loss events for several families, demonstrating an important role of horizontal gene transfer (HGT) in the evolution of P. syringae T3SEs. These analyses provide insight into the evolutionary history of P. syringae T3SEs as they co-evolve with the host immune system, and dramatically expand the database of P. syringae T3SEs alleles.}, } @article {pmid31014247, year = {2019}, author = {Roach, R and Mann, R and Gambley, CG and Chapman, T and Shivas, RG and Rodoni, B}, title = {Genomic sequence analysis reveals diversity of {Australian} {Xanthomonas} species associated with bacterial leaf spot of tomato, capsicum and chilli.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {310}, pmid = {31014247}, issn = {1471-2164}, mesh = {*Biodiversity ; Capsicum/*microbiology ; Genome, Bacterial/genetics ; *Genomics ; Lycopersicon esculentum/*microbiology ; Phylogeny ; Plant Diseases/*microbiology ; Plasmids/genetics ; Polymorphism, Single Nucleotide ; Xanthomonas/classification/*genetics/*physiology ; }, abstract = {BACKGROUND: The genetic diversity in Australian populations of Xanthomonas species associated with bacterial leaf spot in tomato, capsicum and chilli were compared to worldwide bacterial populations. 
The aim of this study was to confirm the identities of these Australian Xanthomonas species and classify them in comparison to overseas isolates. Analysis of whole genome sequence allows for the investigation of bacterial population structure, pathogenicity and gene exchange, resulting in better management strategies and biosecurity.

RESULTS: Phylogenetic analysis of the core genome alignments and SNP data grouped strains in distinct clades. Patterns observed in average nucleotide identity, pan genome structure, effector and carbohydrate active enzyme profiles reflected the whole genome phylogeny and highlight taxonomic issues in X. perforans and X. euvesicatoria. Circular sequences with similarity to previously characterised plasmids were identified, and plasmids of similar sizes were isolated. Potential false positive and false negative plasmid assemblies were discussed. Effector patterns that may influence virulence on host plant species were analysed in pathogenic and non-pathogenic xanthomonads.

CONCLUSIONS: The phylogeny presented here confirmed X. vesicatoria, X. arboricola, X. euvesicatoria and X. perforans and a clade of an uncharacterised Xanthomonas species shown to be genetically distinct from all other strains of this study. The taxonomic status of X. perforans and X. euvesicatoria as one species is discussed in relation to whole genome phylogeny and phenotypic traits. The patterns evident in enzyme and plasmid profiles indicate worldwide exchange of genetic material with the potential to introduce new virulence elements into local bacterial populations.}, } @article {pmid31009331, year = {2019}, author = {Rao, RT and Sivakumar, N and Jayakumar, K}, title = {Analyses of Livestock-Associated {Staphylococcus} aureus Pan-Genomes Suggest Virulence Is Not Primary Interest in Evolution of Its Genome.}, journal = {Omics : a journal of integrative biology}, volume = {23}, number = {4}, pages = {224--236}, doi = {10.1089/omi.2019.0005}, pmid = {31009331}, issn = {1557-8100}, mesh = {Computational Biology ; Drug Discovery ; Genome, Bacterial/genetics ; Genomics ; Staphylococcus aureus/*genetics/*pathogenicity ; Virulence/genetics ; }, abstract = {Staphylococcus aureus is not only part of normal flora but also an opportunistic pathogen relevant to microbial genomics, public health, and veterinary medicine. In addition to being a well-known human pathogen, S. aureus causes various infections in economically important livestock animals such as cows, sheep, goats, and pigs. There are very few studies that have examined the pan-genome of S. aureus or the host-specific strains' pan-genomes. We report on livestock-associated S. aureus' (LA-SA) pan-genome and suggest that virulence is not the primary interest in evolution of its genome. LA-SA' complete genomes were retrieved from the NCBI and pan-genome was constructed by high-speed Roary pipeline. The pan-genome size was 4637 clusters, whereas 42.46% of the pan-genome was associated with the core genome. 
We found 1268 genes were associated with the strain-unique genome, and the remaining 1432 cluster with the accessory genome. COG (clusters of orthologous group of proteins) analysis of the core genes revealed 34% of clusters related to metabolism responsible for amino acid and inorganic ion transport (COG categories E and P), followed by carbohydrate metabolism (category G). Virulent gene analysis revealed the core genes responsible for antiphagocytosis and iron uptake. The fluidity of pan-genome was calculated as 0.082 ± 0.025. Importantly, the positive selection analysis suggested a slower rate of evolution among the LA-SA genomes. We call for comparative microbial and pan-genome research between human and LA-SA that can help further understand the evolution of virulence and thus inform future microbial diagnostics and drug discovery.}, } @article {pmid31004458, year = {2019}, author = {Liu, J and Zeng, Q and Wang, M and Cheng, A and Liu, M and Zhu, D and Chen, S and Jia, R and Zhao, XX and Wu, Y and Yang, Q and Zhang, S and Liu, Y and Yu, Y and Zhang, L and Chen, X}, title = {Comparative genome-scale modelling of the pathogenic {Flavobacteriaceae} species {Riemerella} anatipestifer in {China}.}, journal = {Environmental microbiology}, volume = {21}, number = {8}, pages = {2836--2851}, doi = {10.1111/1462-2920.14635}, pmid = {31004458}, issn = {1462-2920}, support = {CARS-42-17//China Agricultural Research System/International ; 2017YFD0500800//National Key Research and Development Program of China/International ; CARS-SVDIP//Sichuan Veterinary Medicine and Drug Innovation Group of China Agricultural Research System/International ; 2016JPT0004//Special Fund for Key Laboratory of Animal Disease and Human Health of Sichuan Province/International ; }, mesh = {Animals ; China ; Genetic Variation ; Genome, Bacterial ; Genomics ; Models, Genetic ; Multilocus Sequence Typing ; Riemerella/*genetics/pathogenicity ; Virulence ; }, abstract = {Riemerella anatipestifer (RA) is a 
gram-negative bacterium that has a high potential to infect waterfowl. Although more and more genomes of RA have been generated comparaed to genomic analysis of RA still remains at the level of individual species. In this study, we analysed the pan-genome of 27 RA virulent isolates to reveal the intraspecies genomic diversity from various aspects. The multi-locus sequence typing (MLST) analysis suggests that the geographic origin of R. anatipestifer is Guangdong province, China. Results of pan-genome analysis revealed an open pan-genome for all 27 species with the sizes of 2967 genes. We identified 387 genes among 555 unique genes originated by horizontal gene transfer. Further studies showed 204 strain-specific HGT genes were predicted as virulent proteins. Screening the 1113 core genes in RA through subtractive genomic approach, 70 putative vaccine targets out of 125 non-cytoplasmic proteins have been predicted. Further analysis of these non A. platyrhynchos homologous proteins predicted that 56 essential proteins as drug target with more interaction partners were involved in unique metabolic pathways of RA. 
In conclusion, the present study indicated the essence and the diversity of RA and also provides useful information for identification of vaccine and drugs candidates in future.}, } @article {pmid30992351, year = {2019}, author = {Knight, DR and Kullin, B and Androga, GO and Barbut, F and Eckert, C and Johnson, S and Spigaglia, P and Tateda, K and Tsai, PJ and Riley, TV}, title = {Evolutionary and Genomic Insights into {Clostridioides} difficile Sequence Type 11: a Diverse Zoonotic and Antimicrobial-Resistant Lineage of Global {One Health} Importance.}, journal = {mBio}, volume = {10}, number = {2}, pmid = {30992351}, issn = {2150-7511}, support = {I01 BX002449/BX/BLRD VA/United States ; }, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; Asia ; Australia ; Clostridioides difficile/drug effects/*genetics ; Clostridium Infections/transmission ; Drug Resistance, Bacterial/*genetics ; Europe ; *Evolution, Molecular ; *Genome, Bacterial ; Global Health ; Humans ; Myoviridae/genetics ; North America ; *One Health ; Phylogeny ; Prophages/genetics ; Ribotyping ; Siphoviridae/genetics ; Whole Genome Sequencing ; Zoonoses/microbiology ; }, abstract = {Clostridioides difficile (Clostridium difficile) sequence type 11 (ST11) is well established in production animal populations worldwide and contributes considerably to the global burden of C. difficile infection (CDI) in humans. Increasing evidence of shared ancestry and genetic overlap of PCR ribotype 078 (RT078), the most common ST11 sublineage, between human and animal populations suggests that CDI may be a zoonosis. We performed whole-genome sequencing (WGS) on a collection of 207 ST11 and closely related ST258 isolates of human and veterinary/environmental origin, comprising 16 RTs collected from Australia, Asia, Europe, and North America. 
Core genome single nucleotide variant (SNV) analysis identified multiple intraspecies and interspecies clonal groups (isolates separated by ≤2 core genome SNVs) in all the major RT sublineages: 078, 126, 127, 033, and 288. Clonal groups comprised isolates spread across different states, countries, and continents, indicative of reciprocal long-range dissemination and possible zoonotic/anthroponotic transmission. Antimicrobial resistance genotypes and phenotypes varied across host species, geographic regions, and RTs and included macrolide/lincosamide resistance (Tn6194 [ermB]), tetracycline resistance (Tn6190 [tetM] and Tn6164 [tet44]), and fluoroquinolone resistance (gyrA/B mutations), as well as numerous aminoglycoside resistance cassettes. The population was defined by a large "open" pan-genome (10,378 genes), a remarkably small core genome of 2,058 genes (only 19.8% of the gene pool), and an accessory genome containing a large and diverse collection of important prophages of the Siphoviridae and Myoviridae. This study provides novel insights into strain relatedness and genetic variability of C. difficile ST11, a lineage of global One Health importance. IMPORTANCE: Historically, Clostridioides difficile (Clostridium difficile) has been associated with life-threatening diarrhea in hospitalized patients. Increasing rates of C. difficile infection (CDI) in the community suggest exposure to C. difficile reservoirs outside the hospital, including animals, the environment, or food. C. difficile sequence type 11 (ST11) is known to infect/colonize livestock worldwide and comprises multiple ribotypes, many of which cause disease in humans, suggesting CDI may be a zoonosis. Using high-resolution genomics, we investigated the evolution and zoonotic potential of ST11 and a new closely related ST258 lineage sourced from diverse origins. We found multiple intra- and interspecies clonal transmission events in all ribotype sublineages. 
Clones were spread across multiple continents, often without any health care association, indicative of zoonotic/anthroponotic long-range dissemination in the community. ST11 possesses a massive pan-genome and numerous clinically important antimicrobial resistance elements and prophages, which likely contribute to the success of this globally disseminated lineage of One Health importance.}, } @article {pmid30986243, year = {2019}, author = {Wyres, KL and Wick, RR and Judd, LM and Froumine, R and Tokolyi, A and Gorrie, CL and Lam, MMC and Duchêne, S and Jenney, A and Holt, KE}, title = {Distinct evolutionary dynamics of horizontal gene transfer in drug resistant and virulent clones of {Klebsiella} pneumoniae.}, journal = {PLoS genetics}, volume = {15}, number = {4}, pages = {e1008114}, pmid = {30986243}, issn = {1553-7404}, mesh = {Bacterial Capsules/genetics/metabolism ; Bacteriophages/genetics ; Cross Infection/drug therapy/microbiology ; Drug Resistance, Bacterial/*genetics ; Drug Resistance, Multiple, Bacterial/genetics ; *Evolution, Molecular ; *Gene Transfer, Horizontal ; Genetic Variation ; Genome, Bacterial ; Humans ; Klebsiella Infections/drug therapy/microbiology ; Klebsiella pneumoniae/drug effects/*genetics/pathogenicity ; Lipopolysaccharides/biosynthesis/genetics ; Models, Genetic ; Plasmids/genetics ; Virulence/*genetics ; }, abstract = {Klebsiella pneumoniae has emerged as an important cause of two distinct public health threats: multi-drug resistant (MDR) healthcare-associated infections and drug susceptible community-acquired invasive infections. These pathotypes are generally associated with two distinct subsets of K. pneumoniae lineages or 'clones' that are distinguished by the presence of acquired resistance genes and several key virulence loci. 
Genomic evolutionary analyses of the most notorious MDR and invasive community-associated ('hypervirulent') clones indicate differences in terms of chromosomal recombination dynamics and capsule polysaccharide diversity, but it remains unclear if these differences represent generalised trends. Here we leverage a collection of >2200 K. pneumoniae genomes to identify 28 common clones (n ≥ 10 genomes each), and perform the first genomic evolutionary comparison. Eight MDR and 6 hypervirulent clones were identified on the basis of acquired resistance and virulence gene prevalence. Chromosomal recombination, surface polysaccharide locus diversity, pan-genome, plasmid and phage dynamics were characterised and compared. The data showed that MDR clones were highly diverse, with frequent chromosomal recombination generating extensive surface polysaccharide locus diversity. Additional pan-genome diversity was driven by frequent acquisition/loss of both plasmids and phage. In contrast, chromosomal recombination was rare in the hypervirulent clones, which also showed a significant reduction in pan-genome diversity, largely driven by a reduction in plasmid diversity. Hence the data indicate that hypervirulent clones may be subject to some sort of constraint for horizontal gene transfer that does not apply to the MDR clones. Our findings are relevant for understanding the risk of emergence of individual K. pneumoniae strains carrying both virulence and acquired resistance genes, which have been increasingly reported and cause highly virulent infections that are extremely difficult to treat. 
Specifically, our data indicate that MDR clones pose the greatest risk, because they are more likely to acquire virulence genes than hypervirulent clones are to acquire resistance genes.}, } @article {pmid30975079, year = {2019}, author = {Du, Y and Ma, J and Yin, Z and Liu, K and Yao, G and Xu, W and Fan, L and Du, B and Ding, Y and Wang, C}, title = {Comparative genomic analysis of {Bacillus} paralicheniformis {MDJK30} with its closely related species reveals an evolutionary relationship between {B.} paralicheniformis and {B.} licheniformis.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {283}, pmid = {30975079}, issn = {1471-2164}, support = {2017YFD0200804//the National Key Research and Development Program of China/ ; 31700094//National Natural Science Foundation of China/ ; 31770115//National Natural Science Foundation of China/ ; 31600090//National Natural Science Foundation of China/ ; 2014BAD16B02//the National Science and Technology Pillar Program of China/ ; SYL2017XTTD03//the funds of Shandong "Double Tops" Program/ ; }, mesh = {Adaptation, Physiological/genetics ; Bacillus/*genetics/metabolism/physiology ; *Evolution, Molecular ; *Genomics ; Multigene Family/genetics ; Phylogeny ; }, abstract = {BACKGROUND: Members of the genus Bacillus are important plant growth-promoting rhizobacteria that serve as biocontrol agents. Bacillus paralicheniformis MDJK30 is a PGPR isolated from the peony rhizosphere and can suppress plant-pathogenic bacteria and fungi. To further uncover the genetic mechanism of the plant growth-promoting traits of MDJK30 and its closely related strains, we used comparative genomics to provide insights into the genetic diversity and evolutionary relationship between B. paralicheniformis and B. licheniformis.

RESULTS: A comparative genomics analysis based on B. paralicheniformis MDJK30 and 55 other previously reported Bacillus strains was performed. The evolutionary position of MDJK30 and the evolutionary relationship between B. paralicheniformis and B. licheniformis were evaluated by studying the phylogeny of the core genomes, a population structure analysis and ANI results. Comparative genomic analysis revealed various features of B. paralicheniformis that contribute to its commensal lifestyle in the rhizosphere, including an opening pan genome, a diversity of transport and the metabolism of the carbohydrates and amino acids. There are notable differences in the numbers and locations of the insertion sequences, prophages, genomic islands and secondary metabolic synthase operons between B. paralicheniformis and B. licheniformis. In particular, we found most gene clusters of Fengycin, Bacitracin and Lantipeptide were only present in B. paralicheniformis and were obtained by horizontal gene transfer (HGT), and these clusters may be used as genetic markers for distinguishing B. paralicheniformis and B. licheniformis.

CONCLUSIONS: This study reveals that MDJK30 and the other strains of lineage paralicheniformis present plant growth-promoting traits at the genetic level and can be developed and commercially formulated in agriculture as PGPR. Core genome phylogenies and population structure analysis has proven to be a powerful tool for differentiating B. paralicheniformis and B. licheniformis. Comparative genomic analyses illustrate the genetic differences between the paralicheniformis-licheniformis group with respect to rhizosphere adaptation.}, } @article {pmid30963617, year = {2019}, author = {Vakirlis, N and Monerawela, C and McManus, G and Ribeiro, O and McLysaght, A and James, T and Bond, U}, title = {Evolutionary journey and characterisation of a novel pan-gene associated with beer strains of {Saccharomyces} cerevisiae.}, journal = {Yeast (Chichester, England)}, volume = {36}, number = {7}, pages = {425--437}, doi = {10.1002/yea.3391}, pmid = {30963617}, issn = {1097-0061}, support = {764364//European Commission, Marie Skłodowska-Curie Innovative Training Network award/International ; 1592 award//Trinity College Dublin/International ; }, mesh = {Beer/*microbiology ; Cell Membrane/metabolism ; Chromosomes, Fungal/genetics ; Evolution, Molecular ; Fungal Proteins/*genetics/metabolism ; Gene Deletion ; Gene Expression ; Gene Transfer, Horizontal ; Genome, Fungal/genetics ; Open Reading Frames ; Saccharomyces/classification/genetics/growth & development/isolation & purification ; Saccharomyces cerevisiae/classification/*genetics/growth & development/*isolation & purification ; }, abstract = {The sequencing of over a thousand Saccharomyces cerevisiae genomes revealed a complex pangenome. Over one third of the discovered genes are not present in the S. cerevisiae core genome but instead are often restricted to a subset of yeast isolates and thus may be important for adaptation to specific environmental niches. 
We refer to these genes as "pan-genes," being part of the pangenome but not the core genome. Here, we describe the evolutionary journey and characterisation of a novel pan-gene, originally named hypothetical (HYPO) open-reading frame. Phylogenetic analysis reveals that HYPO has been predominantly retained in S. cerevisiae strains associated with brewing but has been repeatedly lost in most other fungal species during evolution. There is also evidence that HYPO was horizontally transferred at least once, from S. cerevisiae to Saccharomyces paradoxus. The phylogenetic analysis of HYPO exemplifies the complexity and intricacy of evolutionary trajectories of genes within the S. cerevisiae pangenome. To examine possible functions for Hypo, we overexpressed a HYPO-GFP fusion protein in both S. cerevisiae and Saccharomyces pastorianus. The protein localised to the plasma membrane where it accumulated initially in distinct foci. Time-lapse fluorescent imaging revealed that when cells are grown in wort, Hypo-gfp fluorescence spreads throughout the membrane during cell growth. The overexpression of Hypo-gfp in S. cerevisiae or S. 
pastorianus strains did not significantly alter cell growth in medium-containing glucose, maltose, maltotriose, or wort at different concentrations.}, } @article {pmid30957837, year = {2019}, author = {Quijada, NM and Rodríguez-Lázaro, D and Eiros, JM and Hernández, M}, title = {{TORMES}: an automated pipeline for whole bacterial genome analysis.}, journal = {Bioinformatics (Oxford, England)}, volume = {35}, number = {21}, pages = {4207--4212}, doi = {10.1093/bioinformatics/btz220}, pmid = {30957837}, issn = {1367-4811}, mesh = {*Genome, Bacterial ; High-Throughput Nucleotide Sequencing ; Multilocus Sequence Typing ; *Software ; Whole Genome Sequencing ; }, abstract = {MOTIVATION: The progress of High Throughput Sequencing (HTS) technologies and the reduction in the sequencing costs are such that Whole Genome Sequencing (WGS) could replace many traditional laboratory assays and procedures. Exploiting the volume of data produced by HTS platforms requires substantial computing skills and this is the main bottleneck in the implementation of WGS as a routine laboratory technique. The way in which the vast amount of results are presented to researchers and clinicians with no specialist knowledge of genome sequencing is also a significant issue.

RESULTS: Here we present TORMES, a user-friendly pipeline for WGS analysis of bacteria from any origin generated by HTS on Illumina platforms. TORMES is designed for non-bioinformatician users, and automates the steps required for WGS analysis directly from the raw sequence data: sequence quality filtering, de novo assembly, draft genome ordering against a reference, genome annotation, multi-locus sequence typing (MLST), searching for antibiotic resistance and virulence genes, and pangenome comparisons. Once the analysis is finished, TORMES generates and interactive web-like report that can be opened in any web browser and shared and revised by researchers in a simple manner. TORMES can be run by using very simple commands and represent a quick an easy way to perform WGS analysis.

TORMES is free available at https://github.com/nmquijada/tormes.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid30953542, year = {2019}, author = {Raymond, F and Boissinot, M and Ouameur, AA and Déraspe, M and Plante, PL and Kpanou, SR and Bérubé, È and Huletsky, A and Roy, PH and Ouellette, M and Bergeron, MG and Corbeil, J}, title = {Culture-enriched human gut microbiomes reveal core and accessory resistance genes.}, journal = {Microbiome}, volume = {7}, number = {1}, pages = {56}, pmid = {30953542}, issn = {2049-2618}, mesh = {Anti-Bacterial Agents/pharmacology ; Bacteria/*classification/drug effects/genetics/growth & development ; Bacterial Proteins/genetics ; Bacteriological Techniques/*methods ; *Drug Resistance, Microbial ; Escherichia coli/genetics/growth & development/isolation & purification ; Feces/cytology/microbiology ; Gastrointestinal Microbiome ; Gene Transfer, Horizontal ; Humans ; Metagenomics ; Phylogeny ; Sequence Analysis, DNA/*methods ; }, abstract = {BACKGROUND: Low-abundance microorganisms of the gut microbiome are often referred to as a reservoir for antibiotic resistance genes. Unfortunately, these less-abundant bacteria can be overlooked by deep shotgun sequencing. In addition, it is a challenge to associate the presence of resistance genes with their risk of acquisition by pathogens. In this study, we used liquid culture enrichment of stools to assemble the genome of lower-abundance bacteria from fecal samples. We then investigated the gene content recovered from these culture-enriched and culture-independent metagenomes in relation with their taxonomic origin, specifically antibiotic resistance genes. We finally used a pangenome approach to associate resistance genes with the core or accessory genome of Enterobacteriaceae and inferred their propensity to horizontal gene transfer.

RESULTS: Using culture-enrichment approaches with stools allowed assembly of 187 bacterial species with an assembly size greater than 1 million nucleotides. Of these, 67 were found only in culture-enriched conditions, and 22 only in culture-independent microbiomes. These assembled metagenomes allowed the evaluation of the gene content of specific subcommunities of the gut microbiome. We observed that differentially distributed metabolic enzymes were associated with specific culture conditions and, for the most part, with specific taxa. Gene content differences between microbiomes, for example, antibiotic resistance, were for the most part not associated with metabolic enzymes, but with other functions. We used a pangenome approach to determine if the resistance genes found in Enterobacteriaceae, specifically E. cloacae or E. coli, were part of the core genome or of the accessory genome of this species. In our healthy volunteer cohort, we found that E. cloacae contigs harbored resistance genes that were part of the core genome of the species, while E. coli had a large accessory resistome proximal to mobile elements.

CONCLUSION: Liquid culture of stools contributed to an improved functional and comparative genomics study of less-abundant gut bacteria, specifically those associated with antibiotic resistance. Defining whether a gene is part of the core genome of a species helped in interpreting the genomes recovered from culture-independent or culture-enriched microbiomes.}, } @article {pmid30949149, year = {2019}, author = {Park, CJ and Andam, CP}, title = {Within-Species Genomic Variation and Variable Patterns of Recombination in the Tetracycline Producer {Streptomyces} rimosus.}, journal = {Frontiers in microbiology}, volume = {10}, pages = {552}, pmid = {30949149}, issn = {1664-302X}, abstract = {Streptomyces rimosus is best known as the primary source of the tetracycline class of antibiotics, most notably oxytetracycline, which have been widely used against many gram-positive and gram-negative pathogens and protozoan parasites. However, despite the medical and agricultural importance of S. rimosus, little is known of its evolutionary history and genome dynamics. In this study, we aim to elucidate the pan-genome characteristics and phylogenetic relationships of 32 S. rimosus genomes. The S. rimosus pan-genome contains more than 22,000 orthologous gene clusters, and approximately 8.8% of these genes constitutes the core genome. A large part of the accessory genome is composed of 9,646 strain-specific genes. S. rimosus exhibits an open pan-genome (decay parameter α = 0.83) and high gene diversity between strains (genomic fluidity φ = 0.12). We also observed strain-level variation in the distribution and abundance of biosynthetic gene clusters (BGCs) and that each individual S. rimosus genome has a unique repertoire of BGCs. 
Lastly, we observed variation in recombination, with some strains donating or receiving DNA more often than others, strains that tend to frequently recombine with specific partners, genes that often experience recombination more than others, and variable sizes of recombined DNA sequences. We conclude that the high levels of inter-strain genomic variation in S. rimosus is partly explained by differences in recombination among strains. These results have important implications on current efforts for natural drug discovery, the ecological role of strain-level variation in microbial populations, and addressing the fundamental question of why microbes have pan-genomes.}, } @article {pmid30930881, year = {2019}, author = {Cho, H and Song, ES and Heu, S and Baek, J and Lee, YK and Lee, S and Lee, SW and Park, DS and Lee, TH and Kim, JG and Hwang, I}, title = {Prediction of Host-Specific Genes by Pan-Genome Analyses of the Korean Ralstonia solanacearum Species Complex.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {506}, pmid = {30930881}, issn = {1664-302X}, abstract = {The soil-borne pathogenic Ralstonia solanacearum species complex (RSSC) is a group of plant pathogens that is economically destructive worldwide and has a broad host range, including various solanaceae plants, banana, ginger, sesame, and clove. Previously, Korean RSSC strains isolated from samples of potato bacterial wilt were grouped into four pathotypes based on virulence tests against potato, tomato, eggplant, and pepper. In this study, we sequenced the genomes of 25 Korean RSSC strains selected based on these pathotypes. The newly sequenced genomes were analyzed to determine the phylogenetic relationships between the strains with average nucleotide identity values, and structurally compared via multiple genome alignment using Mauve software. 
To identify candidate genes responsible for the host specificity of the pathotypes, functional genome comparisons were conducted by analyzing pan-genome orthologous group (POG) and type III secretion system effectors (T3es). POG analyses revealed that a total of 128 genes were shared only in tomato-non-pathogenic strains, 8 genes in tomato-pathogenic strains, 5 genes in eggplant-non-pathogenic strains, 7 genes in eggplant-pathogenic strains, 1 gene in pepper-non-pathogenic strains, and 34 genes in pepper-pathogenic strains. When we analyzed T3es, three host-specific effectors were predicted: RipS3 (SKWP3) and RipH3 (HLK3) were found only in tomato-pathogenic strains, and RipAC (PopC) were found only in eggplant-pathogenic strains. Overall, we identified host-specific genes and effectors that may be responsible for virulence functions in RSSC in silico. The expected characters of those genes suggest that the host range of RSSC is determined by the comprehensive actions of various virulence factors, including effectors, secretion systems, and metabolic enzymes.}, } @article {pmid30929798, year = {2019}, author = {Bedoya-Correa, CM and Rincón Rodríguez, RJ and Parada-Sanchez, MT}, title = {Genomic and phenotypic diversity of Streptococcus mutans.}, journal = {Journal of oral biosciences}, volume = {61}, number = {1}, pages = {22-31}, doi = {10.1016/j.job.2018.11.001}, pmid = {30929798}, issn = {1880-3865}, mesh = {Biofilms ; *Dental Caries ; Genomics ; Humans ; *Streptococcus mutans ; Virulence Factors ; }, abstract = {BACKGROUND: Streptococcus mutans (S. mutans) is a commensal microorganism found in the human oral cavity. However, due to environmental changes, selective pressures, and the presence of a variable genome, it adapts and may acquire new physiological and metabolic properties that alter dental biofilm homeostasis, promoting the development of dental caries. Although the plasticity and heterogeneity of S. 
mutans is widely recognized, very little is known about the mechanisms for the expression of pathogenic properties in specific genotypes.

HIGHLIGHT: The implementation of molecular biology techniques in the study of S. mutans has provided information on the genomic diversity of this species. This variability is generated by genome rearrangements, natural genetic transformation, and horizontal gene transfer, and continues to grow due to an open pan-genome. The main virulence factors associated with the cariogenic potential of S. mutans include adhesion, acid production (acidogenicity), and acid tolerance (aciduricity), and also show variability. These factors coordinate the modification of the physicochemical properties of the biofilm, which results in the accumulation of S. mutans and other acidogenic and aciduric species in the oral cavity.

CONCLUSION: We review the current literature on the main processes that generate S. mutans genomic diversity, as well as the phenotypic variability of its main virulence factors. S. mutans achieves its pathogenesis by sensing the intra- and extracellular environments and regulating gene transcription according to perceived environmental modifications. Consequently, this regulation gives rise to differential synthesis of proteins, allowing this species to potentially express virulence factors.}, } @article {pmid30929020, year = {2019}, author = {Pinholt, M and Bayliss, SC and Gumpert, H and Worning, P and Jensen, VVS and Pedersen, M and Feil, EJ and Westh, H}, title = {WGS of 1058 Enterococcus faecium from Copenhagen, Denmark, reveals rapid clonal expansion of vancomycin-resistant clone ST80 combined with widespread dissemination of a vanA-containing plasmid and acquisition of a heterogeneous accessory genome.}, journal = {The Journal of antimicrobial chemotherapy}, volume = {74}, number = {7}, pages = {1776-1785}, doi = {10.1093/jac/dkz118}, pmid = {30929020}, issn = {1460-2091}, mesh = {Bacterial Proteins/*genetics ; Carbon-Oxygen Ligases/*genetics ; Cross Infection/epidemiology/microbiology/transmission ; Denmark/epidemiology ; Disease Transmission, Infectious ; Enterococcus faecium/classification/genetics/*isolation & purification ; Genome, Bacterial ; *Genotype ; Gram-Positive Bacterial Infections/*epidemiology/microbiology/transmission ; Hospitals ; Humans ; Molecular Epidemiology ; Molecular Typing ; Phylogeny ; Plasmids/*analysis ; Vancomycin-Resistant Enterococci/classification/genetics/*isolation & purification ; *Whole Genome Sequencing ; }, abstract = {OBJECTIVES: From 2012 to 2015, a sudden significant increase in vancomycin-resistant (vanA) Enterococcus faecium (VREfm) was observed in the Capital Region of Denmark. Clonal relatedness of VREfm and vancomycin-susceptible E. 
faecium (VSEfm) was investigated, transmission events between hospitals were identified and the pan-genome and plasmids from the largest VREfm clonal group were characterized.

METHODS: WGS of 1058 E. faecium isolates was carried out on the Illumina platform to perform SNP analysis and to identify the pan-genome. One isolate was also sequenced on the PacBio platform to close the genome. Epidemiological data were collected from laboratory information systems.

RESULTS: Phylogeny of 892 VREfm and 166 VSEfm revealed a polyclonal structure, with a single clonal group (ST80) accounting for 40% of the VREfm isolates. VREfm and VSEfm co-occurred within many clonal groups; however, no VSEfm were related to the dominant VREfm group. A similar vanA plasmid was identified in ≥99% of isolates belonging to the dominant group and 69% of the remaining VREfm. Ten plasmids were identified in the completed genome, and ∼29% of this genome consisted of dispensable accessory genes. The size of the pan-genome among isolates in the dominant group was 5905 genes.

CONCLUSIONS: Most probably, VREfm emerged owing to importation of a successful VREfm clone which rapidly transmitted to the majority of hospitals in the region whilst simultaneously disseminating a vanA plasmid to pre-existing VSEfm. Acquisition of a heterogeneous accessory genome may account for the success of this clone by facilitating adaptation to new environmental challenges.}, } @article {pmid30918968, year = {2019}, author = {Smith, BA and Leligdon, C and Baltrus, DA}, title = {Just the Two of Us? A Family of Pseudomonas Megaplasmids Offers a Rare Glimpse into the Evolution of Large Mobile Elements.}, journal = {Genome biology and evolution}, volume = {11}, number = {4}, pages = {1192-1206}, pmid = {30918968}, issn = {1759-6653}, mesh = {*Evolution, Molecular ; Genes, Essential ; Multigene Family ; Plasmids/*genetics ; Pseudomonas putida/*genetics ; Pseudomonas syringae/*genetics ; RNA, Transfer/genetics ; }, abstract = {Pseudomonads are ubiquitous group of environmental proteobacteria, well known for their roles in biogeochemical cycling, in the breakdown of xenobiotic materials, as plant growth promoters, and as pathogens of a variety of host organisms. We have previously identified a large megaplasmid present within one isolate of the plant pathogen Pseudomonas syringae, and here we report that a second member of this megaplasmid family is found within an environmental Pseudomonad isolate most closely related to Pseudomonas putida. Many of the shared genes are involved in critical cellular processes like replication, transcription, translation, and DNA repair. We argue that presence of these shared pathways sheds new light on discussions about the types of genes that undergo horizontal gene transfer (i.e., the complexity hypothesis) as well as the evolution of pangenomes. Furthermore, although both megaplasmids display a high level of synteny, genes that are shared differ by over 50% on average at the amino acid level. 
This combination of conservation in gene order despite divergence in gene sequence suggests that this Pseudomonad megaplasmid family is relatively old, that gene order is under strong selection within this family, and that there are likely many more members of this megaplasmid family waiting to be found in nature.}, } @article {pmid30917781, year = {2019}, author = {Shelyakin, PV and Bochkareva, OO and Karan, AA and Gelfand, MS}, title = {Micro-evolution of three Streptococcus species: selection, antigenic variation, and horizontal gene inflow.}, journal = {BMC evolutionary biology}, volume = {19}, number = {1}, pages = {83}, pmid = {30917781}, issn = {1471-2148}, mesh = {Animals ; Antigenic Variation/*genetics ; *Biological Evolution ; Conserved Sequence/genetics ; DNA, Intergenic ; Gene Flow ; Gene Ontology ; Gene Rearrangement/genetics ; *Gene Transfer, Horizontal ; Genes, Bacterial ; Genome Size ; Humans ; Hydrolases/metabolism ; Nucleotides/genetics ; Phylogeny ; *Selection, Genetic ; Sequence Deletion ; Species Specificity ; Streptococcus/*genetics ; Streptococcus pneumoniae/genetics ; Virulence/genetics ; }, abstract = {BACKGROUND: The genus Streptococcus comprises pathogens that strongly influence the health of humans and animals. Genome sequencing of multiple Streptococcus strains demonstrated high variability in gene content and order even in closely related strains of the same species and created a newly emerged object for genomic analysis, the pan-genome. Here we analysed the genome evolution of 25 strains of Streptococcus suis, 50 strains of Streptococcus pyogenes and 28 strains of Streptococcus pneumoniae.

RESULTS: Fractions of the pan-genome, unique, periphery, and universal genes differ in size, functional composition, the level of nucleotide substitutions, and predisposition to horizontal gene transfer and genomic rearrangements. The density of substitutions in intergenic regions appears to be correlated with selection acting on adjacent genes, implying that more conserved genes tend to have more conserved regulatory regions. The total pan-genome of the genus is open, but only due to strain-specific genes, whereas other pan-genome fractions reach saturation. We have identified the set of genes with phylogenies inconsistent with species and non-conserved location in the chromosome; these genes are rare in at least one species and have likely experienced recent horizontal transfer between species. The strain-specific fraction is enriched with mobile elements and hypothetical proteins, but also contains a number of candidate virulence-related genes, so it may have a strong impact on adaptability and pathogenicity. Mapping the rearrangements to the phylogenetic tree revealed large parallel inversions in all species. A parallel inversion of length 15 kB with breakpoints formed by genes encoding surface antigen proteins PhtD and PhtB in S. pneumoniae leads to replacement of gene fragments that likely indicates the action of an antigen variation mechanism.

CONCLUSIONS: Members of genus Streptococcus have a highly dynamic, open pan-genome, that potentially confers them with the ability to adapt to changing environmental conditions, i.e. antibiotic resistance or transmission between different hosts. Hence, integrated analysis of all aspects of genome evolution is important for the identification of potential pathogens and design of drugs and vaccines.}, } @article {pmid30906288, year = {2019}, author = {Legendre, M and Alempic, JM and Philippe, N and Lartigue, A and Jeudy, S and Poirot, O and Ta, NT and Nin, S and Couté, Y and Abergel, C and Claverie, JM}, title = {Pandoravirus Celtis Illustrates the Microevolution Processes at Work in the Giant Pandoraviridae Genomes.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {430}, pmid = {30906288}, issn = {1664-302X}, abstract = {With genomes of up to 2.7 Mb propagated in μm-long oblong particles and initially predicted to encode more than 2000 proteins, members of the Pandoraviridae family display the most extreme features of the known viral world. The mere existence of such giant viruses raises fundamental questions about their origin and the processes governing their evolution. A previous analysis of six newly available isolates, independently confirmed by a study including three others, established that the Pandoraviridae pan-genome is open, meaning that each new strain exhibits protein-coding genes not previously identified in other family members. With an average increment of about 60 proteins, the gene repertoire shows no sign of reaching a limit and remains largely coding for proteins without recognizable homologs in other viruses or cells (ORFans). To explain these results, we proposed that most new protein-coding genes were created de novo, from pre-existing non-coding regions of the G+C rich pandoravirus genomes. 
The comparison of the gene content of a new isolate, pandoravirus celtis, closely related (96% identical genome) to the previously described p. quercus is now used to test this hypothesis by studying genomic changes in a microevolution range. Our results confirm that the differences between these two similar gene contents mostly consist of protein-coding genes without known homologs, with statistical signatures close to that of intergenic regions. These newborn proteins are under slight negative selection, perhaps to maintain stable folds and prevent protein aggregation pending the eventual emergence of fitness-increasing functions. Our study also unraveled several insertion events mediated by a transposase of the hAT family, 3 copies of which are found in p. celtis and are presumably active. Members of the Pandoraviridae are presently the first viruses known to encode this type of transposase.}, } @article {pmid30902757, year = {2020}, author = {Banerjee, R and Shine, O and Rajachandran, V and Krishnadas, G and Minnick, MF and Paul, S and Chattopadhyay, S}, title = {Gene duplication and deletion, not horizontal transfer, drove intra-species mosaicism of Bartonella henselae.}, journal = {Genomics}, volume = {112}, number = {1}, pages = {467-471}, doi = {10.1016/j.ygeno.2019.03.009}, pmid = {30902757}, issn = {1089-8646}, mesh = {Bartonella henselae/*genetics ; *Evolution, Molecular ; *Gene Deletion ; *Gene Duplication ; Gene Transfer, Horizontal ; Genes, Bacterial ; Genome, Bacterial ; *Mosaicism ; }, abstract = {Bartonella henselae is a facultative intracellular pathogen that occurs worldwide and is responsible primarily for cat-scratch disease in young people and bacillary angiomatosis in immunocompromised patients. The principal source of genome-level diversity that contributes to B. henselae's host-adaptive features is thought to be horizontal gene transfer events. However, our analyses did not reveal the acquisition of horizontally-transferred islands in B. 
henselae after its divergence from other Bartonella. Rather, diversity in gene content and genome size was apparently acquired through two alternative mechanisms, including deletion and, more predominantly, duplication of genes. Interestingly, a majority of these events occurred in regions that were horizontally transferred long before B. henselae's divergence from other Bartonella species. Our study indicates the possibility that gene duplication, in response to positive selection pressures in specific clones of B. henselae, might be linked to the pathogen's adaptation to arthropod vectors, the cat reservoir, or humans as incidental host-species.}, } @article {pmid30900970, year = {2019}, author = {de Carvalho, SP and de Almeida, JB and de Freitas, LM and Guimarães, AMS and do Nascimento, NC and Dos Santos, AP and Campos, GB and Messick, JB and Timenetsky, J and Marques, LM}, title = {Genomic profile of Brazilian methicillin-resistant Staphylococcus aureus resembles clones dispersed worldwide.}, journal = {Journal of medical microbiology}, volume = {68}, number = {5}, pages = {693-702}, doi = {10.1099/jmm.0.000956}, pmid = {30900970}, issn = {1473-5644}, mesh = {Anti-Bacterial Agents/pharmacology ; Brazil ; Enterotoxins/genetics ; Fluoroquinolones/pharmacology ; *Genome, Bacterial ; Genomics ; Humans ; Macrolides/pharmacology ; Methicillin-Resistant Staphylococcus aureus/*genetics/*virology ; *Phylogeny ; Virulence Factors/*genetics ; }, abstract = {PURPOSE: Comparative genomic analysis of strains may help us to better understand the wide diversity of their genetic profiles. The aim of this study was to analyse the genomic features of the resistome and virulome of Brazilian first methicillin-resistant Staphylococcus aureus (MRSA) isolates and their relationship to other Brazilian and international MRSA strains.

METHODOLOGY: The whole genomes of three MRSA strains previously isolated in Vitória da Conquista were sequenced, assembled, annotated and compared with other MRSA genomes. A phylogenetic tree was constructed and the pan-genome and accessory and core genomes were constructed. The resistomes and virulomes of all strains were identified.

RESULTS/KEY FINDINGS: Phylogenetic analysis of all 49 strains indicated different clones showing high similarity. The pan-genome of the analysed strains consisted of 4484 genes, with 31 % comprising the gene portion of the core genome, 47 % comprising the accessory genome and 22 % being singletons. Most strains showed at least one gene related to virulence factors associated with immune system evasion, followed by enterotoxins. The strains showed multiresistance, with the most recurrent genes conferring resistance to beta-lactams, fluoroquinolones, aminoglycosides and macrolides.

CONCLUSIONS: Our comparative genomic analysis showed that there is no pattern of virulence gene distribution among the clones analysed in the different regions. The Brazilian strains showed similarity with clones from several continents.}, } @article {pmid30893420, year = {2019}, author = {Correia, K and Yu, SM and Mahadevan, R}, title = {AYbRAH: a curated ortholog database for yeasts and fungi spanning 600 million years of evolution.}, journal = {Database : the journal of biological databases and curation}, volume = {2019}, number = {}, pages = {}, pmid = {30893420}, issn = {1758-0463}, mesh = {Data Curation ; *Databases, Protein ; Genome, Fungal ; Genomics ; Phylogeny ; Saccharomyces cerevisiae/genetics ; Sequence Homology, Amino Acid ; Time Factors ; Yeasts/genetics/*metabolism ; }, abstract = {Budding yeasts inhabit a range of environments by exploiting various metabolic traits. The genetic bases for these traits are mostly unknown, preventing their addition or removal in a chassis organism for metabolic engineering. Insight into the evolution of orthologs, paralogs and xenologs in the yeast pan-genome can help bridge these genotypes; however, existing phylogenomic databases do not span diverse yeasts, and sometimes cannot distinguish between these homologs. To help understand the molecular evolution of these traits in yeasts, we created Analyzing Yeasts by Reconstructing Ancestry of Homologs (AYbRAH), an open-source database of predicted and manually curated ortholog groups for 33 diverse fungi and yeasts in Dikarya, spanning 600 million years of evolution. OrthoMCL and OrthoDB were used to cluster protein sequence into ortholog and homolog groups, respectively; MAFFT and PhyML reconstructed the phylogeny of all homolog groups. Ortholog assignments for enzymes and small metabolite transporters were compared to their phylogenetic reconstruction, and curated to resolve any discrepancies. 
Information on homolog and ortholog groups can be viewed in the AYbRAH web portal (https://lmse.github.io/aybrah/), including functional annotations, predictions for mitochondrial localization and transmembrane domains, literature references and phylogenetic reconstructions. Ortholog assignments in AYbRAH were compared to HOGENOM, KEGG Orthology, OMA, eggNOG and PANTHER. PANTHER and OMA had the most congruent ortholog groups with AYbRAH, while the other phylogenomic databases had greater amounts of under-clustering, over-clustering or no ortholog annotations for proteins. Future plans are discussed for AYbRAH, and recommendations are made for other research communities seeking to create curated ortholog databases.}, } @article {pmid30871454, year = {2019}, author = {Naz, K and Naz, A and Ashraf, ST and Rizwan, M and Ahmad, J and Baumbach, J and Ali, A}, title = {PanRV: Pangenome-reverse vaccinology approach for identifications of potential vaccine candidates in microbial pangenome.}, journal = {BMC bioinformatics}, volume = {20}, number = {1}, pages = {123}, pmid = {30871454}, issn = {1471-2105}, mesh = {Bacterial Vaccines/pharmacology/*therapeutic use ; Genomics/*methods ; Humans ; Proteomics/*methods ; Vaccinology/*methods ; }, abstract = {BACKGROUND: A revolutionary diversion from classical vaccinology to reverse vaccinology approach has been observed in the last decade. The ever-increasing genomic and proteomic data has greatly facilitated the vaccine designing and development process. Reverse vaccinology is considered as a cost-effective and proficient approach to screen the entire pathogen genome. To look for broad-spectrum immunogenic targets and analysis of closely-related bacterial species, the assimilation of pangenome concept into reverse vaccinology approach is essential. The categories of species pangenome such as core, accessory, and unique genes sets can be analyzed for the identification of vaccine candidates through reverse vaccinology.

RESULTS: We have designed an integrative computational pipeline termed "PanRV" that employs both the pangenome and reverse vaccinology approaches. PanRV comprises four functional modules, including i) Pangenome Estimation Module (PGM), ii) Reverse Vaccinology Module (RVM), iii) Functional Annotation Module (FAM), and iv) Antibiotic Resistance Association Module (ARM). The pipeline is tested by using genomic data from 301 genomes of Staphylococcus aureus and the results are verified by experimentally known antigenic data.

CONCLUSION: The proposed pipeline has proved to be the first comprehensive automated pipeline that can precisely identify putative vaccine candidates exploiting the microbial pangenome. PanRV is a Linux based package developed in JAVA language. An executable installer is provided for ease of installation along with a user manual at https://sourceforge.net/projects/panrv2/ .}, } @article {pmid30863859, year = {2019}, author = {Li, H and Ding, X and Chen, C and Zheng, X and Han, H and Li, C and Gong, J and Xu, T and Li, QX and Ding, GC and Li, J}, title = {Enrichment of phosphate solubilizing bacteria during late developmental stages of eggplant (Solanum melongena L.).}, journal = {FEMS microbiology ecology}, volume = {95}, number = {3}, pages = {}, doi = {10.1093/femsec/fiz023}, pmid = {30863859}, issn = {1574-6941}, mesh = {Agriculture/*methods ; Bacteria/classification/genetics/growth & development/*metabolism ; Enterobacter/growth & development/metabolism ; Microbiota/genetics ; Phosphates/*metabolism ; RNA, Ribosomal, 16S/genetics ; Rhizosphere ; Soil Microbiology ; Solanum melongena/*growth & development/*microbiology ; }, abstract = {Understanding the ecology of phosphate solubilizing bacteria (PSBs) is critical for developing better strategies to increase crop productivity. In this study, the diversity of PSBs and of the total bacteria in the rhizosphere of eggplant (Solanum melongena L.) cultivated in organic, integrated and conventional farming systems was compared at four developmental stages of its lifecycle. Both selective culture and high-throughput sequencing analysis of 16S rRNA amplicons indicated that Enterobacter with strong or very strong in vivo phosphate solubilization activities was enriched in the rhizosphere during the fruiting stage. The high-throughput sequencing analysis results demonstrated that farming systems explained 23% of total bacterial community variation. 
Plant development and farming systems synergistically shaped the rhizospheric bacterial community, in which the degree of variation influenced by farming systems decreased over the plant development phase from 56% to 26.3% to 16.3%, and finally to no significant effect as the plant reached at fruiting stage. Pangenome analysis indicated that two-component and transporter systems varied between the rhizosphere and soil PSBs. This study elucidated the complex interactions among farming systems, plant development and rhizosphere microbiomes.}, } @article {pmid30859492, year = {2019}, author = {Burgueño-Roman, A and Castañeda-Ruelas, GM and Pacheco-Arjona, R and Jimenez-Edeza, M}, title = {Pathogenic potential of non-typhoidal Salmonella serovars isolated from aquatic environments in Mexico.}, journal = {Genes & genomics}, volume = {41}, number = {7}, pages = {767-779}, pmid = {30859492}, issn = {2092-9293}, mesh = {*Genome, Bacterial ; Mexico ; Molecular Sequence Annotation ; Phylogeny ; Rivers/*microbiology ; Salmonella/classification/*genetics/isolation & purification/pathogenicity ; Virulence/genetics ; }, abstract = {BACKGROUND: River water has been implicated as a source of non-typhoidal Salmonella (NTS) serovars in Mexico.

OBJECTIVE: To dissect the molecular pathogenesis and defense strategies of seven NTS strains isolated from river water in Mexico.

METHODS: The genomes of Salmonella serovars Give, Pomona, Kedougou, Stanley, Oranienburg, Sandiego, and Muenchen were sequenced using the whole-genome shotgun methodology on the Illumina MiSeq platform. The genome annotation and evolutionary analyses were conducted in the RAST and FigTree servers, respectively. The MLST was performed using the SRST2 tool and the comparisons between strains were clustered and visualized using the Gview server. An experimental virulence assay was included to evaluate the pathogenic potential of strains.

RESULTS: We report seven high-quality draft genomes, ranging from ~ 4.61 to ~ 5.12 Mb, with a median G + C value, coding DNA sequence, and protein values of 52.1%, 4697 bp, and 4,589 bp, respectively. The NTS serovars presented with an open pan-genome, offering novel genetic content. Each NTS serovar had an indistinguishable virulotype with a core genome (352 virulence genes) closely associated with Salmonella pathogenicity; 13 genes were characterized as serotype specific, which could explain differences in pathogenicity. All strains maintained highly conserved genetic content regarding the Salmonella pathogenicity islands (1-5) (86.9-100%), fimbriae (84.6%), and hypermutation (100%) genes. Adherence and invasion capacity were confirmed among NTS strains in Caco-2 cells.

CONCLUSION: Our results demonstrated the arsenal of virulence and defense molecular factors harbored on NTS serovars and highlight that environmental NTS strains are waterborne pathogens worthy of attention.}, } @article {pmid30858837, year = {2019}, author = {van Tonder, AJ and Bray, JE and Jolley, KA and Jansen van Rensburg, M and Quirk, SJ and Haraldsson, G and Maiden, MCJ and Bentley, SD and Haraldsson, Á and Erlendsdóttir, H and Kristinsson, KG and Brueggemann, AB}, title = {Genomic Analyses of >3,100 Nasopharyngeal Pneumococci Revealed Significant Differences Between Pneumococci Recovered in Four Different Geographical Regions.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {317}, pmid = {30858837}, issn = {1664-302X}, support = {//Wellcome Trust/United Kingdom ; }, abstract = {Understanding the structure of a bacterial population is essential in order to understand bacterial evolution. Estimating the core genome (those genes common to all, or nearly all, strains of a species) is a key component of such analyses. The size and composition of the core genome varies by dataset, but we hypothesized that the variation between different collections of the same bacterial species would be minimal. To investigate this, we analyzed the genome sequences of 3,118 pneumococci recovered from healthy individuals in Reykjavik (Iceland), Southampton (United Kingdom), Boston (United States), and Maela (Thailand). The analyses revealed a "supercore" genome (genes shared by all 3,118 pneumococci) of 558 genes, although an additional 354 core genes were shared by pneumococci from Reykjavik, Southampton, and Boston. Overall, the size and composition of the core and pan-genomes among pneumococci recovered in Reykjavik, Southampton, and Boston were similar. Maela pneumococci were distinctly different in that they had a smaller core genome and larger pan-genome. 
The pan-genome of Maela pneumococci contained several >25 Kb sequence regions (flanked by pneumococcal genes) that were homologous to genomic regions found in other bacterial species. Overall, our work revealed that some subsets of the global pneumococcal population are highly heterogeneous, and our hypothesis was rejected. This is an important finding in terms of understanding genetic variation among pneumococci and is also an essential point of consideration before generalizing the findings from a single dataset to the wider pneumococcal population.}, } @article {pmid30858412, year = {2019}, author = {Obolski, U and Gori, A and Lourenço, J and Thompson, C and Thompson, R and French, N and Heyderman, RS and Gupta, S}, title = {Identifying genes associated with invasive disease in S. pneumoniae by applying a machine learning approach to whole genome sequence typing data.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {4049}, doi = {10.1038/s41598-019-40346-7}, pmid = {30858412}, issn = {2045-2322}, support = {/WT_/Wellcome Trust/United Kingdom ; MR/N023129/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Genes, Bacterial/genetics ; Genome, Bacterial/*genetics ; Humans ; Meningitis/cerebrospinal fluid/genetics/microbiology ; Pneumonia/cerebrospinal fluid/*genetics/microbiology ; Sepsis/cerebrospinal fluid/genetics/microbiology ; Streptococcus pneumoniae/*genetics/pathogenicity ; *Whole Genome Sequencing ; }, abstract = {Streptococcus pneumoniae, a normal commensal of the upper respiratory tract, is a major public health concern, responsible for substantial global morbidity and mortality due to pneumonia, meningitis and sepsis. Why some pneumococci invade the bloodstream or CSF (so-called invasive pneumococcal disease; IPD) is uncertain. In this study we identify genes associated with IPD. 
We transform whole genome sequence (WGS) data into a sequence typing scheme, while avoiding the caveat of using an arbitrary genome as a reference by substituting it with a constructed pangenome. We then employ a random forest machine-learning algorithm on the transformed data, and find 43 genes consistently associated with IPD across three geographically distinct WGS data sets of pneumococcal carriage isolates. Of the genes we identified as associated with IPD, we find 23 genes previously shown to be directly relevant to IPD, as well as 18 uncharacterized genes. We suggest that these uncharacterized genes identified by us are also likely to be relevant for IPD.}, } @article {pmid30853944, year = {2019}, author = {Khaleque, HN and González, C and Shafique, R and Kaksonen, AH and Holmes, DS and Watkin, ELJ}, title = {Uncovering the Mechanisms of Halotolerance in the Extremely Acidophilic Members of the Acidihalobacter Genus Through Comparative Genome Analysis.}, journal = {Frontiers in microbiology}, volume = {10}, number = {}, pages = {155}, pmid = {30853944}, issn = {1664-302X}, abstract = {There are few naturally occurring environments where both acid and salinity stress exist together, consequently, there has been little evolutionary pressure for microorganisms to develop systems that enable them to deal with both stresses simultaneously. Members of the genus Acidihalobacter are iron- and sulfur-oxidizing, halotolerant acidophiles that have developed the ability to tolerate acid and saline stress and, therefore, have the potential to bioleach ores with brackish or saline process waters under acidic conditions. The genus consists of four members, A. prosperus DSM 5130T, A. prosperus DSM 14174, A. prosperus F5 and "A. ferrooxidans" DSM 14175. An in depth genome comparison was undertaken in order to provide a more comprehensive description of the mechanisms of halotolerance used by the different members of this genus. 
Pangenome analysis identified 29, 3 and 9 protein families related to halotolerance in the core, dispensable and unique genomes, respectively. The genes for halotolerance showed Ka/Ks ratios between 0 and 0.2, confirming that they are conserved and stabilized. All the Acidihalobacter genomes contained similar genes for the synthesis and transport of ectoine, which was recently found to be the dominant osmoprotectant in A. prosperus DSM 14174 and A. prosperus DSM 5130T. Similarities also existed in genes encoding low affinity potassium pumps, however, A. prosperus DSM 14174 was also found to contain genes encoding high affinity potassium pumps. Furthermore, only A. prosperus DSM 5130T and "A. ferrooxidans" DSM 14175 contained genes allowing the uptake of taurine as an osmoprotectant. Variations were also seen in genes encoding proteins involved in the synthesis and/or transport of periplasmic glucans, sucrose, proline, and glycine betaine. This suggests that versatility exists in the Acidihalobacter genus in terms of the mechanisms they can use for halotolerance. This information is useful for developing hypotheses for the search for life on exoplanets and moons.}, } @article {pmid30851098, year = {2019}, author = {Tahir Ul Qamar, M and Zhu, X and Xing, F and Chen, LL}, title = {ppsPCP: a plant presence/absence variants scanner and pan-genome construction pipeline.}, journal = {Bioinformatics (Oxford, England)}, volume = {35}, number = {20}, pages = {4156-4158}, doi = {10.1093/bioinformatics/btz168}, pmid = {30851098}, issn = {1367-4811}, mesh = {Eukaryota ; *Genome, Plant ; Genomics ; Prokaryotic Cells ; *Software ; }, abstract = {SUMMARY: Since the idea of pan-genomics emerged several tools and pipelines have been introduced for prokaryotic pan-genomics. However, not a single comprehensive pipeline has been reported which could overcome multiple challenges associated with eukaryotic pan-genomics. 
To aid the eukaryotic pan-genomic studies, here we present ppsPCP pipeline which is designed for eukaryotes especially for plants. It is capable of scanning presence/absence variants (PAVs) and constructing a fully annotated pan-genome. We believe with these unique features of PAV scanning and building a pan-genome together with its annotation, ppsPCP will be useful for plant pan-genomic studies and aid researchers to study genetic/phenotypic variations and genomic diversity.

AVAILABILITY AND IMPLEMENTATION: The ppsPCP is freely available at github DOI: https://doi.org/10.5281/zenodo.2567390 and webpage http://cbi.hzau.edu.cn/ppsPCP/.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid30851095, year = {2019}, author = {Rautiainen, M and Mäkinen, V and Marschall, T}, title = {Bit-parallel sequence-to-graph alignment.}, journal = {Bioinformatics (Oxford, England)}, volume = {35}, number = {19}, pages = {3599-3607}, pmid = {30851095}, issn = {1367-4811}, mesh = {*Algorithms ; *Genome ; Sequence Alignment ; Sequence Analysis, DNA ; }, abstract = {MOTIVATION: Graphs are commonly used to represent sets of sequences. Either edges or nodes can be labeled by sequences, so that each path in the graph spells a concatenated sequence. Examples include graphs to represent genome assemblies, such as string graphs and de Bruijn graphs, and graphs to represent a pan-genome and hence the genetic variation present in a population. Being able to align sequencing reads to such graphs is a key step for many analyses and its applications include genome assembly, read error correction and variant calling with respect to a variation graph.

RESULTS: We generalize two linear sequence-to-sequence algorithms to graphs: the Shift-And algorithm for exact matching and Myers' bitvector algorithm for semi-global alignment. These linear algorithms are both based on processing w sequence characters with a constant number of operations, where w is the word size of the machine (commonly 64), and achieve a speedup of up to w over naive algorithms. For a graph with |V| nodes and |E| edges and a sequence of length m, our bitvector-based graph alignment algorithm reaches a worst case runtime of O(|V|+⌈m/w⌉|E| log w) for acyclic graphs and O(|V|+m|E| log w) for arbitrary cyclic graphs. We apply it to five different types of graphs and observe a speedup between 3-fold and 20-fold compared with a previous (asymptotically optimal) alignment algorithm.

AVAILABILITY AND IMPLEMENTATION: https://github.com/maickrau/GraphAligner.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid30847473, year = {2019}, author = {Velsko, IM and Perez, MS and Richards, VP}, title = {Resolving Phylogenetic Relationships for Streptococcus mitis and Streptococcus oralis through Core- and Pan-Genome Analyses.}, journal = {Genome biology and evolution}, volume = {11}, number = {4}, pages = {1077-1087}, pmid = {30847473}, issn = {1759-6653}, mesh = {Genome, Bacterial ; *Phylogeny ; RNA, Ribosomal, 16S/genetics ; Streptococcus mitis/*genetics ; Streptococcus oralis/*genetics ; }, abstract = {Taxonomic and phylogenetic relationships of Streptococcus mitis and Streptococcus oralis have been difficult to establish biochemically and genetically. We used core-genome analyses of S. mitis and S. oralis, as well as the closely related species Streptococcus pneumoniae and Streptococcus parasanguinis, to clarify the phylogenetic relationships between S. mitis and S. oralis, as well as within subclades of S. oralis. All S. mitis (n = 67), S. oralis (n = 89), S. parasanguinis (n = 27), and 27 S. pneumoniae genome assemblies were downloaded from NCBI and reannotated. All genes were delineated into homologous clusters and maximum-likelihood phylogenies built from putatively nonrecombinant core gene sets. Population structure was determined using Bayesian genome clustering, and patristic distance was calculated between populations. Population-specific gene content was assessed using a phylogenetic-based genome-wide association approach. Streptococcus mitis and S. oralis formed distinct clades, but species mixing suggests taxonomic misassignment. Patristic distance between populations suggests that S. oralis subsp. dentisani is a distinct species, whereas S. oralis subsp. tigurinus and subsp. oralis are supported as subspecies, and that S. mitis comprises two subspecies. None of the genes within the pan-genomes of S. mitis and S. 
oralis could be statistically correlated with either, and the dispensable genomes showed extensive variation among isolates. These are likely important factors contributing to established overlap in biochemical characteristics for these taxa. Based on core-genome analysis, the substructure of S. oralis and S. mitis should be redefined, and species assignments within S. oralis and S. mitis should be made based on whole-genome analysis to be robust to misassignment.}, } @article {pmid30825936, year = {2019}, author = {Eisenbach, L and Geissler, AJ and Ehrmann, MA and Vogel, RF}, title = {Comparative genomics of Lactobacillus sakei supports the development of starter strain combinations.}, journal = {Microbiological research}, volume = {221}, number = {}, pages = {1-9}, doi = {10.1016/j.micres.2019.01.001}, pmid = {30825936}, issn = {1618-0623}, mesh = {Bioreactors/*microbiology ; Carbohydrate Metabolism/*genetics ; Fermentation/*genetics ; Fermented Foods and Beverages/*microbiology ; Food Microbiology ; Genome, Bacterial/*genetics ; Genomics ; Lactobacillus sakei/*genetics/metabolism ; Meat/microbiology ; Sequence Analysis, DNA ; }, abstract = {Strains of Lactobacillus sakei can be isolated from a variety of sources including meat, fermented sausages, sake, sourdough, sauerkraut or kimchi. Selected strains are widely used as starter cultures for sausage fermentation. Recently we have demonstrated that control about the lactic microbiota in fermenting sausages is achieved rather by pairs or strain sets than by single strains. In this work we characterized the pan genome of L. sakei to enable exploitation of the genomic diversity of L. sakei for the establishment of assertive starter strain sets. We have established the full genome sequences of nine L. sakei strains from different sources of isolation and included in the analysis the genome of L. sakei 23K. 
Comparative genomics revealed an accessory genome comprising about 50% of the pan genome and different lineages of strains with no relation to their source of isolation. Group and strain specific differences could be found, which namely referred to agmatine and citrate metabolism. The presence of genes encoding metabolic pathways for fructose, sucrose and trehalose as well as gluconate in all strains suggests a general adaptation to plant/sugary environments and a life in communities with other genera. Analysis of the plasmidome did not reveal any specific mechanisms of adaptation to a habitat. The predicted differences of metabolic settings enable prediction of partner strains, which can occupy the meat environment to a large extent and establish competitive exclusion of autochthonous microbiota. This may assist the development of a new generation of meat starter cultures containing L. sakei strains.}, } @article {pmid30814318, year = {2019}, author = {Raphael, BH and Huynh, T and Brown, E and Smith, JC and Ruberto, I and Getsinger, L and White, S and Winchell, JM}, title = {Culture of Clinical Specimens Reveals Extensive Diversity of Legionella pneumophila Strains in Arizona.}, journal = {mSphere}, volume = {4}, number = {1}, pages = {}, pmid = {30814318}, issn = {2379-5042}, mesh = {Arizona/epidemiology ; Bacterial Typing Techniques ; Centers for Disease Control and Prevention, U.S. ; *Genetic Variation ; *Genome, Bacterial ; Genotype ; Humans ; Legionella pneumophila/*classification/isolation & purification ; Legionnaires' Disease/epidemiology/*microbiology ; Multilocus Sequence Typing ; Serogroup ; United States ; Whole Genome Sequencing ; }, abstract = {Between 2000 and 2017, a total of 236 Legionella species isolates from Arizona were submitted to the CDC for reference testing. Most of these isolates were recovered from bronchoalveolar lavage specimens. Although the incidence of legionellosis in Arizona is less than the overall U.S. 
incidence, Arizona submits the largest number of isolates to the CDC for testing compared to those from other states. In addition to a higher proportion of culture confirmation of legionellosis cases in Arizona than in other states, all Legionella pneumophila isolates are forwarded to the CDC for confirmatory testing. Compared to that from other states, a higher proportion of isolates from Arizona were identified as belonging to L. pneumophila serogroups 6 (28.2%) and 8 (8.9%). Genome sequencing was conducted on 113 L. pneumophila clinical isolates not known to be associated with outbreaks in order to understand the genomic diversity of strains causing legionellosis in Arizona. Whole-genome multilocus sequence typing (wgMLST) revealed 17 clusters of isolates sharing at least 99% identical allele content. Only two of these clusters contained isolates from more than one individual with exposure at the same facility. Additionally, wgMLST analysis revealed a group of 31 isolates predominantly belonging to serogroup 6 and containing isolates from three separate clusters. Single nucleotide polymorphism (SNP) and pangenome analysis were used to further resolve genome sequences belonging to a subset of isolates. This study demonstrates that culture of clinical specimens for Legionella spp. reveals a highly diverse population of strains causing legionellosis in Arizona which could be underappreciated using other diagnostic approaches. IMPORTANCE Culture of clinical specimens from patients with Legionnaires' disease is rarely performed, restricting our understanding of the diversity and ecology of Legionella. Culture of Legionella from patient specimens in Arizona revealed a greater proportion of non-serogroup 1 Legionella pneumophila isolates than in other U.S. isolates examined. Disease caused by such isolates may go undetected using other diagnostic methods. 
Moreover, genome sequence analysis revealed that these isolates were genetically diverse, and understanding these populations may help in future environmental source attribution studies.}, } @article {pmid30811910, year = {2019}, author = {Gabbett, MT and Laporte, J and Sekar, R and Nandini, A and McGrath, P and Sapkota, Y and Jiang, P and Zhang, H and Burgess, T and Montgomery, GW and Chiu, R and Fisk, NM}, title = {Molecular Support for Heterogonesis Resulting in Sesquizygotic Twinning.}, journal = {The New England journal of medicine}, volume = {380}, number = {9}, pages = {842-849}, doi = {10.1056/NEJMoa1701313}, pmid = {30811910}, issn = {1533-4406}, support = {T12-403/15-N//Research Grants Council of the Hong Kong/International ; }, mesh = {Adult ; Alleles ; *Chimera ; Embolism, Paradoxical/complications ; Female ; *Fertilization ; Genotype ; Humans ; Male ; Polymorphism, Single Nucleotide ; Pregnancy ; Pregnancy, Twin ; Thromboembolism/etiology ; Twins, Monozygotic/*genetics ; Ultrasonography, Prenatal ; Vena Cava, Inferior ; }, abstract = {Sesquizygotic multiple pregnancy is an exceptional intermediate between monozygotic and dizygotic twinning. We report a monochorionic twin pregnancy with fetal sex discordance. Genotyping of amniotic fluid from each sac showed that the twins were maternally identical but chimerically shared 78% of their paternal genome, which makes them genetically in between monozygotic and dizygotic; they are sesquizygotic. We observed no evidence of sesquizygosis in 968 dizygotic twin pairs whom we screened by means of pangenome single-nucleotide polymorphism genotyping. Data from published repositories also show that sesquizygosis is a rare event. 
Detailed genotyping implicates chimerism arising at the juncture of zygotic division, termed heterogonesis, as the likely initial step in the causation of sesquizygosis.}, } @article {pmid30808378, year = {2019}, author = {Caputo, A and Fournier, PE and Raoult, D}, title = {Genome and pan-genome analysis to classify emerging bacteria.}, journal = {Biology direct}, volume = {14}, number = {1}, pages = {5}, pmid = {30808378}, issn = {1745-6150}, mesh = {Bacteria/*classification/genetics ; Genomics/*methods ; Humans ; *Microbiota ; RNA, Bacterial/analysis ; RNA, Ribosomal, 16S/analysis ; Sequence Analysis, RNA/methods ; Spectrometry, Mass, Matrix-Assisted Laser Desorption-Ionization/methods ; }, abstract = {BACKGROUND: In the recent years, genomic and pan-genomic studies have become increasingly important. Culturomics allows to study human microbiota through the use of different culture conditions, coupled with a method of rapid identification by MALDI-TOF, or 16S rRNA. Bacterial taxonomy is undergoing many changes as a consequence. With the help of pan-genomic analyses, species can be redefined, and new species definitions generated.

RESULTS: Genomics, coupled with culturomics, has led to the discovery of many novel bacterial species or genera, including Akkermansia muciniphila and Microvirga massiliensis. Using the genome to define species has been applied within the genus Klebsiella. A discontinuity or an abrupt break in the core/pan-genome ratio can uncover novel species.

CONCLUSIONS: Applying genomic and pan-genomic analyses to the reclassification of other bacterial species or genera will be important in the future of medical microbiology. The pan-genome is one of many new innovative tools in bacterial taxonomy.

REVIEWERS: This article was reviewed by William Martin, Eric Bapteste and James Mcinerney.

OPEN PEER REVIEW: Reviewed by William Martin, Eric Bapteste and James Mcinerney.}, } @article {pmid30801025, year = {2019}, author = {Entwistle, S and Li, X and Yin, Y}, title = {Orphan Genes Shared by Pathogenic Genomes Are More Associated with Bacterial Pathogenicity.}, journal = {mSystems}, volume = {4}, number = {1}, pages = {}, pmid = {30801025}, issn = {2379-5077}, support = {R15 GM114706/GM/NIGMS NIH HHS/United States ; }, abstract = {Orphan genes (also known as ORFans [i.e., orphan open reading frames]) are new genes that enable an organism to adapt to its specific living environment. Our focus in this study is to compare ORFans between pathogens (P) and nonpathogens (NP) of the same genus. Using the pangenome idea, we have identified 130,169 ORFans in nine bacterial genera (505 genomes) and classified these ORFans into four groups: (i) SS-ORFans (P), which are only found in a single pathogenic genome; (ii) SS-ORFans (NP), which are only found in a single nonpathogenic genome; (iii) PS-ORFans (P), which are found in multiple pathogenic genomes; and (iv) NS-ORFans (NP), which are found in multiple nonpathogenic genomes. Within the same genus, pathogens do not always have more genes, more ORFans, or more pathogenicity-related genes (PRGs)-including prophages, pathogenicity islands (PAIs), virulence factors (VFs), and horizontal gene transfers (HGTs)-than nonpathogens. Interestingly, in pathogens of the nine genera, the percentages of PS-ORFans are consistently higher than those of SS-ORFans, which is not true in nonpathogens. Similarly, in pathogens of the nine genera, the percentages of PS-ORFans matching the four types of PRGs are also always higher than those of SS-ORFans, but this is not true in nonpathogens. All of these findings suggest the greater importance of PS-ORFans for bacterial pathogenicity. 
IMPORTANCE Recent pangenome analyses of numerous bacterial species have suggested that each genome of a single species may have a significant fraction of its gene content unique or shared by a very few genomes (i.e., ORFans). We selected nine bacterial genera, each containing at least five pathogenic and five nonpathogenic genomes, to compare their ORFans in relation to pathogenicity-related genes. Pathogens in these genera are known to cause a number of common and devastating human diseases such as pneumonia, diphtheria, melioidosis, and tuberculosis. Thus, they are worthy of in-depth systems microbiology investigations, including the comparative study of ORFans between pathogens and nonpathogens. We provide direct evidence to suggest that ORFans shared by more pathogens are more associated with pathogenicity-related genes and thus are more important targets for development of new diagnostic markers or therapeutic drugs for bacterial infectious diseases.}, } @article {pmid30783197, year = {2019}, author = {Lin, JN and Lai, CH and Yang, CH and Huang, YH and Lin, HH}, title = {Genomic Features, Comparative Genomics, and Antimicrobial Susceptibility Patterns of Elizabethkingia bruuniana.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {2267}, pmid = {30783197}, issn = {2045-2322}, mesh = {Anti-Bacterial Agents/pharmacology ; Drug Resistance, Multiple, Bacterial/drug effects/*genetics ; Flavobacteriaceae/*genetics/metabolism ; *Genome, Bacterial ; }, abstract = {Elizabethkingia bruuniana is a novel species of the Elizabethkingia genus. There is scant information on this microorganism. Here, we report the whole-genome features and antimicrobial susceptibility patterns of E. bruuniana strain EM798-26. Elizabethkingia strain EM798-26 was initially identified as E. miricola. This isolate contained a circular genome of 4,393,011 bp. 
The whole-genome sequence-based phylogeny revealed that Elizabethkingia strain EM798-26 was in the same group of the type strain E. bruuniana G0146T. Both in silico DNA-DNA hybridization and average nucleotide identity analysis clearly demonstrated that Elizabethkingia strain EM798-26 was a species of E. bruuniana. The pan-genome analysis identified 2,875 gene families in the core genome and 5,199 gene families in the pan genome of eight publicly available E. bruuniana genome sequences. The unique genes accounted for 0.2-12.1% of the pan genome in each E. bruuniana. A total of 59 potential virulence factor homologs were predicted in the whole-genome of E. bruuniana strain EM798-26. This isolate was nonsusceptible to multiple antibiotics, but susceptible to aminoglycosides, minocycline, and levofloxacin. The whole-genome sequence analysis of E. bruuniana EM798-26 revealed 29 homologs of antibiotic resistance-related genes. This study presents the genomic features of E. bruuniana. Knowledge of the genomic characteristics provides valuable insights into a novel species.}, } @article {pmid30782660, year = {2019}, author = {López-Pérez, M and Jayakumar, JM and Haro-Moreno, JM and Zaragoza-Solas, A and Reddi, G and Rodriguez-Valera, F and Shapiro, OH and Alam, M and Almagro-Moreno, S}, title = {Evolutionary Model of Cluster Divergence of the Emergent Marine Pathogen Vibrio vulnificus: From Genotype to Ecotype.}, journal = {mBio}, volume = {10}, number = {1}, pages = {}, pmid = {30782660}, issn = {2150-7511}, mesh = {Aquaculture ; Aquatic Organisms/microbiology ; Cluster Analysis ; Computational Biology ; *Ecotype ; Evolution, Molecular ; Gene Flow ; Gene Transfer, Horizontal ; *Genetic Variation ; Genome, Bacterial ; *Genotype ; Phenotype ; Recombination, Genetic ; Vibrio vulnificus/*classification/*genetics/isolation & purification/physiology ; }, abstract = {Vibrio vulnificus, an opportunistic pathogen, is the causative agent of a life-threatening septicemia and a 
rising problem for aquaculture worldwide. The genetic factors that differentiate its clinical and environmental strains remain enigmatic. Furthermore, clinical strains have emerged from every clade of V. vulnificus. In this work, we investigated the underlying genomic properties and population dynamics of the V. vulnificus species from an evolutionary and ecological point of view. Genome comparisons and bioinformatic analyses of 113 V. vulnificus isolates indicate that the population of V. vulnificus is made up of four different clusters. We found evidence that recombination and gene flow between the two largest clusters (cluster 1 [C1] and C2) have drastically decreased to the point where they are diverging independently. Pangenome and phenotypic analyses showed two markedly different lifestyles for these two clusters, indicating commensal (C2) and bloomer (C1) ecotypes, with differences in carbohydrate utilization, defense systems, and chemotaxis, among other characteristics. Nonetheless, we identified frequent intra- and interspecies exchange of mobile genetic elements (e.g., antibiotic resistance plasmids, novel "chromids," or two different and concurrent type VI secretion systems) that provide high levels of genetic diversity in the population. Surprisingly, we identified strains from both clusters in the mucosa of aquaculture species, indicating that manmade niches are bringing strains from the two clusters together. We propose an evolutionary model of V. vulnificus that could be broadly applicable to other pathogenic vibrios and facultative bacterial pathogens to pursue strategies to prevent their infections and emergence. IMPORTANCE Vibrio vulnificus is an emergent marine pathogen and is the cause of a deadly septicemia. However, the genetic factors that differentiate its clinical and environmental strains and its several biotypes remain mostly enigmatic. In this work, we investigated the underlying genomic properties and population dynamics of the V. 
vulnificus species to elucidate the traits that make these strains emerge as a human pathogen. The acquisition of different ecological determinants could have allowed the development of highly divergent clusters with different lifestyles within the same environment. However, we identified strains from both clusters in the mucosa of aquaculture species, indicating that manmade niches are bringing strains from the two clusters together, posing a potential risk of recombination and of emergence of novel variants. We propose a new evolutionary model that provides a perspective that could be broadly applicable to other pathogenic vibrios and facultative bacterial pathogens to pursue strategies to prevent their infections.}, } @article {pmid30781742, year = {2019}, author = {Issa, E and Salloum, T and Panossian, B and Ayoub, D and Abboud, E and Tokajian, S}, title = {Genome Mining and Comparative Analysis of Streptococcus intermedius Causing Brain Abscess in a Child.}, journal = {Pathogens (Basel, Switzerland)}, volume = {8}, number = {1}, pages = {}, pmid = {30781742}, issn = {2076-0817}, abstract = {Streptococcus intermedius (SI) is associated with prolonged hospitalization and low survival rates. The genetic mechanisms involved in brain abscess development and genome evolution in comparison to other members of the Streptococcus anginosus group are understudied. We performed a whole-genome comparative analysis of an SI isolate, LAU_SINT, associated with brain abscess following sinusitis with all SI genomes in addition to S. constellatus and S. anginosus. Selective pressure on virulence factors, phages, pan-genome evolution and single-nucleotide polymorphism analysis were assessed. The structural details of the type seven secretion system (T7SS) was elucidated and compared with different organisms. ily and nanA were both abundant and conserved. Nisin resistance determinants were found in 47% of the isolates. 
Pan-genome and SNPs-based analysis didn't reveal significant geo-patterns. Our results showed that two SC isolates were misidentified as SI. We propose the presence of four T7SS modules (I-IV) located on various genomic islands. We detected a variety of factors linked to metal ions binding on the GIs carrying T7SS. This is the first detailed report characterizing the T7SS and its link to nisin resistance and metal ions binding in SI. These and yet uncharacterized T7SS transmembrane proteins merit further studies and could represent potential therapeutic targets.}, } @article {pmid30779737, year = {2019}, author = {Grytten, I and Rand, KD and Nederbragt, AJ and Storvik, GO and Glad, IK and Sandve, GK}, title = {Graph Peak Caller: Calling ChIP-seq peaks on graph-based reference genomes.}, journal = {PLoS computational biology}, volume = {15}, number = {2}, pages = {e1006731}, pmid = {30779737}, issn = {1553-7358}, mesh = {Algorithms ; Arabidopsis/genetics ; Chromatin Immunoprecipitation/*methods ; Genome/genetics ; Genomics/*methods ; Protein Binding ; Sequence Analysis, DNA/*methods ; Software ; Transcription Factors ; }, abstract = {Graph-based representations are considered to be the future for reference genomes, as they allow integrated representation of the steadily increasing data on individual variation. Currently available tools allow de novo assembly of graph-based reference genomes, alignment of new read sets to the graph representation as well as certain analyses like variant calling and haplotyping. We here present a first method for calling ChIP-Seq peaks on read data aligned to a graph-based reference genome. The method is a graph generalization of the peak caller MACS2, and is implemented in an open source tool, Graph Peak Caller. 
By using the existing tool vg to build a pan-genome of Arabidopsis thaliana, we validate our approach by showing that Graph Peak Caller with a pan-genome reference graph can trace variants within peaks that are not part of the linear reference genome, and find peaks that in general are more motif-enriched than those found by MACS2.}, } @article {pmid30775379, year = {2019}, author = {Lu, QF and Cao, DM and Su, LL and Li, SB and Ye, GB and Zhu, XY and Wang, JP}, title = {Genus-Wide Comparative Genomics Analysis of Neisseria to Identify New Genes Associated with Pathogenicity and Niche Adaptation of Neisseria Pathogens.}, journal = {International journal of genomics}, volume = {2019}, number = {}, pages = {6015730}, pmid = {30775379}, issn = {2314-436X}, abstract = {N. gonorrhoeae and N. meningitidis, the only two human pathogens of Neisseria, are closely related species. But the niches they survived in and their pathogenic characteristics are distinctly different. However, the genetic basis of these differences has not yet been fully elucidated. In this study, comparative genomics analysis was performed based on 15 N. gonorrhoeae, 75 N. meningitidis, and 7 nonpathogenic Neisseria genomes. Core-pangenome analysis found 1111 conserved gene families among them, and each of these species groups had opening pangenome. We found that 452, 78, and 319 gene families were unique in N. gonorrhoeae, N. meningitidis, and both of them, respectively. Those unique gene families were regarded as candidates that related to their pathogenicity and niche adaptation. The relationships among them have been partly verified by functional annotation analysis. But at least one-third genes for each gene set have not found the certain functional information. Simple sequence repeat (SSR), the basis of gene phase variation, was found abundant in the membrane or related genes of each unique gene set, which may facilitate their adaptation to variable host environments. 
Protein-protein interaction (PPI) analysis found at least five distinct PPI clusters in N. gonorrhoeae and four in N. meningitidis, and 167 and 52 proteins with unknown function were contained within them, respectively.}, } @article {pmid30760727, year = {2019}, author = {Stevens, MJA and Tasara, T and Klumpp, J and Stephan, R and Ehling-Schulz, M and Johler, S}, title = {Whole-genome-based phylogeny of Bacillus cytotoxicus reveals different clades within the species and provides clues on ecology and evolution.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {1984}, pmid = {30760727}, issn = {2045-2322}, mesh = {Bacillus/*classification/*genetics ; Bacterial Proteins/genetics ; Bacterial Toxins/genetics ; Food Microbiology ; Food Safety ; Foodborne Diseases/*microbiology ; Gastrointestinal Diseases/*microbiology ; Genome, Bacterial/*genetics ; Phylogeny ; Plasmids/genetics ; Virulence Factors/genetics ; Whole Genome Sequencing ; }, abstract = {Bacillus cytotoxicus is a member of the Bacillus cereus group linked to fatal cases of diarrheal disease. Information on B. cytotoxicus is very limited; in particular comprehensive genomic data is lacking. Thus, we applied a genomic approach to characterize B. cytotoxicus and decipher its population structure. To this end, complete genomes of ten B. cytotoxicus were sequenced and compared to the four publicly available full B. cytotoxicus genomes and genomes of other B. cereus group members. Average nucleotide identity, core genome, and pan genome clustering resulted in clear distinction of B. cytotoxicus strains from other strains of the B. cereus group. Genomic content analyses showed that a hydroxyphenylalanine operon is present in B. cytotoxicus, but absent in all other members of the B. cereus group. It enables degradation of aromatic compounds to succinate and pyruvate and was likely acquired from another Bacillus species. It allows for utilization of tyrosine and might have given a B. 
cytotoxicus ancestor an evolutionary advantage resulting in species differentiation. Plasmid content showed that B. cytotoxicus is flexible in exchanging genes, allowing for quick adaptation to the environment. Genome-based phylogenetic analyses divided the B. cytotoxicus strains into four clades that also differed in virulence gene content.}, } @article {pmid30745056, year = {2019}, author = {Lye, ZN and Purugganan, MD}, title = {Copy Number Variation in Domestication.}, journal = {Trends in plant science}, volume = {24}, number = {4}, pages = {352--365}, doi = {10.1016/j.tplants.2019.01.003}, pmid = {30745056}, issn = {1878-4372}, mesh = {Animals ; *DNA Copy Number Variations ; *Domestication ; Phenotype ; Plants ; }, abstract = {Domesticated plants have long served as excellent models for studying evolution. Many genes and mutations underlying important domestication traits have been identified, and most causal mutations appear to be SNPs. Copy number variation (CNV) is an important source of genetic variation that has been largely neglected in studies of domestication. Ongoing work demonstrates the importance of CNVs as a source of genetic variation during domestication, and during the diversification of domesticated taxa. Here, we review how CNVs contribute to evolutionary processes underlying domestication, and review examples of domestication traits caused by CNVs. 
We draw from examples in plant species, but also highlight cases in animal systems that could illuminate the roles of CNVs in the domestication process.}, } @article {pmid30718880, year = {2019}, author = {Arora, S and Steuernagel, B and Gaurav, K and Chandramohan, S and Long, Y and Matny, O and Johnson, R and Enk, J and Periyannan, S and Singh, N and Asyraf Md Hatta, M and Athiyannan, N and Cheema, J and Yu, G and Kangara, N and Ghosh, S and Szabo, LJ and Poland, J and Bariana, H and Jones, JDG and Bentley, AR and Ayliffe, M and Olson, E and Xu, SS and Steffenson, BJ and Lagudah, E and Wulff, BBH}, title = {Resistance gene cloning from a wild crop relative by sequence capture and association genetics.}, journal = {Nature biotechnology}, volume = {37}, number = {2}, pages = {139--143}, doi = {10.1038/s41587-018-0007-9}, pmid = {30718880}, issn = {1546-1696}, support = {BB/E006868/1//Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Chromosome Mapping ; *Cloning, Molecular ; Crops, Agricultural/*genetics ; Disease Resistance/*genetics ; *Genes, Plant ; Genetic Association Studies ; Genetic Variation ; Genomics ; Genotype ; Models, Genetic ; Phenotype ; Phylogeny ; Plant Diseases/*genetics ; Polymorphism, Single Nucleotide ; Seedlings ; Triticum/genetics ; }, abstract = {Disease resistance (R) genes from wild relatives could be used to engineer broad-spectrum resistance in domesticated crops. We combined association genetics with R gene enrichment sequencing (AgRenSeq) to exploit pan-genome variation in wild diploid wheat and rapidly clone four stem rust resistance genes. 
AgRenSeq enables R gene cloning in any crop that has a diverse germplasm panel.}, } @article {pmid30718868, year = {2019}, author = {Zou, Y and Xue, W and Luo, G and Deng, Z and Qin, P and Guo, R and Sun, H and Xia, Y and Liang, S and Dai, Y and Wan, D and Jiang, R and Su, L and Feng, Q and Jie, Z and Guo, T and Xia, Z and Liu, C and Yu, J and Lin, Y and Tang, S and Huo, G and Xu, X and Hou, Y and Liu, X and Wang, J and Yang, H and Kristiansen, K and Li, J and Jia, H and Xiao, L}, title = {1,520 reference genomes from cultivated human gut bacteria enable functional microbiome analyses.}, journal = {Nature biotechnology}, volume = {37}, number = {2}, pages = {179--185}, doi = {10.1038/s41587-018-0008-8}, pmid = {30718868}, issn = {1546-1696}, mesh = {Bacteria/classification ; Cluster Analysis ; Computational Biology/*methods ; Conserved Sequence ; Feces ; *Gastrointestinal Microbiome ; Genome, Bacterial ; Genomics ; Humans ; *Metagenome ; Metagenomics ; Phylogeny ; Polymorphism, Single Nucleotide ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {Reference genomes are essential for metagenomic analyses and functional characterization of the human gut microbiota. We present the Culturable Genome Reference (CGR), a collection of 1,520 nonredundant, high-quality draft genomes generated from >6,000 bacteria cultivated from fecal samples of healthy humans. Of the 1,520 genomes, which were chosen to cover all major bacterial phyla and genera in the human gut, 264 are not represented in existing reference genome catalogs. We show that this increase in the number of reference bacterial genomes improves the rate of mapping metagenomic sequencing reads from 50% to >70%, enabling higher-resolution descriptions of the human gut microbiome. We use the CGR genomes to annotate functions of 338 bacterial species, showing the utility of this resource for functional studies. 
We also carry out a pan-genome analysis of 38 important human gut species, which reveals the diversity and specificity of functional enrichment between their core and dispensable genomes.}, } @article {pmid30714895, year = {2019}, author = {McCarthy, CGP and Fitzpatrick, DA}, title = {Pan-genome analyses of model fungal species.}, journal = {Microbial genomics}, volume = {5}, number = {2}, pages = {}, doi = {10.1099/mgen.0.000243}, pmid = {30714895}, issn = {2057-5858}, mesh = {Evolution, Molecular ; Fungi/*classification/genetics ; *Genome, Fungal ; Genome-Wide Association Study ; Genomics ; Phylogeny ; }, abstract = {The concept of the species 'pan-genome', the union of 'core' conserved genes and all 'accessory' non-conserved genes across all strains of a species, was first proposed in prokaryotes to account for intraspecific variability. Species pan-genomes have been extensively studied in prokaryotes, but evidence of species pan-genomes has also been demonstrated in eukaryotes such as plants and fungi. Using a previously published methodology based on sequence homology and conserved microsynteny, in addition to bespoke pipelines, we have investigated the pan-genomes of four model fungal species: Saccharomyces cerevisiae, Candida albicans, Cryptococcus neoformans var. grubii and Aspergillus fumigatus. Between 80 and 90 % of gene models per strain in each of these species are core genes that are highly conserved across all strains of that species, many of which are involved in housekeeping and conserved survival processes. In many of these species, the remaining 'accessory' gene models are clustered within subterminal regions and may be involved in pathogenesis and antimicrobial resistance. Analysis of the ancestry of species core and accessory genomes suggests that fungal pan-genomes evolve by strain-level innovations such as gene duplication as opposed to wide-scale horizontal gene transfer. 
Our findings lend further supporting evidence to the existence of species pan-genomes in eukaryote taxa.}, } @article {pmid30709821, year = {2019}, author = {Lugli, GA and Mancino, W and Milani, C and Duranti, S and Mancabelli, L and Napoli, S and Mangifesta, M and Viappiani, A and Anzalone, R and Longhi, G and van Sinderen, D and Ventura, M and Turroni, F}, title = {Dissecting the Evolutionary Development of the Species Bifidobacterium animalis through Comparative Genomics Analyses.}, journal = {Applied and environmental microbiology}, volume = {85}, number = {7}, pages = {}, pmid = {30709821}, issn = {1098-5336}, mesh = {Animals ; Bifidobacterium/genetics ; Bifidobacterium animalis/enzymology/*genetics/metabolism ; Birds ; Carbohydrate Metabolism ; Carbohydrates ; *Comparative Genomic Hybridization ; *Evolution, Molecular ; Feces/microbiology ; Gastrointestinal Microbiome ; Gastrointestinal Tract/microbiology ; Genes, Bacterial/*genetics ; Genetic Variation ; Genome, Bacterial/genetics ; Humans ; Mammals ; *Phylogeny ; Polysaccharides ; Species Specificity ; }, abstract = {Bifidobacteria are members of the gut microbiota of animals, including mammals, birds, and social insects. In this study, we analyzed and determined the pangenome of Bifidobacterium animalis species, encompassing B. animalis subsp. animalis and the B. animalis subsp. lactis taxon, which is one of the most intensely exploited probiotic bifidobacterial species. In order to reveal differences within the B. animalis species, detailed comparative genomics and phylogenomics analyses were performed, indicating that these two subspecies recently arose through divergent evolutionary events. A subspecies-specific core genome was identified for both B. animalis subspecies, revealing the existence of subspecies-defining genes involved in carbohydrate metabolism. 
Notably, these in silico analyses coupled with carbohydrate profiling assays suggest genetic adaptations toward a distinct glycan milieu for each member of the B. animalis subspecies, resulting in a divergent evolutionary development of the two subspecies. IMPORTANCE The majority of characterized B. animalis strains have been isolated from human fecal samples. In order to explore genome variability within this species, we isolated 15 novel strains from the gastrointestinal tracts of different animals, including mammals and birds. The present study allowed us to reconstruct the pangenome of this taxon, including the genome contents of 56 B. animalis strains. Through careful assessment of subspecies-specific core genes of the B. animalis subsp. animalis/lactis taxon, we identified genes encoding enzymes involved in carbohydrate transport and metabolism, while unveiling specific gene acquisition and loss events that caused the evolutionary emergence of these two subspecies.}, } @article {pmid30704472, year = {2019}, author = {Nono, AD and Chen, K and Liu, X}, title = {Comparison of different functional prediction scores using a gene-based permutation model for identifying cancer driver genes.}, journal = {BMC medical genomics}, volume = {12}, number = {Suppl 1}, pages = {22}, pmid = {30704472}, issn = {1755-8794}, support = {P30 CA016672/CA/NCI NIH HHS/United States ; }, mesh = {Genes, Neoplasm/*genetics ; Genomics/*methods ; Humans ; *Models, Genetic ; }, abstract = {BACKGROUND: Identifying cancer driver genes (CDG) is a crucial step in cancer genomic toward the advancement of precision medicine. However, driver gene discovery is a very challenging task because we are not only dealing with huge amount of data; but we are also faced with the complexity of the disease including the heterogeneity of background somatic mutation rate in each cancer patient. 
It is generally accepted that CDG harbor variants conferring growth advantage in the malignant cell and they are positively selected, which are critical to cancer development; whereas, non-driver genes harbor random mutations with no functional consequence on cancer. Based on this fact, function prediction based approaches for identifying CDG have been proposed to interrogate the distribution of functional predictions among mutations in cancer genomes (eLS 1-16, 2016). Assuming most of the observed mutations are passenger mutations and given the quantitative predictions for the functional impact of the mutations, genes enriched of functional or deleterious mutations are more likely to be drivers. The promises of these methods have been continually refined and can therefore be applied to increase accuracy in detecting new candidate CDGs. However, current function prediction based approaches only focus on coding mutations and lack a systematic way to pick the best mutation deleteriousness prediction algorithms for usage.

RESULTS: In this study, we propose a new function prediction based approach to discover CDGs through a gene-based permutation approach. Our method not only covers both coding and non-coding regions of the genes; but it also accounts for the heterogeneous mutational context in cohort of cancer patients. The permutation model was implemented independently using seven popular deleteriousness prediction scores covering splicing regions (SPIDEX), coding regions (MetaLR, and VEST3) and pan-genome (CADD, DANN, Fathmm-MKL coding and Fathmm-MKL noncoding). We applied this new approach to somatic single nucleotide variants (SNVs) from whole-genome sequences of 119 breast and 24 lung cancer patients and compared the seven deleteriousness prediction scores for their performance in this study.

CONCLUSION: The new function prediction based approach not only predicted known cancer genes listed in the Cancer Gene Census (CGC), but also new candidate CDGs that are worth further investigation. The results showed the advantage of utilizing pan-genome deleteriousness prediction scores in function prediction based methods. Although VEST3 score, a deleteriousness prediction score for missense mutations, has the best performance in breast cancer, it was topped by CADD and Fathmm-MKL coding, two pan-genome deleteriousness prediction scores, in lung cancer.}, } @article {pmid30701191, year = {2019}, author = {Leviatan, S and Segal, E}, title = {A Significant Expansion of Our Understanding of the Composition of the Human Microbiome.}, journal = {mSystems}, volume = {4}, number = {1}, pages = {}, pmid = {30701191}, issn = {2379-5077}, abstract = {Shotgun sequencing of samples taken from the human microbiome often reveals only partial mapping of the sequenced metagenomic reads to existing reference genomes. Such partial mappability indicates that many genomes are missing in our reference genome set. This is particularly true for non-Western populations and for samples that do not originate from the gut. Pasolli et al. (E. Pasolli, F. Asnicar, S. Manara, M. 
Zolfo, et al., Cell, 2019, https://doi.org/10.1016/j.cell.2019.01.001) perform a grand effort to expand the reference set, and to better classify its members, revealing a wider pangenome of existing species as well as identifying new species of previously unknown taxonomic branches.}, } @article {pmid30687297, year = {2018}, author = {Sánchez-Osuna, M and Cortés, P and Barbé, J and Erill, I}, title = {Origin of the Mobile Di-Hydro-Pteroate Synthase Gene Determining Sulfonamide Resistance in Clinical Isolates.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {3332}, pmid = {30687297}, issn = {1664-302X}, abstract = {Sulfonamides are synthetic chemotherapeutic agents that work as competitive inhibitors of the di-hydro-pteroate synthase (DHPS) enzyme, encoded by the folP gene. Resistance to sulfonamides is widespread in the clinical setting and predominantly mediated by plasmid- and integron-borne sul1-3 genes encoding mutant DHPS enzymes that do not bind sulfonamides. In spite of their clinical importance, the genetic origin of sul1-3 genes remains unknown. Here we analyze sul genes and their genetic neighborhoods to uncover sul signature elements that enable the elucidation of their genetic origin. We identify a protein sequence Sul motif associated with sul-encoded proteins, as well as consistent association of a phosphoglucosamine mutase gene (glmM) with the sul2 gene. We identify chromosomal folP genes bearing these genetic markers in two bacterial families: the Rhodobiaceae and the Leptospiraceae. Bayesian phylogenetic inference of FolP/Sul and GlmM protein sequences clearly establishes that sul1-2 and sul3 genes originated as a mobilization of folP genes present in, respectively, the Rhodobiaceae and the Leptospiraceae, and indicate that the Rhodobiaceae folP gene was transferred from the Leptospiraceae. 
Analysis of %GC content in folP/sul gene sequences supports the phylogenetic inference results and indicates that the emergence of the Sul motif in chromosomally encoded FolP proteins is ancient and considerably predates the clinical introduction of sulfonamides. In vitro assays reveal that both the Rhodobiaceae and the Leptospiraceae, but not other related chromosomally encoded FolP proteins confer resistance in a sulfonamide-sensitive Escherichia coli background, indicating that the Sul motif is associated with sulfonamide resistance. Given the absence of any known natural sulfonamides targeting DHPS, these results provide a novel perspective on the emergence of resistance to synthetic chemotherapeutic agents, whereby preexisting resistant variants in the vast bacterial pangenome may be rapidly selected for and disseminated upon the clinical introduction of novel chemotherapeuticals.}, } @article {pmid30687252, year = {2018}, author = {Slama, HB and Cherif-Silini, H and Chenari Bouket, A and Qader, M and Silini, A and Yahiaoui, B and Alenezi, FN and Luptakova, L and Triki, MA and Vallat, A and Oszako, T and Rateb, ME and Belbahri, L}, title = {Screening for Fusarium Antagonistic Bacteria From Contrasting Niches Designated the Endophyte Bacillus halotolerans as Plant Warden Against Fusarium.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {3236}, pmid = {30687252}, issn = {1664-302X}, abstract = {Date palm (Phoenix dactylifera L.) plantations in North Africa are nowadays threatened with the spread of the Bayoud disease caused by Fusarium oxysporum f. sp. albedinis, already responsible for destroying date production in other infected areas, mainly in Morocco. Biological control holds great promise for sustainable and environmental-friendly management of the disease. In this study, the additional benefits to agricultural ecosystems of using plant growth promoting rhizobacteria (PGPR) or endophytes are addressed. 
First, PGPR or endophytes can offer an interesting bio-fertilization, meaning that it can add another layer to the sustainability of the approach. Additionally, screening of contrasting niches can yield bacterial actors that could represent wardens against whole genera or groups of plant pathogenic agents thriving in semi-arid to arid ecosystems. Using this strategy, we recovered four bacterial isolates, designated BFOA1, BFOA2, BFOA3 and BFOA4, that proved very active against F. oxysporum f. sp. albedinis. BFOA1-BFOA4 proved also active against 16 Fusarium isolates belonging to four species: F. oxysporum (with strains phytopathogenic of Olea europaea and tomato), F. solani (with different strains attacking O. europaea and potato), F. acuminatum (pathogenic on O. europaea) and F. chlamydosporum (phytopathogenic of O. europaea). BFOA1-BFOA4 bacterial isolates exhibited strong activities against another four major phytopathogens: Botrytis cinerea, Alternaria alternata, Phytophthora infestans, and Rhizoctonia bataticola. Isolates BFOA1-BFOA4 had the ability to grow at temperatures up to 35°C, pH range of 5-10, and tolerate high concentrations of NaCl and up to 30% PEG. The isolates also showed relevant direct and indirect PGP features, including growth on nitrogen-free medium, phosphate solubilization and auxin biosynthesis, as well as resistance to metal and xenobiotic stress. Phylogenomic analysis of BFOA1-BFOA4 isolates indicated that they all belong to Bacillus halotolerans, which could therefore considered as a warden against Fusarium infection in plants. Comparative genomics allowed us to functionally describe the open pan genome of B. halotolerans and LC-HRMS and GCMS analyses, enabling the description of diverse secondary metabolites including pulegone, 2-undecanone, and germacrene D, with important antimicrobial and insecticidal properties. In conclusion, B. 
halotolerans could be used as an efficient bio-fertilizer and bio-control agent in semi-arid and arid ecosystems.}, } @article {pmid30679463, year = {2019}, author = {Arabaghian, H and Salloum, T and Alousi, S and Panossian, B and Araj, GF and Tokajian, S}, title = {Molecular Characterization of Carbapenem Resistant Klebsiella pneumoniae and Klebsiella quasipneumoniae Isolated from Lebanon.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {531}, pmid = {30679463}, issn = {2045-2322}, mesh = {Carbapenems/*pharmacology ; Genome, Bacterial ; Humans ; Klebsiella/drug effects/*genetics ; Klebsiella Infections/drug therapy/epidemiology/*microbiology ; Klebsiella pneumoniae/drug effects/*genetics ; Lebanon/epidemiology ; Multilocus Sequence Typing ; *beta-Lactam Resistance ; }, abstract = {Klebsiella pneumoniae is a Gram-negative organism and a major public health threat. In this study, we used whole-genome sequences to characterize 32 carbapenem-resistant K. pneumoniae (CRKP) and two carbapenem-resistant K. quasipneumoniae (CRKQ). Antimicrobial resistance was assessed using disk diffusion and E-test, while virulence was assessed in silico. The capsule type was determined by sequencing the wzi gene. The plasmid diversity was assessed by PCR-based replicon typing to detect the plasmid incompatibility (Inc) groups. The genetic relatedness was determined by multilocus sequence typing, pan-genome, and recombination analysis. All of the isolates were resistant to ertapenem together with imipenem and/or meropenem. Phenotypic resistance was due to blaOXA-48, blaNDM-1, blaNDM-7, or the coupling of ESBLs and outer membrane porin modifications. This is the first comprehensive study reporting on the WGS of CRKP and the first detection of CRKQ in the region. 
The presence and dissemination of CRKP and CRKQ, with some additionally having characteristics of hypervirulent clones such as the hypermucoviscous phenotype and the capsular type K2, are particularly concerning. Additionally, mining the completely sequenced K. pneumoniae genomes revealed the key roles of mobile genetic elements in the spread of antibiotic resistance and in understanding the epidemiology of these clinically significant pathogens.}, } @article {pmid30658579, year = {2019}, author = {Zhu, D and He, J and Yang, Z and Wang, M and Jia, R and Chen, S and Liu, M and Zhao, X and Yang, Q and Wu, Y and Zhang, S and Liu, Y and Zhang, L and Yu, Y and You, Y and Chen, X and Cheng, A}, title = {Comparative analysis reveals the Genomic Islands in Pasteurella multocida population genetics: on Symbiosis and adaptability.}, journal = {BMC genomics}, volume = {20}, number = {1}, pages = {63}, pmid = {30658579}, issn = {1471-2164}, support = {No. 2017YFD050080//the National Key Research and Development Program of China/ ; No. CARS-42-17//China Agricultural Research System/ ; No. 2016JPT0004//Special Fund for Key Laboratory of Animal Disease and Human Health of Sichuan Province/ ; CARS-SVDIP//Sichuan Veterinary Medicine and Drug Innovation Group of China Agricultural Research System/ ; }, mesh = {Animals ; Genes, Bacterial/genetics ; Genetics, Population ; Genome, Bacterial/*genetics ; Genomic Islands/*genetics ; Genomics/methods ; Humans ; Pasteurella Infections/microbiology ; Pasteurella multocida/classification/*genetics/physiology ; Phylogeny ; Species Specificity ; Symbiosis/*genetics ; }, abstract = {BACKGROUND: Pasteurella multocida (P. multocida) is a widespread opportunistic pathogen that infects human and various animals. Genomic Islands (GIs) are one of the most important mobile components that quickly help bacteria acquire large fragments of foreign genes. However, the effects of GIs on P. multocida are unknown in the evolution of bacterial populations.

RESULTS: Ten avian-sourced P. multocida obtained through high-throughput sequencing together with 104 publicly available P. multocida genomes were used to analyse their population genetics, thus constructed a pan-genome containing 3948 protein-coding genes. Through the pan-genome, the open evolutionary pattern of P. multocida was revealed, and the functional components of 944 core genes, 2439 accessory genes and 565 unique genes were analysed. In addition, a total of 280 GIs were predicted in all strains. Combined with the pan-genome of P. multocida, the GIs accounted for 5.8% of the core genes in the pan-genome, mainly related to functional metabolic activities; the accessory genes accounted for 42.3%, mainly for the enrichment of adaptive genes; and the unique genes accounted for 35.4%, containing some defence mechanism-related genes.

CONCLUSIONS: The effects of GIs on the population genetics of P. multocida evolution and adaptation to the environment are reflected by the proportion and function of the pan-genome acquired from GIs, and the large quantities of GI data will aid in additional population genetics studies.}, } @article {pmid30657445, year = {2019}, author = {Wilson, K and Ely, B}, title = {Analyses of four new Caulobacter Phicbkviruses indicate independent lineages.}, journal = {The Journal of general virology}, volume = {100}, number = {2}, pages = {321--331}, doi = {10.1099/jgv.0.001218}, pmid = {30657445}, issn = {1465-2099}, support = {R25 GM076277/GM/NIGMS NIH HHS/United States ; }, mesh = {Bacteriophages/*classification/genetics/*isolation & purification ; Caulobacter/*virology ; Evolution, Molecular ; Gene Order ; Gene Transfer, Horizontal ; Genes, Viral ; *Genome, Viral ; Genomics ; Phylogeny ; Sequence Homology, Nucleic Acid ; }, abstract = {Bacteriophages with genomes larger than 200 kbp are considered giant phages, and the giant Phicbkviruses are the most frequently isolated Caulobacter crescentus phages. In this study, we compare six bacteriophage genomes that differ from the genomes of the majority of Phicbkviruses. Four of these genomes are much larger than those of the rest of the Phicbkviruses, with genome sizes that are more than 250 kbp. A comparison of 16 Phicbkvirus genomes identified a 'core genome' of 69 genes that is present in all of these Phicbkvirus genomes, as well as shared accessory genes and genes that are unique for each phage. Most of the core genes are clustered into the regions coding for structural proteins or those involved in DNA replication. A phylogenetic analysis indicated that these 16 Caulobacter Phicbkvirus genomes are related, but they represent four distinct branches of the Phicbkvirus genomic tree with distantly related branches sharing little nucleotide homology. 
In contrast, pairwise comparisons within each branch of the phylogenetic tree showed that more than 80 % of the entire genome is shared among phages within a group. This conservation of the genomes within each branch indicates that horizontal gene transfer events between the groups are rare. Therefore, the Phicbkvirus genus consists of at least four different phylogenetic branches that are evolving independently from one another. One of these branches contains a 27-gene inversion relative to the other three branches. Also, an analysis of the tRNA genes showed that they are relatively mobile within the Phicbkvirus genus.}, } @article {pmid30647471, year = {2019}, author = {Sherman, RM and Forman, J and Antonescu, V and Puiu, D and Daya, M and Rafaels, N and Boorgula, MP and Chavan, S and Vergara, C and Ortega, VE and Levin, AM and Eng, C and Yazdanbakhsh, M and Wilson, JG and Marrugo, J and Lange, LA and Williams, LK and Watson, H and Ware, LB and Olopade, CO and Olopade, O and Oliveira, RR and Ober, C and Nicolae, DL and Meyers, DA and Mayorga, A and Knight-Madden, J and Hartert, T and Hansel, NN and Foreman, MG and Ford, JG and Faruque, MU and Dunston, GM and Caraballo, L and Burchard, EG and Bleecker, ER and Araujo, MI and Herrera-Paz, EF and Campbell, M and Foster, C and Taub, MA and Beaty, TH and Ruczinski, I and Mathias, RA and Barnes, KC and Salzberg, SL}, title = {Author Correction: Assembly of a pan-genome from deep sequencing of 910 humans of African descent.}, journal = {Nature genetics}, volume = {51}, number = {2}, pages = {364}, doi = {10.1038/s41588-018-0335-1}, pmid = {30647471}, issn = {1546-1718}, abstract = {In the version of this article initially published, the statement "there are no pan-genomes for any other animal or plant species" was incorrect. The statement has been corrected to "there are no reported pan-genomes for any other animal species, to our knowledge." We thank David Edwards for bringing this error to our attention. 
The error has been corrected in the HTML and PDF versions of the article.}, } @article {pmid30637341, year = {2019}, author = {Blaustein, RA and McFarland, AG and Ben Maamar, S and Lopez, A and Castro-Wallace, S and Hartmann, EM}, title = {Pangenomic Approach To Understanding Microbial Adaptations within a Model Built Environment, the International Space Station, Relative to Human Hosts and Soil.}, journal = {mSystems}, volume = {4}, number = {1}, pages = {}, pmid = {30637341}, issn = {2379-5077}, support = {R25 GM079300/GM/NIGMS NIH HHS/United States ; }, abstract = {Understanding underlying mechanisms involved in microbial persistence in the built environment (BE) is essential for strategically mitigating potential health risks. To test the hypothesis that BEs impose selective pressures resulting in characteristic adaptive responses, we performed a pangenomics meta-analysis leveraging 189 genomes (accessed from GenBank) of two epidemiologically important taxa, Bacillus cereus and Staphylococcus aureus, isolated from various origins: the International Space Station (ISS; a model BE), Earth-based BEs, soil, and humans. Our objectives were to (i) identify differences in the pangenomic composition of generalist and host-associated organisms, (ii) characterize genes and functions involved in BE-associated selection, and (iii) identify genomic signatures of ISS-derived strains of potential relevance for astronaut health. The pangenome of B. cereus was more expansive than that of S. aureus, which had a dominant core component. Genomic contents of both taxa significantly correlated with isolate origin, demonstrating an importance for biogeography and potential niche adaptations. ISS/BE-enriched functions were often involved in biosynthesis, catabolism, materials transport, metabolism, and stress response. Multiple origin-enriched functions also overlapped across taxa, suggesting conserved adaptive processes. 
We further characterized two mobile genetic elements with local neighborhood genes encoding biosynthesis and stress response functions that distinctively associated with B. cereus from the ISS. Although antibiotic resistance genes were present in ISS/BE isolates, they were also common in counterparts elsewhere. Overall, despite differences in microbial lifestyle, some functions appear common to remaining viable in the BE, and those functions are not typically associated with direct impacts on human health. IMPORTANCE The built environment contains a variety of microorganisms, some of which pose critical human health risks (e.g., hospital-acquired infection, antibiotic resistance dissemination). We uncovered a combination of complex biological functions that may play a role in bacterial survival under the presumed selective pressures in a model built environment-the International Space Station-by using an approach to compare pangenomes of bacterial strains from two clinically relevant species (B. cereus and S. aureus) isolated from both built environments and humans. 
Our findings suggest that the most crucial bacterial functions involved in this potential adaptive response are specific to bacterial lifestyle and do not appear to have direct impacts on human health.}, } @article {pmid30631083, year = {2019}, author = {Abreo, E and Altier, N}, title = {Pangenome of Serratia marcescens strains from nosocomial and environmental origins reveals different populations and the links between them.}, journal = {Scientific reports}, volume = {9}, number = {1}, pages = {46}, pmid = {30631083}, issn = {2045-2322}, mesh = {Computational Biology ; Cross Infection/*microbiology ; *Environmental Microbiology ; *Genetic Variation ; Genome, Bacterial ; *Genotype ; Humans ; Multilocus Sequence Typing ; Serratia Infections/*microbiology ; Serratia marcescens/*genetics/isolation & purification ; }, abstract = {Serratia marcescens is a Gram-negative bacterial species that can be found in a wide range of environments like soil, water and plant surfaces, while it is also known as an opportunistic human pathogen in hospitals and as a plant growth promoting bacteria (PGPR) in crops. We have used a pangenome-based approach, based on publicly available genomes, to apply whole genome multilocus sequence type schemes to assess whether there is an association between source and genotype, aiming at differentiating between isolates from nosocomial sources and the environment, and between strains reported as PGPR from other environmental strains. Most genomes from a nosocomial setting and environmental origin could be assigned to the proposed nosocomial or environmental MLSTs, which is indicative of an association between source and genotype. The fact that a few genomes from a nosocomial source showed an environmental MLST suggests that a minority of nosocomial strains have recently derived from the environment. 
PGPR strains were assigned to different environmental types and clades but only one clade comprised strains accumulating a low number of known virulence and antibiotic resistance determinants and was exclusively from environmental sources. This clade is envisaged as a group of promissory MLSTs for selecting prospective PGPR strains.}, } @article {pmid30622973, year = {2018}, author = {Hisham, Y and Ashhab, Y}, title = {Identification of Cross-Protective Potential Antigens against Pathogenic Brucella spp. through Combining Pan-Genome Analysis with Reverse Vaccinology.}, journal = {Journal of immunology research}, volume = {2018}, number = {}, pages = {1474517}, pmid = {30622973}, issn = {2314-7156}, mesh = {Animals ; Antigens, Bacterial/genetics/*immunology ; Bacterial Vaccines/*immunology ; Brucella/pathogenicity/*physiology ; Brucellosis/*immunology ; Cattle ; Computational Biology ; Conserved Sequence/genetics ; Cross Reactions ; Epitopes/immunology ; Genome ; Humans ; Proteome ; Vaccines, Attenuated ; Vaccinology ; *Virulence Factors ; Zoonoses ; }, abstract = {Brucellosis is a zoonotic infectious disease caused by bacteria of the genus Brucella. Brucella melitensis, Brucella abortus, and Brucella suis are the most pathogenic species of this genus causing the majority of human and domestic animal brucellosis. There is a need to develop a safe and potent subunit vaccine to overcome the serious drawbacks of the live attenuated Brucella vaccines. The aim of this work was to discover antigen candidates conserved among the three pathogenic species. In this study, we employed a reverse vaccinology strategy to compute the core proteome of 90 completed genomes: 55 B. melitensis, 17 B. abortus, and 18 B. suis. The core proteome was analyzed by a metasubcellular localization prediction pipeline to identify surface-associated proteins. The identified proteins were thoroughly analyzed using various in silico tools to obtain the most potential protective antigens. 
The number of core proteins obtained from analyzing the 90 proteomes was 1939 proteins. The surface-associated proteins were 177. The number of potential antigens was 87; those with adhesion score ≥ 0.5 were considered antigen with "high potential," while those with a score of 0.4-0.5 were considered antigens with "intermediate potential." According to a cumulative score derived from protein antigenicity, density of MHC-I and MHC-II epitopes, MHC allele coverage, and B-cell epitope density scores, a final list of 34 potential antigens was obtained. Remarkably, most of the 34 proteins are associated with bacterial adhesion, invasion, evasion, and adaptation to the hostile intracellular environment of macrophages which is adjusted to deprive Brucella of required nutrients. Our results provide a manageable list of potential protective antigens for developing a potent vaccine against brucellosis. Moreover, our elaborated analysis can provide further insights into novel Brucella virulence factors. Our next step is to test some of these antigens using an appropriate antigen delivery system.}, } @article {pmid30619233, year = {2018}, author = {Livingstone, PG and Morphew, RM and Whitworth, DE}, title = {Genome Sequencing and Pan-Genome Analysis of 23 Corallococcus spp. Strains Reveal Unexpected Diversity, With Particular Plasticity of Predatory Gene Sets.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {3187}, pmid = {30619233}, issn = {1664-302X}, abstract = {Corallococcus is an abundant genus of predatory soil myxobacteria, containing two species, C. coralloides (for which a genome sequence is available) and C. exiguus. To investigate the genomic basis of predation, we genome-sequenced 23 Corallococcus strains. Genomic similarity metrics grouped the sequenced strains into at least nine distinct genomospecies, divided between two major sub-divisions of the genus, encompassing previously described diversity. 
The Corallococcus pan-genome was found to be open, with strains exhibiting highly individual gene sets. On average, only 30.5% of each strain's gene set belonged to the core pan-genome, while more than 75% of the accessory pan-genome genes were present in less than four of the 24 genomes. The Corallococcus accessory pan-proteome was enriched for the COG functional category "Secondary metabolism," with each genome containing on average 55 biosynthetic gene clusters (BGCs), of which only 20 belonged to the core pan-genome. Predatory activity was assayed against ten prey microbes and found to be mostly incongruent with phylogeny or BGC complement. Thus, predation seems multifactorial, depending partially on BGC complement, but also on the accessory pan-genome - genes most likely acquired horizontally. These observations encourage further exploration of Corallococcus as a source for novel bioactive secondary metabolites and predatory proteins.}, } @article {pmid30619175, year = {2018}, author = {Wu, Y and Zaiden, N and Cao, B}, title = {The Core- and Pan-Genomic Analyses of the Genus Comamonas: From Environmental Adaptation to Potential Virulence.}, journal = {Frontiers in microbiology}, volume = {9}, number = {}, pages = {3096}, pmid = {30619175}, issn = {1664-302X}, abstract = {Comamonas is often reported to be one of the major members of microbial communities in various natural and engineered environments. Versatile catabolic capabilities of Comamonas have been studied extensively in the last decade. In contrast, little is known about the ecological roles and adaptation of Comamonas to different environments as well as the virulence of potentially pathogenic Comamonas strains. In this study, we provide genomic insights into the potential ecological roles and virulence of Comamonas by analysing the entire gene set (pangenome) and the genes present in all genomes (core genome) using 34 genomes of 11 different Comamonas species. 
The analyses revealed that the metabolic pathways enabling Comamonas to acquire energy from various nutrient sources are well conserved. Genes for denitrification and ammonification are abundant in Comamonas, suggesting that Comamonas plays an important role in the nitrogen biogeochemical cycle. They also encode sophisticated redox sensory systems and diverse c-di-GMP controlling systems, allowing them to be able to effectively adjust their biofilm lifestyle to changing environments. The virulence factors in Comamonas were found to be highly species-specific. The conserved strategies used by potentially pathogenic Comamonas for surface adherence, motility control, nutrient acquisition and stress tolerance were also revealed.}, } @article {pmid30618776, year = {2018}, author = {Kayani, MR and Zheng, YC and Xie, FC and Kang, K and Li, HY and Zhao, HT}, title = {Genome Sequences and Comparative Analysis of Two Extended-Spectrum Extensively-Drug Resistant Mycobacterium tuberculosis Strains.}, journal = {Frontiers in pharmacology}, volume = {9}, number = {}, pages = {1492}, pmid = {30618776}, issn = {1663-9812}, } @article {pmid33204407, year = {2019}, author = {Bandoy, DD}, title = {Large scale enterohemorrhagic E coli population genomic analysis using whole genome typing reveals recombination clusters and potential drug target.}, journal = {F1000Research}, volume = {8}, number = {}, pages = {33}, pmid = {33204407}, issn = {2046-1402}, abstract = {Enterohemorrhagic Escherichia coli continues to be a significant public health risk. With the onset of next generation sequencing, whole genome sequences require a new paradigm of analysis relevant for epidemiology and drug discovery. A large-scale bacterial population genomic analysis was applied to 702 isolates of serotypes associated with EHEC resulting in five pangenome clusters. Serotype incongruence with pangenome types suggests recombination clusters. 
Core genome analysis was performed to determine the population wide distribution of sdiA as potential drug target. Protein modelling revealed nonsynonymous variants are notably absent in the ligand binding site for quorum sensing, indicating that population wide conservation of the sdiA ligand site can be targeted for potential prophylactic purposes. Applying pathotype-wide pangenomics as a guide for determining evolution of pharmacophore sites is a potential approach in drug discovery.}, } @article {pmid30608550, year = {2019}, author = {diCenzo, GC and Mengoni, A and Perrin, E}, title = {Chromids Aid Genome Expansion and Functional Diversification in the Family Burkholderiaceae.}, journal = {Molecular biology and evolution}, volume = {36}, number = {3}, pages = {562--574}, doi = {10.1093/molbev/msy248}, pmid = {30608550}, issn = {1537-1719}, mesh = {Adaptation, Biological/genetics ; Burkholderiaceae/*genetics ; Gene Transfer, Horizontal ; Genome Size ; *Genome, Bacterial ; *Replicon ; Selection, Genetic ; }, abstract = {Multipartite genomes, containing at least two large replicons, are found in diverse bacteria; however, the advantage of this genome structure remains incompletely understood. Here, we perform comparative genomics of hundreds of finished β-proteobacterial genomes to gain insights into the role and emergence of multipartite genomes. Almost all essential secondary replicons (chromids) of the β-proteobacteria are found in the family Burkholderiaceae. These replicons arose from just two plasmid acquisition events, and they were likely stabilized early in their evolution by the presence of core genes. On average, Burkholderiaceae genera with multipartite genomes had a larger total genome size, but smaller chromosome, than genera without secondary replicons. Pangenome-level functional enrichment analyses suggested that interreplicon functional biases are partially driven by the enrichment of secondary replicons in the accessory pangenome fraction. 
Nevertheless, the small overlap in orthologous groups present in each replicon's pangenome indicated a clear functional separation of the replicons. Chromids appeared biased to environmental adaptation, as the functional categories enriched on chromids were also overrepresented on the chromosomes of the environmental genera (Paraburkholderia and Cupriavidus) compared with the pathogenic genera (Burkholderia and Ralstonia). Using ancestral state reconstruction, it was predicted that the rate of accumulation of modern-day genes by chromids was more rapid than the rate of gene accumulation by the chromosomes. Overall, the data are consistent with a model where the primary advantage of secondary replicons is in facilitating increased rates of gene acquisition through horizontal gene transfer, consequently resulting in replicons enriched in genes associated with adaptation to novel environments.}, } @article {pmid30606234, year = {2019}, author = {Dillon, MM and Thakur, S and Almeida, RND and Wang, PW and Weir, BS and Guttman, DS}, title = {Recombination of ecologically and evolutionarily significant loci maintains genetic cohesion in the Pseudomonas syringae species complex.}, journal = {Genome biology}, volume = {20}, number = {1}, pages = {3}, pmid = {30606234}, issn = {1474-760X}, mesh = {*Evolution, Molecular ; Genetic Variation ; *Genome, Bacterial ; *Phylogeny ; Pseudomonas syringae/*genetics ; Recombination, Genetic ; Selection, Genetic ; Type II Secretion Systems/genetics ; }, abstract = {BACKGROUND: Pseudomonas syringae is a highly diverse bacterial species complex capable of causing a wide range of serious diseases on numerous agronomically important crops. We examine the evolutionary relationships of 391 agricultural and environmental strains using whole-genome sequencing and evolutionary genomic analyses.

RESULTS: We describe the phylogenetic distribution of all 77,728 orthologous gene families in the pan-genome, reconstruct the core genome phylogeny using the 2410 core genes, hierarchically cluster the accessory genome, identify the diversity and distribution of type III secretion systems and their effectors, predict ecologically and evolutionary relevant loci, and establish the molecular evolutionary processes operating on gene families. Phylogenetic and recombination analyses reveals that the species complex is subdivided into primary and secondary phylogroups, with the former primarily comprised of agricultural isolates, including all of the well-studied P. syringae strains. In contrast, the secondary phylogroups include numerous environmental isolates. These phylogroups also have levels of genetic diversity typically found among distinct species. An analysis of rates of recombination within and between phylogroups revealed a higher rate of recombination within primary phylogroups than between primary and secondary phylogroups. We also find that "ecologically significant" virulence-associated loci and "evolutionarily significant" loci under positive selection are over-represented among loci that undergo inter-phylogroup genetic exchange.

CONCLUSIONS: While inter-phylogroup recombination occurs relatively rarely, it is an important force maintaining the genetic cohesion of the species complex, particularly among primary phylogroup strains. This level of genetic cohesion, and the shared plant-associated niche, argues for considering the primary phylogroups as a single biological species.}, } @article {pmid30598532, year = {2019}, author = {Hübner, S and Bercovich, N and Todesco, M and Mandel, JR and Odenheimer, J and Ziegler, E and Lee, JS and Baute, GJ and Owens, GL and Grassa, CJ and Ebert, DP and Ostevik, KL and Moyers, BT and Yakimowski, S and Masalia, RR and Gao, L and Ćalić, I and Bowers, JE and Kane, NC and Swanevelder, DZH and Kubach, T and Muños, S and Langlade, NB and Burke, JM and Rieseberg, LH}, title = {Sunflower pan-genome analysis shows that hybridization altered gene content and disease resistance.}, journal = {Nature plants}, volume = {5}, number = {1}, pages = {54--62}, doi = {10.1038/s41477-018-0329-0}, pmid = {30598532}, issn = {2055-0278}, mesh = {Crops, Agricultural/genetics/microbiology ; Disease Resistance/genetics ; Gene Ontology ; Genes, Plant ; Genetic Variation ; Genome, Plant ; Helianthus/*genetics/*microbiology ; *Hybridization, Genetic ; Plant Diseases/*genetics/microbiology ; Recombination, Genetic ; Selection, Genetic ; }, abstract = {Domesticated plants and animals often display dramatic responses to selection, but the origins of the genetic diversity underlying these responses remain poorly understood. Despite domestication and improvement bottlenecks, the cultivated sunflower remains highly variable genetically, possibly due to hybridization with wild relatives. To characterize genetic diversity in the sunflower and to quantify contributions from wild relatives, we sequenced 287 cultivated lines, 17 Native American landraces and 189 wild accessions representing 11 compatible wild species. 
Cultivar sequences failing to map to the sunflower reference were assembled de novo for each genotype to determine the gene repertoire, or 'pan-genome', of the cultivated sunflower. Assembled genes were then compared to the wild species to estimate origins. Results indicate that the cultivated sunflower pan-genome comprises 61,205 genes, of which 27% vary across genotypes. Approximately 10% of the cultivated sunflower pan-genome is derived through introgression from wild sunflower species, and 1.5% of genes originated solely through introgression. Gene ontology functional analyses further indicate that genes associated with biotic resistance are over-represented among introgressed regions, an observation consistent with breeding records. Analyses of allelic variation associated with downy mildew resistance provide an example in which such introgressions have contributed to resistance to a globally challenging disease.}, } @article {pmid30594655, year = {2019}, author = {Tao, Y and Zhao, X and Mace, E and Henry, R and Jordan, D}, title = {Exploring and Exploiting Pan-genomics for Crop Improvement.}, journal = {Molecular plant}, volume = {12}, number = {2}, pages = {156--169}, doi = {10.1016/j.molp.2018.12.016}, pmid = {30594655}, issn = {1752-9867}, mesh = {Crops, Agricultural/*genetics/growth & development/physiology ; Genes, Plant/genetics ; Genetic Variation ; Genomics/*methods ; }, abstract = {Genetic variation ranging from single-nucleotide polymorphisms to large structural variants (SVs) can cause variation of gene content among individuals within the same species. There is an increasing appreciation that a single reference genome is insufficient to capture the full landscape of genetic diversity of a species. Pan-genome analysis offers a platform to evaluate the genetic diversity of a species via investigation of its entire genome repertoire. 
Although a recent wave of pan-genomic studies has shed new light on crop diversity and improvement using advanced sequencing technology, the potential applications of crop pan-genomics in crop improvement are yet to be fully exploited. In this review, we highlight the progress achieved in understanding crop pan-genomics, discuss biological activities that cause SVs, review important agronomical traits affected by SVs, and present our perspective on the application of pan-genomics in crop improvement.}, } @article {pmid30588394, year = {2018}, author = {Brankovics, B and Kulik, T and Sawicki, J and Bilska, K and Zhang, H and de Hoog, GS and van der Lee, TA and Waalwijk, C and van Diepeningen, AD}, title = {First steps towards mitochondrial pan-genomics: detailed analysis of Fusarium graminearum mitogenomes.}, journal = {PeerJ}, volume = {6}, number = {}, pages = {e5963}, pmid = {30588394}, issn = {2167-8359}, abstract = {There is a gradual shift from representing a species' genome by a single reference genome sequence to a pan-genome representation. Pan-genomes are the abstract representations of the genomes of all the strains that are present in the population or species. In this study, we employed a pan-genomic approach to analyze the intraspecific mitochondrial genome diversity of Fusarium graminearum. We present an improved reference mitochondrial genome for F. graminearum with an intron-exon annotation that was verified using RNA-seq data. Each of the 24 studied isolates had a distinct mitochondrial sequence. Length variation in the F. graminearum mitogenome was found to be largely due to variation of intron regions (99.98%). The "intronless" mitogenome length was found to be quite stable and could be informative when comparing species. The coding regions showed high conservation, while the variability of intergenic regions was highest. 
However, the most important variable parts are the intron regions, because they contain approximately half of the variable sites, make up more than half of the mitogenome, and show presence/absence variation. Furthermore, our analyses show that the mitogenome of F. graminearum is recombining, as was previously shown in F. oxysporum, indicating that mitogenome recombination is a common phenomenon in Fusarium. The majority of mitochondrial introns in F. graminearum belongs to group I introns, which are associated with homing endonuclease genes (HEGs). Mitochondrial introns containing HE genes may spread within populations through homing, where the endonuclease recognizes and cleaves the recognition site in the target gene. After cleavage of the "host" gene, it is replaced by the gene copy containing the intron with HEG. We propose to use introns unique to a population for tracking the spread of the given population, because introns can spread through vertical inheritance, recombination as well as via horizontal transfer. We demonstrate how pooled sequencing of strains can be used for mining mitogenome data. The usage of pooled sequencing offers a scalable solution for population analysis and for species level comparisons studies. 
This study may serve as a basis for future mitochondrial genome variability studies and representations.}, } @article {pmid30587126, year = {2018}, author = {Bochkareva, OO and Moroz, EV and Davydov, II and Gelfand, MS}, title = {Genome rearrangements and selection in multi-chromosome bacteria Burkholderia spp.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {965}, pmid = {30587126}, issn = {1471-2164}, support = {18-14-00358//Russian Science Foundation/ ; 16-54-21004//Russian Foundation of Basic Research/ ; IZLRZ3\_163872//Swiss National Science Foundation/ ; }, mesh = {Burkholderia/classification/*genetics ; *Chromosomes, Bacterial ; Databases, Genetic ; Gene Rearrangement/*genetics ; Phylogeny ; }, abstract = {BACKGROUND: The genus Burkholderia consists of species that occupy remarkably diverse ecological niches. Its best known members are important pathogens, B. mallei and B. pseudomallei, which cause glanders and melioidosis, respectively. Burkholderia genomes are unusual due to their multichromosomal organization, generally comprised of 2-3 chromosomes.

RESULTS: We performed integrated genomic analysis of 127 Burkholderia strains. The pan-genome is open with the saturation to be reached between 86,000 and 88,000 genes. The reconstructed rearrangements indicate a strong avoidance of intra-replichore inversions that is likely caused by selection against the transfer of large groups of genes between the leading and the lagging strands. Translocated genes also tend to retain their position in the leading or the lagging strand, and this selection is stronger for large syntenies. Integrated reconstruction of chromosome rearrangements in the context of strains phylogeny reveals parallel rearrangements that may indicate inversion-based phase variation and integration of new genomic islands. In particular, we detected parallel inversions in the second chromosomes of B. pseudomallei with breakpoints formed by genes encoding membrane components of multidrug resistance complex, that may be linked to a phase variation mechanism. Two genomic islands, spreading horizontally between chromosomes, were detected in the B. cepacia group.

CONCLUSIONS: This study demonstrates the power of integrated analysis of pan-genomes, chromosome rearrangements, and selection regimes. Non-random inversion patterns indicate selective pressure, inversions are particularly frequent in a recent pathogen B. mallei, and, together with periods of positive selection at other branches, may indicate adaptation to new niches. One such adaptation could be a possible phase variation mechanism in B. pseudomallei.}, } @article {pmid30587114, year = {2018}, author = {Tyakht, AV and Manolov, AI and Kanygina, AV and Ischenko, DS and Kovarsky, BA and Popenko, AS and Pavlenko, AV and Elizarova, AV and Rakitina, DV and Baikova, JP and Ladygina, VG and Kostryukova, ES and Karpova, IY and Semashko, TA and Larin, AK and Grigoryeva, TV and Sinyagina, MN and Malanin, SY and Shcherbakov, PL and Kharitonova, AY and Khalif, IL and Shapina, MV and Maev, IV and Andreev, DN and Belousova, EA and Buzunova, YM and Alexeev, DG and Govorun, VM}, title = {Genetic diversity of Escherichia coli in gut microbiota of patients with Crohn's disease discovered using metagenomic and genomic analyses.}, journal = {BMC genomics}, volume = {19}, number = {1}, pages = {968}, pmid = {30587114}, issn = {1471-2164}, support = {16-15-00258//Russian Science Foundation/ ; }, mesh = {Cluster Analysis ; Crohn Disease/microbiology/*pathology ; Escherichia coli/*genetics/isolation & purification ; Feces/microbiology ; *Gastrointestinal Microbiome ; *Genetic Variation ; Genome, Bacterial ; Humans ; Intestinal Mucosa/microbiology ; Metagenomics/*methods ; }, abstract = {BACKGROUND: Crohn's disease is associated with gut dysbiosis. Independent studies have shown an increase in the abundance of certain bacterial species, particularly Escherichia coli with the adherent-invasive pathotype, in the gut. The role of these species in this disease needs to be elucidated.

METHODS: We performed a metagenomic study investigating the gut microbiota of patients with Crohn's disease. A metagenomic reconstruction of the consensus genome content of the species was used to assess the genetic variability.

RESULTS: The abnormal shifts in the microbial community structures in Crohn's disease were heterogeneous among the patients. The metagenomic data suggested the existence of multiple E. coli strains within individual patients. We discovered that the genetic diversity of the species was high and that only a few samples manifested similarity to the adherent-invasive varieties. The other species demonstrated genetic diversity comparable to that observed in the healthy subjects. Our results were supported by a comparison of the sequenced genomes of isolates from the same microbiota samples and a meta-analysis of published gut metagenomes.

CONCLUSIONS: The genomic diversity of Crohn's disease-associated E. coli within and among the patients paves the way towards an understanding of the microbial mechanisms underlying the onset and progression of the Crohn's disease and the development of new strategies for the prevention and treatment of this disease.}, } @article {pmid30586440, year = {2018}, author = {Cheleuitte-Nieves, C and Gulvik, CA and McQuiston, JR and Humrighouse, BW and Bell, ME and Villarma, A and Fischetti, VA and Westblade, LF and Lipman, NS}, title = {Genotypic differences between strains of the opportunistic pathogen Corynebacterium bovis isolated from humans, cows, and rodents.}, journal = {PloS one}, volume = {13}, number = {12}, pages = {e0209231}, pmid = {30586440}, issn = {1932-6203}, support = {P30 CA008748/CA/NCI NIH HHS/United States ; }, mesh = {Animals ; Cattle ; Corynebacterium/*genetics/*isolation & purification/pathogenicity ; Corynebacterium Infections/microbiology/veterinary ; DNA, Circular ; Female ; Genome, Bacterial ; Genotype ; Humans ; Mice, Nude ; Opportunistic Infections/microbiology/veterinary ; Phylogeny ; RNA, Bacterial ; RNA, Ribosomal, 16S ; Rats ; Virulence Factors/genetics ; }, abstract = {Corynebacterium bovis is an opportunistic bacterial pathogen shown to cause eye and prosthetic joint infections as well as abscesses in humans, mastitis in dairy cattle, and skin disease in laboratory mice and rats. Little is known about the genetic characteristics and genomic diversity of C. bovis because only a single draft genome is available for the species. The overall aim of this study was to sequence and compare the genome of C. bovis isolates obtained from different species, locations, and time points. Whole-genome sequencing was conducted on 20 C. bovis isolates (six human, four bovine, nine mouse and one rat) using the Illumina MiSeq platform and submitted to various comparative analysis tools. 
Sequencing generated high-quality contigs (over 2.53 Mbp) that were comparable to the only reported assembly using C. bovis DSM 20582T (97.8 ± 0.36% completeness). The number of protein-coding DNA sequences (2,174 ± 12.4) was similar among all isolates. A Corynebacterium genus neighbor-joining tree was created, which revealed Corynebacterium falsenii as the nearest neighbor to C. bovis (95.87% similarity), although the reciprocal comparison shows Corynebacterium jeikeium as closest neighbor to C. falsenii. Interestingly, the average nucleotide identity demonstrated that the C. bovis isolates clustered by host, with human and bovine isolates clustering together, and the mouse and rat isolates forming a separate group. The average number of genomic islands and putative virulence factors were significantly higher (p<0.001) in the mouse and rat isolates as compared to human/bovine isolates. Corynebacterium bovis' pan-genome contained a total of 3,067 genes of which 1,354 represented core genes. The known core genes of all isolates were primarily related to "metabolism" and "information storage/processing." However, most genes were classified as "function unknown" or "unclassified". Surprisingly, no intact prophages were found in any isolate; however, almost all isolates had at least one complete CRISPR-Cas system.}, } @article {pmid30574560, year = {2018}, author = {Velsko, IM and Chakraborty, B and Nascimento, MM and Burne, RA and Richards, VP}, title = {Species Designations Belie Phenotypic and Genotypic Heterogeneity in Oral Streptococci.}, journal = {mSystems}, volume = {3}, number = {6}, pages = {}, pmid = {30574560}, issn = {2379-5077}, support = {K23 DE023579/DE/NIDCR NIH HHS/United States ; }, abstract = {Health-associated oral Streptococcus species are promising probiotic candidates to protect against dental caries. 
Ammonia production through the arginine deiminase system (ADS), which can increase the pH of oral biofilms, and direct antagonism of caries-associated bacterial species are desirable properties for oral probiotic strains. ADS and antagonistic activities can vary dramatically among individuals, but the genetic basis for these differences is unknown. We sequenced whole genomes of a diverse set of clinical oral Streptococcus isolates and examined the genetic basis of variability in ADS and antagonistic activities. A total of 113 isolates were included and represented 10 species: Streptococcus australis, A12-like, S. cristatus, S. gordonii, S. intermedius, S. mitis, S. oralis including S. oralis subsp. dentisani, S. parasanguinis, S. salivarius, and S. sanguinis. Mean ADS activity and antagonism on Streptococcus mutans UA159 were measured for each isolate, and each isolate was whole genome shotgun sequenced on an Illumina MiSeq. Phylogenies were built of genes known to be involved in ADS activity and antagonism. Several approaches to correlate the pan-genome with phenotypes were performed. Phylogenies of genes previously identified in ADS activity and antagonism grouped isolates by species, but not by phenotype. A genome-wide association study (GWAS) identified additional genes potentially involved in ADS activity or antagonism across all the isolates we sequenced as well as within several species. Phenotypic heterogeneity in oral streptococci is not necessarily reflected by genotype and is not species specific. Probiotic strains must be carefully selected based on characterization of each strain and not based on inclusion within a certain species. IMPORTANCE Representative type strains are commonly used to characterize bacterial species, yet species are phenotypically and genotypically heterogeneous. Conclusions about strain physiology and activity based on a single strain therefore may be inappropriate and misleading. 
When selecting strains for probiotic use, the assumption that all strains within a species share the same desired probiotic characteristics may result in selection of a strain that lacks the desired traits, and therefore makes a minimally effective or ineffective probiotic. Health-associated oral streptococci are promising candidates for anticaries probiotics, but strains need to be carefully selected based on observed phenotypes. We characterized the genotypes and anticaries phenotypes of strains from 10 species of oral streptococci and demonstrate poor correlation between genotype and phenotype across all species.}, } @article {pmid30563902, year = {2018}, author = {Potter, RF and Lainhart, W and Twentyman, J and Wallace, MA and Wang, B and Burnham, CA and Rosen, DA and Dantas, G}, title = {Population Structure, Antibiotic Resistance, and Uropathogenicity of Klebsiella variicola.}, journal = {mBio}, volume = {9}, number = {6}, pages = {}, pmid = {30563902}, issn = {2150-7511}, support = {R01 GM099538/GM/NIGMS NIH HHS/United States ; K08 AI127714/AI/NIAID NIH HHS/United States ; R01 AI123394/AI/NIAID NIH HHS/United States ; R01 HD092414/HD/NICHD NIH HHS/United States ; T32 GM007067/GM/NIGMS NIH HHS/United States ; }, mesh = {Animals ; Anti-Bacterial Agents/*pharmacology ; Carbapenems/pharmacology ; Ciprofloxacin/pharmacology ; Communicable Diseases, Emerging/microbiology ; *Drug Resistance, Multiple, Bacterial ; Female ; Fimbriae, Bacterial/genetics ; Genome, Bacterial ; Humans ; Klebsiella/*drug effects/genetics/*pathogenicity ; Klebsiella Infections/microbiology ; Mice ; Microbial Sensitivity Tests ; Phylogeny ; Urinary Bladder/microbiology ; Urinary Tract Infections/*microbiology ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {Klebsiella variicola is a member of the Klebsiella genus and often misidentified as Klebsiella pneumoniae or Klebsiella quasipneumon