Interpretation of genetic variation data is a crucial step in understanding the relationship between gene sequence changes and biological function. Several annotation tools, such as ANNOVAR, VEP, and vcfanno, have been developed. These tools make the annotation of genetic variation data faster and more convenient than before. However, because each annotation tool has its own usage conventions and design architecture, it is difficult for bioinformatics beginners to use them. In addition, many existing database resources and annotation scripts have not been well integrated and shared.
It is therefore worthwhile to develop an integrated annotation system that integrates not only the different annotation tools but also the relevant database resources. Here, we present an integrated annotation R package, ‘annovarR’, for this purpose. It provides a series of R functions that integrate external annotation tools and annotation databases.
To install annovarR, you first need to install the R interpreter (Linux, macOS and Windows are supported). The package has been uploaded to the Comprehensive R Archive Network (CRAN, https://cran.r-project.org). You can install the annovarR package with the following commands:
# Enable both repositories (index 1 is CRAN, index 2 is Bioconductor),
# then install annovarR.
setRepositories(ind = 1:2)
install.packages("annovarR")
If you want to use the latest development version, use the devtools install_github
function.
# Install the cutting-edge development version from the "develop" branch on GitHub.
# devtools must be available; uncomment the next line to install it first:
# install.packages("devtools")
devtools::install_github("JhuangLab/annovarR", ref = "develop")
Finally, annovarR can also be installed from the source code archive (R CMD INSTALL
). In this case, you need to handle its many package dependencies manually.
Tip: When the RMySQL or RSQLite package cannot be installed directly from R, conda is an alternative: conda install -c r r-rmysql r-rsqlite
. Otherwise, you need root permissions to install the corresponding system dependencies.
annovarR uses the function download.database
to download the annotation databases.
# List the names of all databases supported by annovarR
download.database(show.all.names = TRUE)
#> [1] "db_annovar_1000g" "db_annovar_1000g_sqlite"
#> [3] "db_annovar_avsift" "db_annovar_avsnp"
#> [5] "db_annovar_avsnp_sqlite" "db_annovar_brvar"
#> [7] "db_annovar_cadd" "db_annovar_cadd_sqlite"
#> [9] "db_annovar_cg" "db_annovar_civic_gene_summaries"
#> [11] "db_annovar_clinvar" "db_annovar_clinvar_sqlite"
#> [13] "db_annovar_cosmic" "db_annovar_cosmic_sqlite"
#> [15] "db_annovar_cscd" "db_annovar_darned_sqlite"
#> [17] "db_annovar_dbnsfp" "db_annovar_dbnsfp_sqlite"
#> [19] "db_annovar_dbscsnv" "db_annovar_dbscsnv_sqlite"
#> [21] "db_annovar_dhs_gene_connectivity" "db_annovar_eigen"
#> [23] "db_annovar_eigen_sqlite" "db_annovar_ensgene"
#> [25] "db_annovar_epi_genes" "db_annovar_esp6500siv2"
#> [27] "db_annovar_exac03" "db_annovar_exac03_sqlite"
#> [29] "db_annovar_fathmm" "db_annovar_gdi_score"
#> [31] "db_annovar_gerp" "db_annovar_gme"
#> [33] "db_annovar_gme_sqlite" "db_annovar_gnomad"
#> [35] "db_annovar_gnomad_sqlite" "db_annovar_gtex_eqtl_egenes"
#> [37] "db_annovar_gtex_eqtl_pairs" "db_annovar_gwava"
#> [39] "db_annovar_gwava_sqlite" "db_annovar_hgnc"
#> [41] "db_annovar_hrcr1" "db_annovar_hrcr1_sqlite"
#> [43] "db_annovar_icgc21" "db_annovar_icgc_sqlite"
#> [45] "db_annovar_intervar" "db_annovar_intervar_sqlite"
#> [47] "db_annovar_kaviar" "db_annovar_knowngene"
#> [49] "db_annovar_ljb26_all" "db_annovar_lncediting_sqlite"
#> [51] "db_annovar_loftool_scores" "db_annovar_mcap"
#> [53] "db_annovar_mcap_sqlite" "db_annovar_mitimpact"
#> [55] "db_annovar_nci60" "db_annovar_nci60_sqlite"
#> [57] "db_annovar_normal_pool" "db_annovar_omim_genemap2"
#> [59] "db_annovar_popfreq" "db_annovar_popfreq_sqlite"
#> [61] "db_annovar_radar_sqlite" "db_annovar_rddpred_sqlite"
#> [63] "db_annovar_rediportal_sqlite" "db_annovar_refgene"
#> [65] "db_annovar_regsnpintron" "db_annovar_revel"
#> [67] "db_annovar_revel_sqlite" "db_annovar_rvis_esv_score"
#> [69] "db_annovar_seeqtl" "db_annovar_snp"
#> [71] "db_annovar_tall_somatic_genes" "db_annovar_tmcsnpdb"
#> [73] "db_annovar_varcards" "db_annovar_varcards_sqlite"
#> [75] "db_ucsc_cytoband" "db_ucsc_dnase_clustered"
#> [77] "db_ucsc_ensgene" "db_ucsc_knowngene"
#> [79] "db_ucsc_refgene" "db_ucsc_tfbs_clustered"
# Show all supported versions of a database (e.g. db_annovar_avsnp).
# The argument name is written out in full (show.all.versions, as in the demo
# download call in this document) instead of relying on partial argument matching.
download.database(download.name = "db_annovar_avsnp", show.all.versions = TRUE)
#> [1] "avsnp150" "avsnp147" "avsnp144" "avsnp142" "avsnp138"
# Show the supported genome builds (buildver) of the database versions
download.database(download.name = "db_annovar_avsnp", version = "avsnp147", show.all.buildvers = TRUE)
#> $avsnp138
#> [1] "hg19"
#>
#> $avsnp142
#> [1] "hg38" "hg19"
#>
#> $avsnp144
#> [1] "hg38" "hg19"
#>
#> $avsnp147
#> [1] "hg38" "hg19"
#>
#> $avsnp150
#> [1] "hg38" "hg19"
# To reduce download time, use the local demo configuration file to download a demo file
demo.cfg <- system.file("extdata", "demo/demo.cfg", package = "annovarR")
# List the versions available for the demo entry (TRUE is spelled out; T can be reassigned)
download.database("download_demo", show.all.versions = TRUE, download.cfg = demo.cfg)
#> [1] "demo"
# Download the "demo" version for build GRCh37 into a databases/ folder under tempdir()
download.database("download_demo", "demo", buildver = "GRCh37",
  database.dir = sprintf("%s/databases/", tempdir()), download.cfg = demo.cfg)
#> [1] TRUE
# To download other resources available through BioInstaller,
# use the function `install.bioinfo`; show.all.names = TRUE lists the available names
install.bioinfo(show.all.names = TRUE)
#> [1] "abyss" "arnapipe"
#> [3] "asap" "backspin"
#> [5] "bamtools" "bamutil"
#> [7] "bcftools" "bearscc"
#> [9] "bedtools" "bowtie"
#> [11] "bowtie2" "breakdancer"
#> [13] "brie" "bwa"
#> [15] "cnvkit" "cnvnator"
#> [17] "dart" "delly"
#> [19] "fastp" "fastq_tools"
#> [21] "fastx_toolkit" "freebayes"
#> [23] "fsclvm" "github_demo"
#> [25] "hisat2" "htseq"
#> [27] "igraph" "isop"
#> [29] "jvarkit" "libgtextutils"
#> [31] "lofreq" "macs"
#> [33] "mdseq" "mimosca"
#> [35] "multiqc" "oases"
#> [37] "oncotator" "outrigger"
#> [39] "picard" "pindel"
#> [41] "pxz" "raceid"
#> [43] "rca" "rum"
#> [45] "samtools_old" "sclvm"
#> [47] "scnorm" "seqtk"
#> [49] "seurat" "singlesplice"
#> [51] "sleuth" "somaticsniper"
#> [53] "sparsehash" "speedseq"
#> [55] "star" "strawberry"
#> [57] "tmap" "tophat2"
#> [59] "tracer" "trimgalore"
#> [61] "trinityrnaseq" "varscan2"
#> [63] "vcflib" "vcftools"
#> [65] "vep" "zifa"
#> [67] "annovar" "armadillo"
#> [69] "bcl2fastq" "blast"
#> [71] "blat" "bzip2"
#> [73] "cesa" "cnvnator_samtools"
#> [75] "curl" "demo_2"
#> [77] "edena" "ensemble_grch37_reffa"
#> [79] "ensemble_grch38_reffa" "fastqc"
#> [81] "fatotwobit" "fusioncatcher"
#> [83] "fusioncatcher_reffa" "gatk"
#> [85] "gatk_bundle" "gmap"
#> [87] "gridss" "hisat2_reffa"
#> [89] "htslib" "imagej"
#> [91] "interproscan" "liftover"
#> [93] "lzo" "lzop"
#> [95] "mapsplice2" "miniconda2"
#> [97] "miniconda3" "mutect"
#> [99] "ngs_qc_toolkit" "novoalign"
#> [101] "pcre" "pigz"
#> [103] "prinseq" "r"
#> [105] "reditools" "root"
#> [107] "samstat" "samtools"
#> [109] "snpeff" "solexaqa"
#> [111] "sqlite" "sratools"
#> [113] "srnanalyzer" "ssaha2"
#> [115] "strelka" "subread"
#> [117] "svtoolkit" "tvc"
#> [119] "ucsc_reffa" "ucsc_utils"
#> [121] "vcfanno" "velvet"
#> [123] "xz" "zlib"
#> [125] "db_atcircdb" "db_biosystems"
#> [127] "db_cancer_hotspot" "db_cgi"
#> [129] "db_circbase" "db_circnet"
#> [131] "db_circrnadb" "db_civic"
#> [133] "db_cscd" "db_denovo_db"
#> [135] "db_dgidb" "db_differentialnet"
#> [137] "db_diseaseenhancer" "db_disgenet"
#> [139] "db_docm" "db_drugbank"
#> [141] "db_ecodrug" "db_eggnog"
#> [143] "db_exorbase" "db_expression_atlas"
#> [145] "db_exsnp" "db_fantom_cage_peaks"
#> [147] "db_fantom_co_expression_clusters" "db_fantom_enhancers"
#> [149] "db_fantom_motifs" "db_fantom_ontology"
#> [151] "db_fantom_tss_classifier" "db_funcoup"
#> [153] "db_gtex" "db_hgnc"
#> [155] "db_hpo" "db_inbiomap"
#> [157] "db_interpro" "db_intogen"
#> [159] "db_lncediting" "db_medreaders"
#> [161] "db_mndr" "db_msdd"
#> [163] "db_omim_open" "db_omim_private"
#> [165] "db_oncotator" "db_pancanqtl"
#> [167] "db_proteinatlas" "db_rbp_var"
#> [169] "db_rddpred" "db_remap"
#> [171] "db_remap2" "db_rsnp3"
#> [173] "db_rvarbase" "db_seecancer"
#> [175] "db_seeqtl" "db_snipa3"
#> [177] "db_srnanalyzer" "db_superdrug2"
#> [179] "db_tumorfusions" "db_varcards"
#> [181] "db_annovar_1000g" "db_annovar_1000g_sqlite"
#> [183] "db_annovar_avsift" "db_annovar_avsnp"
#> [185] "db_annovar_avsnp_sqlite" "db_annovar_brvar"
#> [187] "db_annovar_cadd" "db_annovar_cadd_sqlite"
#> [189] "db_annovar_cg" "db_annovar_civic_gene_summaries"
#> [191] "db_annovar_clinvar" "db_annovar_clinvar_sqlite"
#> [193] "db_annovar_cosmic" "db_annovar_cosmic_sqlite"
#> [195] "db_annovar_cscd" "db_annovar_darned_sqlite"
#> [197] "db_annovar_dbnsfp" "db_annovar_dbnsfp_sqlite"
#> [199] "db_annovar_dbscsnv" "db_annovar_dbscsnv_sqlite"
#> [201] "db_annovar_dhs_gene_connectivity" "db_annovar_eigen"
#> [203] "db_annovar_eigen_sqlite" "db_annovar_ensgene"
#> [205] "db_annovar_epi_genes" "db_annovar_esp6500siv2"
#> [207] "db_annovar_exac03" "db_annovar_exac03_sqlite"
#> [209] "db_annovar_fathmm" "db_annovar_gdi_score"
#> [211] "db_annovar_gerp" "db_annovar_gme"
#> [213] "db_annovar_gme_sqlite" "db_annovar_gnomad"
#> [215] "db_annovar_gnomad_sqlite" "db_annovar_gtex_eqtl_egenes"
#> [217] "db_annovar_gtex_eqtl_pairs" "db_annovar_gwava"
#> [219] "db_annovar_gwava_sqlite" "db_annovar_hgnc"
#> [221] "db_annovar_hrcr1" "db_annovar_hrcr1_sqlite"
#> [223] "db_annovar_icgc21" "db_annovar_icgc_sqlite"
#> [225] "db_annovar_intervar" "db_annovar_intervar_sqlite"
#> [227] "db_annovar_kaviar" "db_annovar_knowngene"
#> [229] "db_annovar_ljb26_all" "db_annovar_lncediting_sqlite"
#> [231] "db_annovar_loftool_scores" "db_annovar_mcap"
#> [233] "db_annovar_mcap_sqlite" "db_annovar_mitimpact"
#> [235] "db_annovar_nci60" "db_annovar_nci60_sqlite"
#> [237] "db_annovar_normal_pool" "db_annovar_omim_genemap2"
#> [239] "db_annovar_popfreq" "db_annovar_popfreq_sqlite"
#> [241] "db_annovar_radar_sqlite" "db_annovar_rddpred_sqlite"
#> [243] "db_annovar_rediportal_sqlite" "db_annovar_refgene"
#> [245] "db_annovar_regsnpintron" "db_annovar_revel"
#> [247] "db_annovar_revel_sqlite" "db_annovar_rvis_esv_score"
#> [249] "db_annovar_seeqtl" "db_annovar_snp"
#> [251] "db_annovar_tall_somatic_genes" "db_annovar_tmcsnpdb"
#> [253] "db_annovar_varcards" "db_annovar_varcards_sqlite"
#> [255] "db_ucsc_cytoband" "db_ucsc_dnase_clustered"
#> [257] "db_ucsc_ensgene" "db_ucsc_knowngene"
#> [259] "db_ucsc_refgene" "db_ucsc_tfbs_clustered"
#> [261] "db_blast_env_nr" "db_blast_est_human"
#> [263] "db_blast_est_mouse" "db_blast_est_others"
#> [265] "db_blast_gss" "db_blast_htgs"
#> [267] "db_blast_human_genomic" "db_blast_landmark"
#> [269] "db_blast_mouse_genomic" "db_blast_nr"
#> [271] "db_blast_nt" "db_blast_other_genomic"
#> [273] "db_blast_pataa" "db_blast_patnt"
#> [275] "db_blast_pdbaa" "db_blast_pdbnt"
#> [277] "db_blast_ref_prok_rep_genomes" "db_blast_ref_viroids_rep_genomes"
#> [279] "db_blast_ref_viruses_rep_genomes" "db_blast_refseq_genomic"
#> [281] "db_blast_refseq_protein" "db_blast_refseq_rna"
#> [283] "db_blast_refseqgene" "db_blast_sts"
#> [285] "db_blast_swissprot" "db_blast_taxdb"
#> [287] "db_blast_tsa_nr" "db_blast_tsa_nt"
#> [289] "db_blast_vector"
# Get all supported anno.name values in annovarR
get.annotation.names()
#> [1] "1000g2015aug_all"
#> [2] "1000g2015aug_afr"
#> [3] "1000g2015aug_amr"
#> [4] "1000g2015aug_eas"
#> [5] "1000g2015aug_eur"
#> [6] "1000g2015aug_sas"
#> [7] "perl_annovar_refGene"
#> [8] "perl_annovar_ensGene"
#> [9] "perl_annovar_knownGene"
#> [10] "perl_annovar_cytoBand"
#> [11] "perl_annovar_genomicSuperDups"
#> [12] "perl_annovar_clinvar_20170905"
#> [13] "perl_annovar_esp6500siv2_all"
#> [14] "perl_annovar_1000g2015aug_all"
#> [15] "perl_annovar_1000g2015aug_afr"
#> [16] "perl_annovar_1000g2015aug_eas"
#> [17] "perl_annovar1000g2015aug_eur"
#> [18] "perl_annovar_avsnp142"
#> [19] "perl_annovar_avsnp144"
#> [20] "perl_annovar_avsnp147"
#> [21] "perl_annovar_avsnp150"
#> [22] "perl_annovar_dbnsfp33a"
#> [23] "perl_annovar_cosmic70"
#> [24] "perl_annovar_cosmic81"
#> [25] "perl_annovar_cosmic82"
#> [26] "perl_annovar_eigen"
#> [27] "perl_annovar_gwava"
#> [28] "perl_annovar_cadd13"
#> [29] "perl_annovar_cadd13gt10"
#> [30] "perl_annovar_cadd13gt20"
#> [31] "perl_annovar_regsnpintron"
#> [32] "perl_annovar_mitimpact24"
#> [33] "perl_annovar_popfreq_all_20150413"
#> [34] "perl_annovar_popfreq_max_20150413"
#> [35] "perl_annovar_icgc21"
#> [36] "perl_annovar_nci60"
#> [37] "perl_annovar_gme"
#> [38] "perl_annovar_hrcr1"
#> [39] "perl_annovar_kaviar_20150923"
#> [40] "perl_annovar_gnomad_genome"
#> [41] "perl_annovar_gnomad_exome"
#> [42] "perl_annovar_exac03nonpsych"
#> [43] "perl_annovar_exac03nontcga"
#> [44] "perl_annovar_exac03"
#> [45] "perl_annovar_cg69"
#> [46] "perl_annovar_cg46"
#> [47] "perl_annovar_intervar_20170202"
#> [48] "perl_annovar_dbscsnv11"
#> [49] "perl_annovar_merge"
#> [50] "avsnp138"
#> [51] "avsnp142"
#> [52] "avsnp144"
#> [53] "avsnp147"
#> [54] "avsnp150"
#> [55] "bioc_org_hs_eg"
#> [56] "bioc_gene2"
#> [57] "bioc_gene2alias"
#> [58] "bioc_gene2ensembl_gene_id"
#> [59] "bioc_gene2entrez_gene_id"
#> [60] "bioc_gene2gene_full_name"
#> [61] "brvar_v1_core"
#> [62] "brvar_v1_extra"
#> [63] "cadd"
#> [64] "caddgt10"
#> [65] "caddgt20"
#> [66] "cadd13"
#> [67] "cadd13gt10"
#> [68] "cadd13gt20"
#> [69] "nightly_civic_gene_summaries"
#> [70] "clinvar_20170130"
#> [71] "cosmic70"
#> [72] "cosmic81"
#> [73] "cosmic82"
#> [74] "cscd_cancer_circrna"
#> [75] "cscd_common_circrna"
#> [76] "cscd_normal_circrna"
#> [77] "darned"
#> [78] "dbnsfp30a"
#> [79] "dbnsfp31a_interpro"
#> [80] "dbnsfp33a"
#> [81] "dbscsnv11"
#> [82] "dhs_gene_connectivity_2012"
#> [83] "eigen"
#> [84] "epi_genes_v1"
#> [85] "exac03"
#> [86] "exac03nontcga"
#> [87] "exac03nonpsych"
#> [88] "gdi_score_full_10282015"
#> [89] "gme"
#> [90] "gnomad_exome"
#> [91] "gnomad_genome"
#> [92] "gtex_adipose_subcutaneous.v7.egenes"
#> [93] "gtex_adipose_visceral_omentum.v7.egenes"
#> [94] "gtex_adrenal_gland.v7.egenes"
#> [95] "gtex_artery_aorta.v7.egenes"
#> [96] "gtex_artery_coronary.v7.egenes"
#> [97] "gtex_artery_tibial.v7.egenes"
#> [98] "gtex_brain_amygdala.v7.egenes"
#> [99] "gtex_brain_anterior_cingulate_cortex_ba24.v7.egenes"
#> [100] "gtex_brain_caudate_basal_ganglia.v7.egenes"
#> [101] "gtex_brain_cerebellar_hemisphere.v7.egenes"
#> [102] "gtex_brain_cerebellum.v7.egenes"
#> [103] "gtex_brain_cortex.v7.egenes"
#> [104] "gtex_brain_frontal_cortex_ba9.v7.egenes"
#> [105] "gtex_brain_hippocampus.v7.egenes"
#> [106] "gtex_brain_hypothalamus.v7.egenes"
#> [107] "gtex_brain_nucleus_accumbens_basal_ganglia.v7.egenes"
#> [108] "gtex_brain_putamen_basal_ganglia.v7.egenes"
#> [109] "gtex_brain_spinal_cord_cervical_c_1.v7.egenes"
#> [110] "gtex_brain_substantia_nigra.v7.egenes"
#> [111] "gtex_breast_mammary_tissue.v7.egenes"
#> [112] "gtex_cells_ebv_transformed_lymphocytes.v7.egenes"
#> [113] "gtex_cells_transformed_fibroblasts.v7.egenes"
#> [114] "gtex_colon_sigmoid.v7.egenes"
#> [115] "gtex_colon_transverse.v7.egenes"
#> [116] "gtex_esophagus_gastroesophageal_junction.v7.egenes"
#> [117] "gtex_esophagus_mucosa.v7.egenes"
#> [118] "gtex_esophagus_muscularis.v7.egenes"
#> [119] "gtex_heart_atrial_appendage.v7.egenes"
#> [120] "gtex_heart_left_ventricle.v7.egenes"
#> [121] "gtex_liver.v7.egenes"
#> [122] "gtex_lung.v7.egenes"
#> [123] "gtex_minor_salivary_gland.v7.egenes"
#> [124] "gtex_muscle_skeletal.v7.egenes"
#> [125] "gtex_nerve_tibial.v7.egenes"
#> [126] "gtex_ovary.v7.egenes"
#> [127] "gtex_pancreas.v7.egenes"
#> [128] "gtex_pituitary.v7.egenes"
#> [129] "gtex_prostate.v7.egenes"
#> [130] "gtex_skin_not_sun_exposed_suprapubic.v7.egenes"
#> [131] "gtex_skin_sun_exposed_lower_leg.v7.egenes"
#> [132] "gtex_small_intestine_terminal_ileum.v7.egenes"
#> [133] "gtex_spleen.v7.egenes"
#> [134] "gtex_stomach.v7.egenes"
#> [135] "gtex_testis.v7.egenes"
#> [136] "gtex_thyroid.v7.egenes"
#> [137] "gtex_uterus.v7.egenes"
#> [138] "gtex_vagina.v7.egenes"
#> [139] "gtex_whole_blood.v7.egenes"
#> [140] "gwava"
#> [141] "hgnc_coding_gene2alias"
#> [142] "hgnc_coding_pre2gene"
#> [143] "hrcr1"
#> [144] "icgc21"
#> [145] "intervar_20170202"
#> [146] "kaviar_20150923"
#> [147] "lncediting"
#> [148] "loftool_scores"
#> [149] "mcap"
#> [150] "nci60"
#> [151] "2016sih_wes_ball"
#> [152] "2016sih_wes_tall"
#> [153] "2016sih_wes_nkt"
#> [154] "2016sih_wgs_nkt"
#> [155] "2016sih_wgs_dlbcl"
#> [156] "omim_genemap2_ensembl_gene_id2phenotype"
#> [157] "omim_genemap2_entrez_gene_id2phenotype"
#> [158] "omim_genemap2_symbol2phenotype"
#> [159] "popfreq_max_20150413"
#> [160] "popfreq_all_20150413"
#> [161] "radar2"
#> [162] "rddpred_mes"
#> [163] "REDIportal"
#> [164] "ucsc_refgene"
#> [165] "ens_refgene"
#> [166] "revel"
#> [167] "rs2pos138"
#> [168] "rs2pos142"
#> [169] "rs2pos144"
#> [170] "rs2pos147"
#> [171] "rs2pos150"
#> [172] "rvis_exac_4kw"
#> [173] "seeqtl_qvalue_hapmap3_cis"
#> [174] "seeqtl_qvalue_hapmap3_trans"
#> [175] "seeqtl_qvalue_myers_cis"
#> [176] "seeqtl_qvalue_myers_trans"
#> [177] "tall_somatic_genes_20171206"
#> [178] "tmcsnpdb"
#> [179] "varcards"
#> [180] "vcfanno_demo"
#> [181] "vep_all"
# Look up the download.name required by an anno.name; that download.name
# can then be passed to download.database to fetch the database.
download.name <- get.download.name("avsnp147")
# Path of the database configuration file shipped with annovarR
database.cfg <- system.file(
  "extdata", "config/databases.toml",
  package = "annovarR"
)
# Input columns required by an anno.name
get.annotation.needcols("avsnp147")
#> [1] "chr" "start" "end" "ref" "alt"
# Build one SQLite database per demo text table
for (i in c("hg19_ALL.sites.2015_08", "hg19_avsnp147")) {
  # Demo text table shipped with the package
  database <- system.file("extdata", paste0("demo/", i, ".txt"), package = "annovarR")
  # Target SQLite file inside the session temporary directory
  sqlite.db <- paste0(tempdir(), "/", i, ".sqlite")
  # Also place a copy of the text table in tempdir()
  # (presumably for the db.type = "txt" examples that use tempdir() as database.dir)
  file.copy(database, paste0(tempdir(), "/", i, ".txt"))
  sqlite.build(
    database,
    sqlite.connect.params = list(dbname = sqlite.db, table.name = i)
  )
}
# Annotate 1000 Genomes Project frequencies with the predefined rule,
# first from the text database and then from the SQLite database.
database.dir <- tempdir()
chr <- c("chr1", "chr2", "chr1")
start <- c("10177", "10177", "10020")
end <- c("10177", "10177", "10020")
ref <- c("-", "A", "A")
alt <- c("C", "AC", "-")
dat <- data.table(
  chr = chr,
  start = start,
  end = end,
  ref = ref,
  alt = alt
)
# Text-file backend
x <- annotation(
  dat = dat,
  anno.name = "1000g2015aug_all",
  database.dir = database.dir,
  db.type = "txt"
)
x
#> 1000g2015aug_all
#> 1: 0.425319
#> 2: NA
#> 3: NA
# SQLite backend
x <- annotation(
  dat = dat,
  anno.name = "1000g2015aug_all",
  database.dir = database.dir,
  db.type = "sqlite"
)
x
#> 1000g2015aug_all
#> 1: 0.425319
#> 2: NA
#> 3: NA
# Annotate with the full-match function: by default chr and start select the
# candidate records, and chr, start, end, ref and alt are used to match them.
# See `?annotation.cols.match` for more detail.
chr <- c("chr1", "chr2", "chr1")
start <- rep("10020", 3)
end <- rep("10020", 3)
ref <- rep("A", 3)
alt <- rep("-", 3)
dat <- data.table(
  chr = chr,
  start = start,
  end = end,
  ref = ref,
  alt = alt
)
x <- annotation.cols.match(
  dat,
  "avsnp147",
  database.dir = database.dir,
  return.col.names = "avSNP147",
  db.type = "sqlite"
)
x
#> avSNP147
#> 1: rs775809821
#> 2: NA
#> 3: rs775809821
# Region match mode: prepare a demo BED file and query positions.
# NOTE(review): the annotation.region.match call below is commented out, so
# only the input objects are created here.
bed.file <- system.file("extdata", "demo/example.bed", package = "annovarR")
chr <- c("chr10", "chr1")
start <- c("100188904", "100185955")
end <- c("100188904", "100185955")
dat <- data.table(chr = chr, start = start, end = end)
# format.cols.plus.chr adds "chr" to the chr column
# if your input chr column does not contain the string 'chr'
# format.db.region.tb processes the region-matched data
#x <- annotation.region.match(dat = dat, database.dir = tempdir(), dbname.fixed = bed.file,
# table.name.fixed = "bed", db.type = "txt", format.dat.fun = "format.cols.plus.chr",
# format.db.tb.fun = "format.db.region.tb")
#x
# Convert SNP rs numbers to genomic locations
snp.id <- c("rs775809821", "rs768019142")
x <- annotation(
  dat = data.table(rs = rep(snp.id, 3)),
  database.dir = database.dir,
  anno.name = "rs2pos147",
  buildver = "hg19",
  verbose = FALSE,
  db.type = "txt"
)
# Annotate avinput-format R data using ANNOVAR.
# With debug = TRUE the command is not run.
# Assignments use `<-`, consistent with the rest of this document.
chr <- "chr1"
start <- "123"
end <- "123"
ref <- "A"
alt <- "C"
dat <- data.table(chr, start, end, ref, alt)
x <- annotation(dat, "perl_annovar_refGene", annovar.dir = "/opt/bin/annovar",
  database.dir = "{{annovar.dir}}/humandb", debug = TRUE)
#> /usr/bin/perl /u4/jhuanglabbin/annovar/table_annovar.pl /tmp/RtmpsBkFv9/file3af747e0194f {annovar.dir}/humandb -buildver hg19 -remove -protocol refGene -operation g -nastring NA
# Annotate a VCF file using ANNOVAR.
# With debug = TRUE the command is not run.
x <- annotation(
  anno.name = "perl_annovar_ensGene",
  input.file = "/tmp/test.vcf",
  annovar.dir = "/opt/bin/annovar/",
  database.dir = "{{annovar.dir}}/humandb",
  out = tempfile(),
  vcfinput = TRUE,
  debug = TRUE
)
#> /usr/bin/perl /u4/jhuanglabbin/annovar/table_annovar.pl /tmp/test.vcf {annovar.dir}/humandb -buildver hg19 -out /tmp/RtmpsBkFv9/file3af732639362 -remove -protocol ensGene -operation g -nastring NA -vcfinput
# Annotate a VCF file using VEP.
# First call vep() directly with debug = TRUE (sample output follows).
vep(debug = TRUE)
#> vep --cache_version 91 --assembly GRCh37 --dir /home/ljf/.vep --output_file variant_effect_output.txt --cache --offline --everything
#> [1] "vep --cache_version 91 --assembly GRCh37 --dir /home/ljf/.vep --output_file variant_effect_output.txt --cache --offline --everything "
# The same tool driven through annotation() with the "vep_all" anno.name
x <- annotation(anno.name = "vep_all", input.file = "/tmp/test.vcf",
out = tempfile(), debug = TRUE)
#> vep --cache_version 91 --assembly hg19 --dir /home/ljf/.vep --output_file /tmp/RtmpsBkFv9/file3af72133bad9 --input_file /tmp/test.vcf --cache --offline --everything
# Annotate a VCF file using vcfanno.
# First call vcfanno() directly with debug = TRUE (sample output follows).
vcfanno(debug = TRUE)
#> vcfanno_linux64 -p 2 /tmp/RtmpgbgH1T/Rinst3a9d730a2e43/annovarR/extdata/demo/vcfanno_demo/conf.toml input.vcf > output.vcf
#> [1] "vcfanno_linux64 -p 2 /tmp/RtmpgbgH1T/Rinst3a9d730a2e43/annovarR/extdata/demo/vcfanno_demo/conf.toml input.vcf > output.vcf"
# The same tool driven through annotation() with the demo query VCF shipped with annovarR.
# NOTE(review): vcfanno = "/path/vcfanno" is a placeholder path; confirm it matches the sample output.
x <- annotation(anno.name = "vcfanno_demo", input.file = system.file("extdata", "demo/vcfanno_demo/query.vcf.gz",
package = "annovarR"), out = "test.vcf", vcfanno = "/path/vcfanno", debug = TRUE)
#> vcfanno_linux64 -base-path /tmp/RtmpgbgH1T/Rinst3a9d730a2e43/annovarR/extdata/demo/vcfanno_demo/ -lua /tmp/RtmpgbgH1T/Rinst3a9d730a2e43/annovarR/extdata/demo/vcfanno_demo/custom.lua -p 2 /tmp/RtmpgbgH1T/Rinst3a9d730a2e43/annovarR/extdata/demo/vcfanno_demo/conf.toml /tmp/RtmpgbgH1T/Rinst3a9d730a2e43/annovarR/extdata/demo/vcfanno_demo/query.vcf.gz > test.vcf
# Annotate gene symbols with their aliases using the Bioconductor org.Hs.eg.db annotation
gene <- c("TP53", "NSD2")
annotation(dat = gene, anno.name = "bioc_gene2alias")
#> 'select()' returned 1:many mapping between keys and columns
#> SYMBOL ALIAS
#> 1: TP53 BCC7
#> 2: TP53 LFS1
#> 3: TP53 P53
#> 4: TP53 TRP53
#> 5: TP53 TP53
#> 6: NSD2 KMT3F
#> 7: NSD2 KMT3G
#> 8: NSD2 MMSET
#> 9: NSD2 REIIBP
#> 10: NSD2 TRX5
#> 11: NSD2 WHS
#> 12: NSD2 WHSC1
#> 13: NSD2 NSD2