Fetch homozygous genotypes of inbred mouse strains

Introduction

This R package provides methods for genetic fine-mapping in inbred mice by taking advantage of their very high homozygosity rate (>95%).

The function fetch() downloads homozygous genotypes of 37 inbred mouse strains for a given genomic region.

Installation

if(!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
BiocManager::install("MouseFM")

Loading package

library(MouseFM)
#> 
#>   ---------
#>   For example usage please run: vignette('MouseFM')
#>   
#>   Support me: http://matthiasmunz.de/support_me
#> 
#>   Citation appreciated:
#>   Munz M, Khodaygani M, Aherrahrou Z, Busch H, Wohlers I (2021) In silico candidate variant and gene identification using inbred mouse strains. PeerJ. doi:10.7717/peerj.11017
#>   
#>   Github Repo: https://github.com/matmu/MouseFM
#>   MouseFM Backend: https://github.com/matmu/MouseFM-Backend
#>   ---------

Example function calls

Fetch genotypes for region chr1:5000000-6000000.

df = fetch("chr1", start=5000000, end=6000000)
#> Query chr1:5,000,000-6,000,000

df[1:10,]
#>    chr     pos        rsid ref alt most_severe_consequence
#> 1    1 5000016  rs47088541   A   T          intron_variant
#> 2    1 5000029  rs48827827   G   A          intron_variant
#> 3    1 5000057  rs48099867   C   T          intron_variant
#> 4    1 5000062 rs246021564   G   C          intron_variant
#> 5    1 5000067 rs265132353   C   T          intron_variant
#> 6    1 5000068  rs51419610   A   G          intron_variant
#> 7    1 5000101 rs253320650   C   G          intron_variant
#> 8    1 5000156        <NA>   C   T          intron_variant
#> 9    1 5000157 rs216747169   G   A          intron_variant
#> 10   1 5000240        <NA>   T   G          intron_variant
#>                                                           consequences C57BL_6J
#> 1  non_coding_transcript_variant,intron_variant,NMD_transcript_variant        0
#> 2  non_coding_transcript_variant,intron_variant,NMD_transcript_variant        0
#> 3  non_coding_transcript_variant,intron_variant,NMD_transcript_variant        0
#> 4  non_coding_transcript_variant,intron_variant,NMD_transcript_variant        0
#> 5  non_coding_transcript_variant,intron_variant,NMD_transcript_variant        0
#> 6  non_coding_transcript_variant,intron_variant,NMD_transcript_variant        0
#> 7  non_coding_transcript_variant,intron_variant,NMD_transcript_variant        0
#> 8  non_coding_transcript_variant,intron_variant,NMD_transcript_variant        0
#> 9  non_coding_transcript_variant,intron_variant,NMD_transcript_variant        0
#> 10 non_coding_transcript_variant,intron_variant,NMD_transcript_variant        0
#>    129P2_OlaHsd 129S1_SvImJ 129S5SvEvBrd AKR_J A_J BALB_cJ BTBR BUB_BnJ C3H_HeH
#> 1             0           0            0     0   0       0    0       0       1
#> 2             0           0            0     0   0       0    0       0       1
#> 3             0           0            0     0   0       0    0       0       1
#> 4             0           0            0     0   0       0    0       0       1
#> 5             0           0            0     0   0       0    0       0       1
#> 6             0           0            0     0   0       0    0       0       1
#> 7             0           0            0     0   0       0    0       0       1
#> 8             0           0            0     0   0       0    0       0       0
#> 9             0           0            0     0   0       0    0       0       1
#> 10            0           0            0     0   0       0    0       0       0
#>    C3H_HeJ C57BL_10J C57BL_6NJ C57BR_cdJ C57L_J C58_J CAST_EiJ CBA_J DBA_1J
#> 1        1         0         0         0      0     0        1     1      1
#> 2        1         0         0         0      0     0        0     1      1
#> 3        1         0         0         0      0     0        0     1      1
#> 4        1         0         0         0      0     0        0     1      1
#> 5        1         0         0         0      0     0        0     1      1
#> 6        1         0         0         0      0     0        0     1      1
#> 7        1         0         0         0      0     0        0     1      1
#> 8        0         0         0         0      0     0        0     0      0
#> 9        1         0         0         0      0     0        0     1      0
#> 10       0         0         0         0      0     0        0     0      0
#>    DBA_2J FVB_NJ I_LnJ KK_HiJ LEWES_EiJ LP_J MOLF_EiJ NOD_ShiLtJ NZB_B1NJ
#> 1       1      0     0      0         1    0        0          0        1
#> 2       1      0     0      0         1    0        0          0        0
#> 3       1      0     0      0         1    0        0          0        0
#> 4       1      0     0      0         1    0        0          0        0
#> 5       1      0     0      0         1    0        0          0        0
#> 6       1      0     0      0         1    0        0          0        0
#> 7       1      0     0      0         1    0        0          0        0
#> 8       0      0     0      0         0    0        0          0        1
#> 9       0      0     0      0         1    0        0          0        0
#> 10      0      0     0      0         0    0        0          0        1
#>    NZO_HlLtJ NZW_LacJ PWK_PhJ RF_J SEA_GnJ SPRET_EiJ ST_bJ WSB_EiJ ZALENDE_EiJ
#> 1          0        0       1    1       0         1     0       1           1
#> 2          0        0       1    1       0         1     0       1           1
#> 3          0        0       1    1       0         1     0       1           1
#> 4          0        0       1    1       0         1     0       1           1
#> 5          0        0       1    1       0         0     0       1           1
#> 6          0        0       1    1       0         1     0       1           1
#> 7          0        0       1    1       0         1     0       1           1
#> 8          0        0       0    0       0         0     0       0           0
#> 9          0        0       0    1       0         0     0       1           1
#> 10         0        0       0    0       0         0     0       0           0

View meta information

comment(df)
#> [1] "#Alleles of strain C57BL_6J represent the reference (ref) alleles"
#> [2] "#reference_version=GRCm38"

Output of Session Info

The output of sessionInfo() on the system on which this document was compiled:

sessionInfo()
#> R version 4.4.2 (2024-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] MouseFM_1.17.0   BiocStyle_2.35.0
#> 
#> loaded via a namespace (and not attached):
#>  [1] KEGGREST_1.47.0         gtable_0.3.6            ggplot2_3.5.1          
#>  [4] xfun_0.49               bslib_0.8.0             httr2_1.0.7            
#>  [7] rlist_0.4.6.2           Biobase_2.67.0          vctrs_0.6.5            
#> [10] tools_4.4.2             generics_0.1.3          curl_6.0.1             
#> [13] stats4_4.4.2            tibble_3.2.1            fansi_1.0.6            
#> [16] AnnotationDbi_1.69.0    RSQLite_2.3.8           blob_1.2.4             
#> [19] pkgconfig_2.0.3         data.table_1.16.2       dbplyr_2.5.0           
#> [22] S4Vectors_0.45.2        lifecycle_1.0.4         GenomeInfoDbData_1.2.13
#> [25] compiler_4.4.2          stringr_1.5.1           Biostrings_2.75.1      
#> [28] progress_1.2.3          munsell_0.5.1           GenomeInfoDb_1.43.2    
#> [31] htmltools_0.5.8.1       sys_3.4.3               buildtools_1.0.0       
#> [34] sass_0.4.9              yaml_2.3.10             tidyr_1.3.1            
#> [37] pillar_1.9.0            crayon_1.5.3            jquerylib_0.1.4        
#> [40] cachem_1.1.0            gtools_3.9.5            tidyselect_1.2.1       
#> [43] digest_0.6.37           stringi_1.8.4           purrr_1.0.2            
#> [46] reshape2_1.4.4          dplyr_1.1.4             maketools_1.3.1        
#> [49] grid_4.4.2              biomaRt_2.63.0          fastmap_1.2.0          
#> [52] colorspace_2.1-1        cli_3.6.3               magrittr_2.0.3         
#> [55] utf8_1.2.4              scales_1.3.0            filelock_1.0.3         
#> [58] prettyunits_1.2.0       UCSC.utils_1.3.0        rappdirs_0.3.3         
#> [61] bit64_4.5.2             rmarkdown_2.29          XVector_0.47.0         
#> [64] httr_1.4.7              bit_4.5.0               png_0.1-8              
#> [67] hms_1.1.3               memoise_2.0.1           evaluate_1.0.1         
#> [70] knitr_1.49              GenomicRanges_1.59.1    IRanges_2.41.1         
#> [73] BiocFileCache_2.15.0    rlang_1.1.4             Rcpp_1.0.13-1          
#> [76] glue_1.8.0              DBI_1.2.3               xml2_1.3.6             
#> [79] BiocManager_1.30.25     BiocGenerics_0.53.3     jsonlite_1.8.9         
#> [82] plyr_1.8.9              R6_2.5.1                zlibbioc_1.52.0