Perform Genomic Liftover

Getting Started

Installation

if (!require("BiocManager", quietly = TRUE))
    install.packages("BiocManager")

BiocManager::install("easylift")

Documentation

To view documentation:

browseVignettes("easylift")

Import

library("easylift")
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> Loading required package: generics
#> 
#> Attaching package: 'generics'
#> The following objects are masked from 'package:base':
#> 
#>     as.difftime, as.factor, as.ordered, intersect, is.element, setdiff,
#>     setequal, union
#> 
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:generics':
#> 
#>     intersect, setdiff, setequal, union
#> The following objects are masked from 'package:stats':
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#> 
#>     Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
#>     as.data.frame, basename, cbind, colnames, dirname, do.call,
#>     duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
#>     lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#>     pmin.int, rank, rbind, rownames, sapply, saveRDS, setdiff,
#>     setequal, table, tapply, union, unique, unsplit, which.max,
#>     which.min
#> Loading required package: S4Vectors
#> Warning: multiple methods tables found for 'setequal'
#> 
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:utils':
#> 
#>     findMatches
#> The following objects are masked from 'package:base':
#> 
#>     I, expand.grid, unname
#> Loading required package: IRanges
#> Warning: replacing previous import 'BiocGenerics::setequal' by
#> 'S4Vectors::setequal' when loading 'IRanges'
#> Loading required package: GenomeInfoDb
#> Warning: replacing previous import 'BiocGenerics::setequal' by
#> 'S4Vectors::setequal' when loading 'GenomeInfoDb'
#> Warning: replacing previous import 'BiocGenerics::setequal' by
#> 'S4Vectors::setequal' when loading 'GenomicRanges'
#> Warning: replacing previous import 'BiocGenerics::setequal' by
#> 'S4Vectors::setequal' when loading 'XVector'
#> Loading required package: BiocFileCache
#> Loading required package: dbplyr
#> Warning: replacing previous import 'BiocGenerics::setequal' by
#> 'S4Vectors::setequal' when loading 'rtracklayer'
#> Warning: replacing previous import 'BiocGenerics::setequal' by
#> 'S4Vectors::setequal' when loading 'Biostrings'
#> Warning: multiple methods tables found for 'setequal'
#> Warning: replacing previous import 'BiocGenerics::setequal' by
#> 'S4Vectors::setequal' when loading 'GenomicAlignments'
#> Warning: replacing previous import 'BiocGenerics::setequal' by
#> 'S4Vectors::setequal' when loading 'SummarizedExperiment'
#> Warning: replacing previous import 'BiocGenerics::setequal' by
#> 'S4Vectors::setequal' when loading 'S4Arrays'
#> Warning: replacing previous import 'BiocGenerics::setequal' by
#> 'S4Vectors::setequal' when loading 'DelayedArray'
#> Warning: replacing previous import 'BiocGenerics::setequal' by
#> 'S4Vectors::setequal' when loading 'SparseArray'
#> Warning: replacing previous import 'S4Arrays::read_block' by
#> 'DelayedArray::read_block' when loading 'SummarizedExperiment'

Arguments

Create a GRanges object, assign a genome to it, and specify chain file

gr <- GRanges(
  seqname = Rle(
    c("chr1", "chr2"), 
    c(100000, 100000)
  ),
  ranges = IRanges(
    start = 1, end = 200000
  )
)
# Here, "hg19" is the source genome
genome(gr) <- "hg19"

# Here, we use the `system.file()` function because the chain file is in the
# package (however if you need to point to any other file on your machine,
# just do 'chain <- "path/to/your/hg19ToHg38.over.chain.gz"'):
chain <- system.file("extdata", "hg19ToHg38.over.chain.gz", package = "easylift")

gr
#> GRanges object with 200000 ranges and 0 metadata columns:
#>            seqnames    ranges strand
#>               <Rle> <IRanges>  <Rle>
#>        [1]     chr1  1-200000      *
#>        [2]     chr1  1-200000      *
#>        [3]     chr1  1-200000      *
#>        [4]     chr1  1-200000      *
#>        [5]     chr1  1-200000      *
#>        ...      ...       ...    ...
#>   [199996]     chr2  1-200000      *
#>   [199997]     chr2  1-200000      *
#>   [199998]     chr2  1-200000      *
#>   [199999]     chr2  1-200000      *
#>   [200000]     chr2  1-200000      *
#>   -------
#>   seqinfo: 2 sequences from hg19 genome; no seqlengths

Run

Call easylift with GRanges object, target genome and the chain file.

# Here, "hg38" is the target genome
easylift(gr, "hg38", chain)
#> GRanges object with 300000 ranges and 0 metadata columns:
#>            seqnames        ranges strand
#>               <Rle>     <IRanges>  <Rle>
#>        [1]     chr1  10001-177376      *
#>        [2]    chr19 242824-242864      *
#>        [3]     chr1  10001-177376      *
#>        [4]    chr19 242824-242864      *
#>        [5]     chr1  10001-177376      *
#>        ...      ...           ...    ...
#>   [299996]     chr2  10001-200000      *
#>   [299997]     chr2  10001-200000      *
#>   [299998]     chr2  10001-200000      *
#>   [299999]     chr2  10001-200000      *
#>   [300000]     chr2  10001-200000      *
#>   -------
#>   seqinfo: 25 sequences (1 circular) from hg38 genome

Run with BiocFileCache

To use BiocFileCache for the chain file, add it to the cache:

chain_file <- "/path/to/your/hg19ToHg38.over.chain.gz"
bfc <- BiocFileCache()

# Add chain file to cache if already not available
if (nrow(bfcquery(bfc, basename(chain_file))) == 0)
    bfcadd(bfc, chain_file)

Then, use it in easylift:

easylift(gr, "hg38") 
# or
gr |> easylift("hg38")

Citation

To cite package easylift in publications use:

Al Nahid A, Pagès H, Love M (2023). easylift: An R package to perform genomic liftover. R package version 1.0.0, https://github.com/nahid18/easylift.

A BibTeX entry for LaTeX users is

  @Manual{,
    title = {easylift: An R package to perform genomic liftover},
    author = {Abdullah Al Nahid, Hervé Pagès, Michael Love},
    year = {2023},
    note = {R package version 1.0.0},
    url = {https://github.com/nahid18/easylift},
  }

Please note that the easylift was only made possible thanks to many other R and bioinformatics software authors, which are cited either in the vignettes and/or the paper(s) describing this package.

Code of Conduct

Please note that the easylift project is released with a Contributor Code of Conduct. By contributing to this project, you agree to abide by its terms.

Session information

sessionInfo()
#> R version 4.4.1 (2024-06-14)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] easylift_1.5.0       BiocFileCache_2.15.0 dbplyr_2.5.0        
#>  [4] GenomicRanges_1.59.0 GenomeInfoDb_1.43.0  IRanges_2.41.0      
#>  [7] S4Vectors_0.45.0     BiocGenerics_0.53.1  generics_0.1.3      
#> [10] BiocStyle_2.35.0    
#> 
#> loaded via a namespace (and not attached):
#>  [1] SummarizedExperiment_1.37.0 rjson_0.2.23               
#>  [3] xfun_0.48                   bslib_0.8.0                
#>  [5] lattice_0.22-6              Biobase_2.67.0             
#>  [7] vctrs_0.6.5                 tools_4.4.1                
#>  [9] bitops_1.0-9                parallel_4.4.1             
#> [11] curl_5.2.3                  tibble_3.2.1               
#> [13] fansi_1.0.6                 RSQLite_2.3.7              
#> [15] blob_1.2.4                  pkgconfig_2.0.3            
#> [17] R.oo_1.26.0                 Matrix_1.7-1               
#> [19] lifecycle_1.0.4             GenomeInfoDbData_1.2.13    
#> [21] compiler_4.4.1              Rsamtools_2.23.0           
#> [23] Biostrings_2.75.0           codetools_0.2-20           
#> [25] htmltools_0.5.8.1           sys_3.4.3                  
#> [27] buildtools_1.0.0            sass_0.4.9                 
#> [29] RCurl_1.98-1.16             yaml_2.3.10                
#> [31] pillar_1.9.0                crayon_1.5.3               
#> [33] jquerylib_0.1.4             R.utils_2.12.3             
#> [35] BiocParallel_1.41.0         DelayedArray_0.33.1        
#> [37] cachem_1.1.0                abind_1.4-8                
#> [39] tidyselect_1.2.1            digest_0.6.37              
#> [41] dplyr_1.1.4                 restfulr_0.0.15            
#> [43] maketools_1.3.1             grid_4.4.1                 
#> [45] fastmap_1.2.0               SparseArray_1.7.0          
#> [47] cli_3.6.3                   magrittr_2.0.3             
#> [49] S4Arrays_1.7.1              XML_3.99-0.17              
#> [51] utf8_1.2.4                  filelock_1.0.3             
#> [53] UCSC.utils_1.3.0            bit64_4.5.2                
#> [55] rmarkdown_2.28              XVector_0.47.0             
#> [57] httr_1.4.7                  matrixStats_1.4.1          
#> [59] bit_4.5.0                   R.methodsS3_1.8.2          
#> [61] memoise_2.0.1               evaluate_1.0.1             
#> [63] knitr_1.48                  BiocIO_1.17.0              
#> [65] rtracklayer_1.67.0          rlang_1.1.4                
#> [67] glue_1.8.0                  DBI_1.2.3                  
#> [69] BiocManager_1.30.25         jsonlite_1.8.9             
#> [71] R6_2.5.1                    MatrixGenerics_1.19.0      
#> [73] GenomicAlignments_1.43.0    zlibbioc_1.51.2