A note on fimo16 in TFutils

Introduction

Sequence-based transcription factor (TF) affinity scoring can be conducted with the FIMO suite (see @Sonawane2017). We have serialized an object, fimo16, that holds references to FIMO outputs for 16 TFs.

# Attach the packages used below; startup banners are suppressed to keep the
# rendered output compact.
suppressPackageStartupMessages({
  library(TFutils)
  library(GenomicRanges)
})

# Auto-print the object to show its summary (ranges and files).
fimo16
## GenomicFiles object with 1 ranges and 16 files: 
## files: M0635_1.02sort.bed.gz, M3433_1.02sort.bed.gz, ..., M6159_1.02sort.bed.gz, M6497_1.02sort.bed.gz 
## detail: use files(), rowRanges(), colData(), ...

Although the token "bed" appears in the filenames, the files are not actually in BED format!

Importing with scanTabix

We can use reduceByRange to import selected scans.

if (!identical(.Platform$OS.type, "windows")) {
  # Query interval on chr17 (hg19); it is passed to fimo_granges() later on.
  # NOTE(review): myg is not used by the reduceByRange() call in this block,
  # which appears to scan the ranges already stored in fimo16 -- confirm.
  si <- TFutils::seqinfo_hg19_chr17
  myg <- GRanges("chr17", IRanges(38.07e6, 38.09e6), seqinfo = si)
  # Label the files by HGNC symbol so they can be selected by TF name.
  colnames(fimo16) <- fimo16$HGNC
  # For each (range, file) pair, pull the raw tabix records as text lines.
  lk2 <- reduceByRange(
    fimo16[, c("POU2F1", "VDR")],
    MAP = function(r, f) scanTabix(f, param = r)
  )
  # Show the nested list structure of the result.
  str(lk2)
}
## List of 1
##  $ :List of 2
##   ..$ POU2F1:List of 1
##   .. ..$ chr17:38077000-38084000: chr [1:12] "chr17\t38077313\t38077331\tchr17:38077313-38077331\t2.15306\t+\t0.000352" "chr17\t38078549\t38078567\tchr17:38078549-38078567\t0.102041\t-\t0.000634" "chr17\t38078556\t38078574\tchr17:38078556-38078574\t-0.0408163\t-\t0.00066" "chr17\t38080045\t38080063\tchr17:38080045-38080063\t1.66327\t-\t0.000407" ...
##   ..$ VDR   :List of 1
##   .. ..$ chr17:38077000-38084000: chr [1:18] "chr17\t38077445\t38077460\tchr17:38077445-38077460\t-1.9899\t-\t0.000666" "chr17\t38078536\t38078551\tchr17:38078536-38078551\t-2.66667\t-\t0.0008" "chr17\t38078574\t38078589\tchr17:38078574-38078589\t1.65657\t+\t0.000235" "chr17\t38078796\t38078811\tchr17:38078796-38078811\t2.07071\t-\t0.000207" ...

This result can be massaged into a GRanges or other desirable structure. fimo_granges takes care of this.

#fimo_ranges = function(gf, query) { # prototypical code; the packaged function is fimo_granges
# rowRanges(gf) = query
# ans = reduceByRange(gf, MAP=function(r,f) scanTabix(f, param=r))
# ans = unlist(ans, recursive=FALSE)  # drop top list structure
# tabs = lapply(ans, lapply, function(x) {
#     con = textConnection(x)
#     on.exit(close(con))
#     dtf = read.delim(con, h=FALSE, stringsAsFactors=FALSE, sep="\t")
#     colnames(dtf) = c("chr", "start", "end", "rname", "score", "dir", "pval")
#     ans = with(dtf, GRanges(seqnames=chr, IRanges(start, end),
#            rname=rname, score=score, dir=dir, pval=pval))
#     ans
#     })
# GRangesList(unlist(tabs, recursive=FALSE))
#}
if (!identical(.Platform$OS.type, "windows")) {
  # Import the POU2F1 and VDR scans over the query interval myg; the result
  # is auto-printed as the value of this block.
  rr <- fimo_granges(fimo16[, c("POU2F1", "VDR")], myg)
  rr
}
## $POU2F1
## $POU2F1$`chr17:38070000-38090000`
## GRanges object with 76 ranges and 4 metadata columns:
##        seqnames            ranges strand |                  rname      score
##           <Rle>         <IRanges>  <Rle> |            <character>  <numeric>
##    [1]    chr17 38070239-38070257      * | chr17:38070239-38070.. -1.5408200
##    [2]    chr17 38070579-38070597      * | chr17:38070579-38070.. -0.9693880
##    [3]    chr17 38070851-38070869      * | chr17:38070851-38070..  0.1224490
##    [4]    chr17 38071025-38071043      * | chr17:38071025-38071..  0.0918367
##    [5]    chr17 38071253-38071271      * | chr17:38071253-38071..  3.6734700
##    ...      ...               ...    ... .                    ...        ...
##   [72]    chr17 38088602-38088620      * | chr17:38088602-38088..    4.06122
##   [73]    chr17 38088637-38088655      * | chr17:38088637-38088..   11.69390
##   [74]    chr17 38089141-38089159      * | chr17:38089141-38089..   13.18370
##   [75]    chr17 38089439-38089457      * | chr17:38089439-38089..   -1.35714
##   [76]    chr17 38089822-38089840      * | chr17:38089822-38089..    3.67347
##                dir      pval
##        <character> <numeric>
##    [1]           +  0.000989
##    [2]           -  0.000849
##    [3]           -  0.000631
##    [4]           -  0.000636
##    [5]           +  0.000222
##    ...         ...       ...
##   [72]           +  1.98e-04
##   [73]           -  1.32e-05
##   [74]           -  7.09e-06
##   [75]           -  9.42e-04
##   [76]           -  2.22e-04
##   -------
##   seqinfo: 1 sequence from hg19 genome
## 
## 
## $VDR
## $VDR$`chr17:38070000-38090000`
## GRanges object with 40 ranges and 4 metadata columns:
##        seqnames            ranges strand |                  rname      score
##           <Rle>         <IRanges>  <Rle> |            <character>  <numeric>
##    [1]    chr17 38070016-38070031      * | chr17:38070016-38070..  0.0505051
##    [2]    chr17 38070387-38070402      * | chr17:38070387-38070.. -3.3838400
##    [3]    chr17 38070925-38070940      * | chr17:38070925-38070..  5.7171700
##    [4]    chr17 38071183-38071198      * | chr17:38071183-38071.. -0.6767680
##    [5]    chr17 38072289-38072304      * | chr17:38072289-38072.. -0.3333330
##    ...      ...               ...    ... .                    ...        ...
##   [36]    chr17 38086915-38086930      * | chr17:38086915-38086..  -0.353535
##   [37]    chr17 38087304-38087319      * | chr17:38087304-38087..   0.101010
##   [38]    chr17 38087866-38087881      * | chr17:38087866-38087..   5.343430
##   [39]    chr17 38088893-38088908      * | chr17:38088893-38088..  -0.111111
##   [40]    chr17 38089214-38089229      * | chr17:38089214-38089..   1.101010
##                dir      pval
##        <character> <numeric>
##    [1]           +  3.77e-04
##    [2]           +  9.66e-04
##    [3]           -  6.44e-05
##    [4]           +  4.63e-04
##    [5]           -  4.21e-04
##    ...         ...       ...
##   [36]           -  0.000423
##   [37]           -  0.000371
##   [38]           +  0.000073
##   [39]           -  0.000395
##   [40]           -  0.000277
##   -------
##   seqinfo: 1 sequence from hg19 genome
# Record the R version, platform, and package versions used for this run.
utils::sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] UpSetR_1.4.0                magrittr_2.0.3             
##  [3] dplyr_1.1.4                 gwascat_2.37.0             
##  [5] GSEABase_1.67.0             graph_1.83.0               
##  [7] annotate_1.83.0             XML_3.99-0.17              
##  [9] png_0.1-8                   ggplot2_3.5.1              
## [11] knitr_1.48                  data.table_1.16.0          
## [13] GO.db_3.19.1                GenomicFiles_1.41.0        
## [15] rtracklayer_1.65.0          Rsamtools_2.21.1           
## [17] Biostrings_2.73.1           XVector_0.45.0             
## [19] BiocParallel_1.39.0         SummarizedExperiment_1.35.1
## [21] GenomicRanges_1.57.1        GenomeInfoDb_1.41.1        
## [23] MatrixGenerics_1.17.0       matrixStats_1.3.0          
## [25] org.Hs.eg.db_3.19.1         AnnotationDbi_1.67.0       
## [27] IRanges_2.39.2              S4Vectors_0.43.2           
## [29] Biobase_2.65.1              BiocGenerics_0.51.0        
## [31] TFutils_1.25.0              BiocStyle_2.33.1           
## 
## loaded via a namespace (and not attached):
##  [1] DBI_1.2.3                bitops_1.0-8             gridExtra_2.3           
##  [4] readxl_1.4.3             rlang_1.1.4              compiler_4.4.1          
##  [7] RSQLite_2.3.7            GenomicFeatures_1.57.0   vctrs_0.6.5             
## [10] pkgconfig_2.0.3          crayon_1.5.3             fastmap_1.2.0           
## [13] dbplyr_2.5.0             labeling_0.4.3           utf8_1.2.4              
## [16] promises_1.3.0           rmarkdown_2.28           tzdb_0.4.0              
## [19] UCSC.utils_1.1.0         bit_4.0.5                xfun_0.47               
## [22] zlibbioc_1.51.1          cachem_1.1.0             jsonlite_1.8.8          
## [25] blob_1.2.4               highr_0.11               later_1.3.2             
## [28] DelayedArray_0.31.11     parallel_4.4.1           R6_2.5.1                
## [31] VariantAnnotation_1.51.0 bslib_0.8.0              jquerylib_0.1.4         
## [34] cellranger_1.1.0         Rcpp_1.0.13              readr_2.1.5             
## [37] splines_4.4.1            httpuv_1.6.15            Matrix_1.7-0            
## [40] tidyselect_1.2.1         abind_1.4-5              yaml_2.3.10             
## [43] codetools_0.2-20         miniUI_0.1.1.1           curl_5.2.2              
## [46] plyr_1.8.9               lattice_0.22-6           tibble_3.2.1            
## [49] withr_3.0.1              shiny_1.9.1              KEGGREST_1.45.1         
## [52] evaluate_0.24.0          survival_3.7-0           BiocFileCache_2.13.0    
## [55] snpStats_1.55.0          pillar_1.9.0             BiocManager_1.30.25     
## [58] filelock_1.0.3           generics_0.1.3           RCurl_1.98-1.16         
## [61] hms_1.1.3                munsell_0.5.1            scales_1.3.0            
## [64] xtable_1.8-4             glue_1.7.0               maketools_1.3.0         
## [67] tools_4.4.1              BiocIO_1.15.2            sys_3.4.2               
## [70] BSgenome_1.73.0          GenomicAlignments_1.41.0 buildtools_1.0.0        
## [73] colorspace_2.1-1         GenomeInfoDbData_1.2.12  restfulr_0.0.15         
## [76] cli_3.6.3                fansi_1.0.6              S4Arrays_1.5.7          
## [79] gtable_0.3.5             sass_0.4.9               digest_0.6.37           
## [82] SparseArray_1.5.31       farver_2.1.2             rjson_0.2.22            
## [85] memoise_2.0.1            htmltools_0.5.8.1        lifecycle_1.0.4         
## [88] httr_1.4.7               mime_0.12                bit64_4.0.5