Title: | Transcriptome CUTteR |
---|---|
Description: | Various mRNA sequencing library preparation methods generate sequencing reads specifically from the transcript ends. Analyses that focus on quantification of isoform usage from such data can be aided by using truncated versions of transcriptome annotations, both at the alignment or pseudo-alignment stage, as well as in downstream analysis. This package implements some convenience methods for readily generating such truncated annotations and their corresponding sequences. |
Authors: | Mervin Fansler [aut, cre] |
Maintainer: | Mervin Fansler <[email protected]> |
License: | GPL-3 |
Version: | 1.13.0 |
Built: | 2024-11-19 04:43:22 UTC |
Source: | https://github.com/bioc/txcutr |
Internal function for operating on individual GRanges
, where ranges
represent exons in a transcript. This is designed to be used in an
*apply
function over a GRangesList
object.
.clipTranscript(gr, maxTxLength)
.clipTranscript(gr, maxTxLength)
gr |
a |
maxTxLength |
a positive integer |
the clipped GRanges
object
Convert GRanges to Single Range
.fillReduce(gr, validate = TRUE)
.fillReduce(gr, validate = TRUE)
gr |
a |
validate |
logical determining whether entries should be checked for compatible seqnames and strands. |
The validation assumes seqnames and strand are Rle
objects.
GRanges
with single interval
Efficient Metadata Columns Mutation
## S4 method for signature 'CompressedGRangesList' .mutateEach(grl, ...)
## S4 method for signature 'CompressedGRangesList' .mutateEach(grl, ...)
grl |
a CompressedGRangesList |
... |
named list of vectors to insert as metadata columns on each element
|
a CompressedGRangesList
with all element GRanges
updated
with supplied metadata columns
Propagate Transcript Merge Map
.propagateMap(df, MAXITERS = 1000)
.propagateMap(df, MAXITERS = 1000)
df |
a |
MAXITERS |
a numeric controlling the maximum number of iterations |
a converged data.frame
, such that, tx_out
is not
present in any tx_in
Export Transcriptome as FASTA
exportFASTA(txdb, genome, file, ...)
exportFASTA(txdb, genome, file, ...)
txdb |
a |
genome |
a |
file |
a string for output FASTA file. File names ending in ".gz" will automatically use gzip compression. |
... |
additional arguments to pass through to
|
The txdb
argument is invisibly returned.
library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene) library(BSgenome.Scerevisiae.UCSC.sacCer3) ## load annotation and genome txdb <- TxDb.Scerevisiae.UCSC.sacCer3.sgdGene sacCer3 <- BSgenome.Scerevisiae.UCSC.sacCer3 ## restrict to 'chrI' transcripts (makes for briefer example runtime) seqlevels(txdb) <- c("chrI") ## last 500 nts per tx txdb_w500 <- truncateTxome(txdb) ## export uncompressed outfile <- tempfile("sacCer3.sgdGene.w500", fileext=".fa") exportFASTA(txdb_w500, sacCer3, outfile) ## export compressed outfile <- tempfile("sacCer3.sgdGene.w500", fileext=".fa.gz") exportFASTA(txdb_w500, sacCer3, outfile)
library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene) library(BSgenome.Scerevisiae.UCSC.sacCer3) ## load annotation and genome txdb <- TxDb.Scerevisiae.UCSC.sacCer3.sgdGene sacCer3 <- BSgenome.Scerevisiae.UCSC.sacCer3 ## restrict to 'chrI' transcripts (makes for briefer example runtime) seqlevels(txdb) <- c("chrI") ## last 500 nts per tx txdb_w500 <- truncateTxome(txdb) ## export uncompressed outfile <- tempfile("sacCer3.sgdGene.w500", fileext=".fa") exportFASTA(txdb_w500, sacCer3, outfile) ## export compressed outfile <- tempfile("sacCer3.sgdGene.w500", fileext=".fa.gz") exportFASTA(txdb_w500, sacCer3, outfile)
Exports a TxDb annotation to a GTF file
exportGTF(txdb, file, source = "txcutr")
exportGTF(txdb, file, source = "txcutr")
txdb |
transcriptome to be output |
file |
a string or |
source |
a string to go in the |
The txdb
argument is invisibly returned.
library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene) ## load annotation txdb <- TxDb.Scerevisiae.UCSC.sacCer3.sgdGene ## restrict to 'chrI' transcripts seqlevels(txdb) <- c("chrI") ## last 500 nts per tx txdb_w500 <- truncateTxome(txdb) ## export uncompressed outfile <- tempfile("sacCer3.sgdGene.w500", fileext=".gtf") exportGTF(txdb_w500, outfile) ## export compressed outfile <- tempfile("sacCer3.sgdGene.w500", fileext=".gtf.gz") exportGTF(txdb_w500, outfile)
library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene) ## load annotation txdb <- TxDb.Scerevisiae.UCSC.sacCer3.sgdGene ## restrict to 'chrI' transcripts seqlevels(txdb) <- c("chrI") ## last 500 nts per tx txdb_w500 <- truncateTxome(txdb) ## export uncompressed outfile <- tempfile("sacCer3.sgdGene.w500", fileext=".gtf") exportGTF(txdb_w500, outfile) ## export compressed outfile <- tempfile("sacCer3.sgdGene.w500", fileext=".gtf.gz") exportGTF(txdb_w500, outfile)
Export Merge Table for Transcriptome
exportMergeTable(txdb, file, minDistance = 200L)
exportMergeTable(txdb, file, minDistance = 200L)
txdb |
a |
file |
a string or |
minDistance |
the minimum separation to regard overlapping transcripts as unique. |
The txdb
argument is invisibly returned.
library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene) ## load annotation txdb <- TxDb.Scerevisiae.UCSC.sacCer3.sgdGene ## restrict to 'chrI' transcripts (makes for briefer example runtime) seqlevels(txdb) <- c("chrI") ## last 500 nts per tx txdb_w500 <- truncateTxome(txdb) ## export plain format outfile <- tempfile("sacCer3.sgdGene.w500", fileext=".tsv") exportMergeTable(txdb_w500, outfile) ## export compressed format outfile <- tempfile("sacCer3.sgdGene.w500", fileext=".tsv.gz") exportMergeTable(txdb_w500, outfile)
library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene) ## load annotation txdb <- TxDb.Scerevisiae.UCSC.sacCer3.sgdGene ## restrict to 'chrI' transcripts (makes for briefer example runtime) seqlevels(txdb) <- c("chrI") ## last 500 nts per tx txdb_w500 <- truncateTxome(txdb) ## export plain format outfile <- tempfile("sacCer3.sgdGene.w500", fileext=".tsv") exportMergeTable(txdb_w500, outfile) ## export compressed format outfile <- tempfile("sacCer3.sgdGene.w500", fileext=".tsv.gz") exportMergeTable(txdb_w500, outfile)
Generate Merge Table
generateMergeTable(txdb, minDistance = 200) ## S4 method for signature 'TxDb' generateMergeTable(txdb, minDistance = 200L)
generateMergeTable(txdb, minDistance = 200) ## S4 method for signature 'TxDb' generateMergeTable(txdb, minDistance = 200L)
txdb |
an object representing a transcriptome |
minDistance |
the minimum separation to regard overlapping transcripts as unique |
a data.frame
with three columns
- tx_in
the input transcript
- tx_out
the transcript merged into
- gene_out
the gene merged into
a data.frame
with three columns
- tx_in
the input transcript
- tx_out
the transcript merged into
- gene_out
the gene merged into
library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene) ## load annotation txdb <- TxDb.Scerevisiae.UCSC.sacCer3.sgdGene ## restrict to 'chrI' transcripts seqlevels(txdb) <- c("chrI") ## last 500 nts per tx txdb_w500 <- truncateTxome(txdb) txdb_w500 ## last 100 nts per tx txdb_w100 <- truncateTxome(txdb, maxTxLength=100) txdb_w100
library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene) ## load annotation txdb <- TxDb.Scerevisiae.UCSC.sacCer3.sgdGene ## restrict to 'chrI' transcripts seqlevels(txdb) <- c("chrI") ## last 500 nts per tx txdb_w500 <- truncateTxome(txdb) txdb_w500 ## last 100 nts per tx txdb_w100 <- truncateTxome(txdb, maxTxLength=100) txdb_w100
Truncate Transcriptome
truncateTxome(txdb, maxTxLength = 500, ...) ## S4 method for signature 'TxDb' truncateTxome(txdb, maxTxLength = 500, BPPARAM = bpparam())
truncateTxome(txdb, maxTxLength = 500, ...) ## S4 method for signature 'TxDb' truncateTxome(txdb, maxTxLength = 500, BPPARAM = bpparam())
txdb |
a |
maxTxLength |
the maximum length of transcripts |
... |
additional arguments |
BPPARAM |
A BiocParallelParam object specifying whether and how the method should be parallelized. |
a TxDb
object
a TxDb
object
library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene) ## load annotation txdb <- TxDb.Scerevisiae.UCSC.sacCer3.sgdGene ## restrict to 'chrI' transcripts seqlevels(txdb) <- c("chrI") ## last 500 nts per tx txdb_w500 <- truncateTxome(txdb) txdb_w500 ## last 100 nts per tx txdb_w100 <- truncateTxome(txdb, maxTxLength=100) txdb_w100
library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene) ## load annotation txdb <- TxDb.Scerevisiae.UCSC.sacCer3.sgdGene ## restrict to 'chrI' transcripts seqlevels(txdb) <- c("chrI") ## last 500 nts per tx txdb_w500 <- truncateTxome(txdb) txdb_w500 ## last 100 nts per tx txdb_w100 <- truncateTxome(txdb, maxTxLength=100) txdb_w100
Convert TxDb object to GRangesList
txdbToGRangesList( txdb, geneCols = c("gene_id"), transcriptCols = c("gene_id", "tx_name"), exonCols = c("gene_id", "tx_name", "exon_id", "exon_rank") )
txdbToGRangesList( txdb, geneCols = c("gene_id"), transcriptCols = c("gene_id", "tx_name"), exonCols = c("gene_id", "tx_name", "exon_id", "exon_rank") )
txdb |
a |
geneCols |
names of columns to include in the |
transcriptCols |
names of columns to include in the |
exonCols |
names of columns to include in the |
a GRangesList
object with entries c(genes, transcripts, exons)
library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene) ## load annotation txdb <- TxDb.Scerevisiae.UCSC.sacCer3.sgdGene grl <- txdbToGRangesList(txdb) grl
library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene) ## load annotation txdb <- TxDb.Scerevisiae.UCSC.sacCer3.sgdGene grl <- txdbToGRangesList(txdb) grl