| Title: | splicelogic: differential transcripts to splice events |
|---|---|
| Description: | Translate differential transcript usage results into discrete splice events. |
| Authors: | Beatriz Campillo [aut, cre] (ORCID: <https://orcid.org/0000-0001-7323-9125>), Michael Love [aut] (ORCID: <https://orcid.org/0000-0001-8401-0545>), NIH NHGRI [fnd], Wellcome Trust [fnd] |
| Maintainer: | Beatriz Campillo <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 1.1.2 |
| Built: | 2026-05-14 17:52:17 UTC |
| Source: | https://github.com/bioc/splicelogic |
For more details on the features of splicelogic, read the vignette:
browseVignettes(package = "splicelogic")
Maintainer: Beatriz Campillo [email protected] (ORCID)
Authors:
Michael Love [email protected] (ORCID)
Other contributors:
NIH NHGRI [funder]
Wellcome Trust [funder]
Useful links:
Report bugs at https://github.com/thelovelab/splicelogic/issues
Create mock GRanges data for splicing event testing
create_mock_data(n_genes = 1, n_tx_per_gene = 2, n_exons_per_tx = 5, coef_range = c(-1, 1))
n_genes |
Number of genes to simulate |
n_tx_per_gene |
Number of transcripts per gene |
n_exons_per_tx |
Number of exons per transcript |
coef_range |
Range of coefficient values to sample from |
A GRanges object with simulated transcripts and exons
# create mock data with 2 genes, 4 transcripts
# per gene, and 4 exons per transcript
gr <- create_mock_data(n_genes = 2, n_tx_per_gene = 4, n_exons_per_tx = 4)
Functions to find different types of alternative splicing events from preprocessed GRanges exon data. Events include skipped exon (se), included exon (ie), mutually exclusive exons (mxe), retained intron (ri), and alternative 5' and 3' splice sites (a5ss / a3ss).
find_se(gr, type = c("boundary", "over", "in"), inverse = FALSE)
find_ie(gr, type = c("boundary", "over", "in"))
find_mxe(gr, type = c("boundary", "in", "over"))
find_ri(gr)
find_a5ss(gr)
find_a3ss(gr)
find_all_events(gr, type = c("boundary", "over", "in"), verbose = TRUE)
gr |
A GRanges object with exon annotations, including 'tx_id', 'exon', and 'coef_col' metadata columns and preprocessed with preprocess(). |
type |
The type of overlap to consider when identifying events. |
inverse |
If TRUE, identifies included exons instead of skipped exons. |
verbose |
If TRUE, prints progress messages. Default TRUE. |
A GRanges object with the detected exon ranges and the following additional metadata columns:
event_type: The type of splicing event detected (e.g. "se", "ie", "mxe", "ri", "a5ss", "a3ss").
event_tx_id: Transcript ID of the paired transcript involved in the event.
event_estimate: DTU coefficient of the paired transcript.
event_<col>: One column per name in metadata(gr)$additional_columns, prefixed with event_, carrying the corresponding value from the paired transcript.
find_se(): skipped exons
find_ie(): included exons
find_mxe(): mutually exclusive exons
find_ri(): retained introns
find_a5ss(): alternative 5' splice sites
find_a3ss(): alternative 3' splice sites
find_all_events(): all detected events
# make some mock data and run the function
gr <- create_mock_data(n_genes = 2, n_tx_per_gene = 4, n_exons_per_tx = 4) |>
  preprocess(coef_col = "estimate") |>
  generate_se(n_events = 1)

# this should find the skipped exon events we generated
find_se(gr, type = "boundary")
find_ie(gr, type = "boundary")

# detect mutually exclusive exons
gr_mx <- create_mock_data(
  n_genes = 2, n_tx_per_gene = 4, n_exons_per_tx = 4
) |>
  preprocess(coef_col = "estimate") |>
  generate_mxe(n_events = 1)
find_mxe(gr_mx, type = "boundary")

# detect retained introns
gr_ri <- create_mock_data(
  n_genes = 2, n_tx_per_gene = 4, n_exons_per_tx = 4
) |>
  preprocess(coef_col = "estimate") |>
  generate_ri(n_events = 1)
find_ri(gr_ri)

# detect alternative 5' splice sites
gr_a5 <- create_mock_data(
  n_genes = 2, n_tx_per_gene = 4, n_exons_per_tx = 4
) |>
  preprocess(coef_col = "estimate") |>
  generate_a5ss(n_events = 1)
find_a5ss(gr_a5)

# detect alternative 3' splice sites
gr_a3 <- create_mock_data(
  n_genes = 2, n_tx_per_gene = 4, n_exons_per_tx = 4
) |>
  preprocess(coef_col = "estimate") |>
  generate_a3ss(n_events = 1)
find_a3ss(gr_a3)

# detect all event types at once
gr_all <- create_mock_data(
  n_genes = 2, n_tx_per_gene = 4, n_exons_per_tx = 4
) |>
  preprocess(coef_col = "estimate") |>
  generate_se(n_events = 1)
find_all_events(gr_all, type = "boundary", verbose = FALSE)
Functions to introduce specific types of alternative splicing events into mock GRanges data for testing purposes.
generate_se(gr, n_events = 1)
generate_mxe(gr, n_events = 1)
generate_ri(gr, n_events = 1)
generate_a5ss(gr, n_events = 1)
generate_a3ss(gr, n_events = 1)
gr |
A GRanges object with metadata columns: 'exon_rank', 'gene_id', 'tx_id', and 'estimate'. |
n_events |
Number of events to generate |
generate_se(): A GRanges object with skipped exon
events introduced
generate_mxe(): A GRanges object with mutually exclusive exon
events introduced
generate_ri(): A GRanges object with retained intron
events introduced
generate_a5ss(): A GRanges object with alternative 5' splice site
events introduced
generate_a3ss(): A GRanges object with alternative 3' splice site
events introduced
gr <- create_mock_data(
  n_genes = 2, n_tx_per_gene = 4, n_exons_per_tx = 4
)
generate_se(gr, n_events = 1)

gr <- create_mock_data(
  n_genes = 2, n_tx_per_gene = 4, n_exons_per_tx = 4
)
generate_mxe(gr, n_events = 1)

gr <- create_mock_data(
  n_genes = 2, n_tx_per_gene = 4, n_exons_per_tx = 4
)
generate_ri(gr, n_events = 1)

gr <- create_mock_data(
  n_genes = 2, n_tx_per_gene = 4, n_exons_per_tx = 4
)
generate_a5ss(gr, n_events = 1)

gr <- create_mock_data(
  n_genes = 2, n_tx_per_gene = 4, n_exons_per_tx = 4
)
generate_a3ss(gr, n_events = 1)
Extracts exon ranges from a TxDb object, merges them with
differential transcript usage (DTU) results, and returns a flat
GRanges ready for preprocess.
prepare_exons(txdb, dtu_table, coef_col, tx_id_col = "tx_id", gene_id_col = "gene_id", verbose = TRUE)
txdb |
A TxDb object from which exon ranges are extracted. |
dtu_table |
A data.frame or tibble with DTU results. Must contain columns for transcript ID, gene ID, and a coefficient. |
coef_col |
Column name in dtu_table holding the coefficient. |
tx_id_col |
Column name in dtu_table holding the transcript ID. Default "tx_id". |
gene_id_col |
Column name in dtu_table holding the gene ID. Default "gene_id". |
verbose |
Whether to print progress messages. Default TRUE. |
A GRanges object with metadata columns: gene_id,
tx_id, exon_rank, the coefficient column, and any
additional columns from dtu_table.
library(AnnotationHub)
library(AnnotationDbi)
library(GenomicFeatures)
library(tibble)

ah <- AnnotationHub()
txdb <- ah[["AH84134"]] # fly TxDb (Drosophila melanogaster)

# build a simulated DTU table from the TxDb transcripts
txps <- txdb |>
  AnnotationDbi::select(
    keys(txdb, "TXID"),
    c("TXNAME", "GENEID"),
    "TXID"
  ) |>
  tibble::as_tibble() |>
  dplyr::select(tx_id = TXNAME, gene_id = GENEID) |>
  dplyr::filter(!is.na(gene_id))

sim_dtu_table <- txps |>
  dplyr::mutate(
    padj = runif(dplyr::n()),
    effect_est = rnorm(dplyr::n())
  )

fly_exons <- prepare_exons(
  txdb,
  sim_dtu_table,
  coef_col = "effect_est",
  verbose = TRUE
)
Combines two transcript partitions (up- and down-regulated) and assigns
an estimate coefficient: +1 to up and -1 to
down. Accepts either GRanges objects or character vectors of
transcript IDs (in which case txdb is required to look up exon
coordinates). The result is ready to pass to preprocess
with coef_col = "estimate".
prepare_exons_by_partition(up, down, txdb = NULL, tx_id_col = "TXNAME", verbose = TRUE)
up |
A GRanges object or character vector of transcript IDs for
the upregulated partition (assigned estimate = +1). |
down |
A GRanges object or character vector of transcript IDs for
the downregulated partition (assigned estimate = -1). |
txdb |
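A TxDb object, required when up and down are character vectors of transcript IDs (used to look up exon coordinates). Default NULL. |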
tx_id_col |
The keytype in txdb used to look up the transcript IDs. Default "TXNAME". |
verbose |
Whether to print progress messages. Default TRUE. |
When up and down are GRanges, both must have
exon_rank, gene_id, and tx_id metadata columns.
Extra columns are kept; if one object lacks a column present in the
other, those entries receive NA.
A combined GRanges object with an estimate column
(+1 for up, -1 for down),
ready for preprocess.
# GRanges input
gr <- create_mock_data(n_genes = 1, n_tx_per_gene = 4, n_exons_per_tx = 4)
gr <- generate_se(gr, n_events = 1)
gr_down <- gr[gr$estimate < 0]
gr_up <- gr[gr$estimate > 0]
prepare_exons_by_partition(gr_up, gr_down) |>
  preprocess(coef_col = "estimate")
This function checks that the input is a valid GRanges object with required metadata columns, then adds a unique key, the number of exons per transcript, and an 'internal' flag for each exon.
preprocess(gr, coef_col, method_string = NULL, additional_columns = NULL)
gr |
A GRanges object with metadata columns: 'exon_rank', 'gene_id', 'tx_id', and the coefficient column named by coef_col (e.g. 'estimate'). |
coef_col |
The name of the metadata column indicating upregulated (+1) and downregulated (-1) exons. |
method_string |
The Differential Transcript Usage (DTU) method used to obtain the coef_col, for annotation purposes (optional). |
additional_columns |
A character vector of metadata column
names to record for downstream use. Stored in
metadata(gr)$additional_columns. |
A GRanges object with added 'key', 'nexons', and 'internal' columns.
# create mock data and run preprocessing
gr <- create_mock_data(n_genes = 2, n_tx_per_gene = 4, n_exons_per_tx = 4) |>
  preprocess(coef_col = "estimate", method_string = "mock_method")