Title: | Splice Interpreter of Transcripts |
---|---|
Description: | Provides tools to analyze alternative splicing sites, interpret outcomes based on sequence information, select and design primers for site validation and give a visual representation of the event to guide downstream experiments. |
Authors: | Diana Low [aut, cre] |
Maintainer: | Diana Low <[email protected]> |
License: | GPL-2 |
Version: | 1.33.0 |
Built: | 2024-11-18 04:44:02 UTC |
Source: | https://github.com/bioc/SPLINTER |
Acceptor site mammalian frequency matrices for GT-AG pairs from SpliceDB
data("acceptor.m")
data("acceptor.m")
The format is: num [1:4, 1:15] 9 31.03 12.5 42.36 8.44 ... - attr(*, "dimnames")=List of 2 ..$ : chr [1:4] "A" "C" "G" "T" ..$ : chr [1:15] "V1" "V2" "V3" "V4" ...
http://www.softberry.com/spldb/SpliceDB.html
Burset M., Seledtsov I., Solovyev V. (Nucl.Acids Res.,2000,28,4364-4375; Nucl. Acids Res.,2001,29,255-259)
data(acceptor.m)
data(acceptor.m)
Adds annotation to extractSpliceEvents
object (if not present)
addEnsemblAnnotation(data, species = "hsapiens")
addEnsemblAnnotation(data, species = "hsapiens")
data |
|
species |
character. biomaRt species passed to retrieve annotation. Common species include: 'hsapiens','mmusculus' |
extractSpliceEvents
object with annotated genes under $geneSymbol
Diana Low
http://asia.ensembl.org/info/data/biomart/biomart_r_package.html#biomartexamples
data_path<-system.file("extdata",package="SPLINTER") splice_data<-extractSpliceEvents(data=paste(data_path,"/skipped_exons.txt",sep="")) #splice_data<-addEnsemblAnnotation(data=splice_data,species="mmusculus")
data_path<-system.file("extdata",package="SPLINTER") splice_data<-extractSpliceEvents(data=paste(data_path,"/skipped_exons.txt",sep="")) #splice_data<-addEnsemblAnnotation(data=splice_data,species="mmusculus")
Gives detailed description of splicing event in terms of splicing outcome post translation. Currently supports exon skipping and intron retention events.
annotateEvents( thedata, db, bsgenome, outputdir, full_output = FALSE, output_prefix = "results" )
annotateEvents( thedata, db, bsgenome, outputdir, full_output = FALSE, output_prefix = "results" )
thedata |
list. output of extractSpliceEvents. |
db |
TxDb object |
bsgenome |
BSGenome object |
outputdir |
character. relative output directory to current location. |
full_output |
logical. writes out detailed text report and generate figures. |
output_prefix |
character. text prefix for full_output files. |
list containing information on (1) data.frame with splicing regions (2) splice event type
Diana LOW
call primer3 for a given set of DNAstringSet object
callPrimer3( seq, size_range = "150-500", Tm = c(57, 59, 62), name = "Primer1", primer3 = "primer3-2.3.7/bin/primer3_core", thermo.param = "primer3-2.3.7/src/primer3_config/", sequence_target = NULL, settings = "primer3-2.3.7/primer3web_v4_0_0_default_settings.txt" )
callPrimer3( seq, size_range = "150-500", Tm = c(57, 59, 62), name = "Primer1", primer3 = "primer3-2.3.7/bin/primer3_core", thermo.param = "primer3-2.3.7/src/primer3_config/", sequence_target = NULL, settings = "primer3-2.3.7/primer3web_v4_0_0_default_settings.txt" )
seq |
DNAstring object, one DNA string for the given amplicon |
size_range |
default: '150-500' |
Tm |
melting temperature parameters. default: c(57, 59, 62) |
name |
name of the amplicon in chr_start_end format |
primer3 |
primer3 path |
thermo.param |
thermodynamic parameters folder |
sequence_target |
If one or more targets is specified then a legal primer pair must flank at least one of them. |
settings |
text file for parameters |
modified to include SEQUENCE_TARGET as an option
data.frame of designed primers and parameters
Altuna Akalin's modified Arnaud Krebs' original function further modified here by Diana Low
### NOT RUN ### # primer_results<-callPrimer3(seq='')
### NOT RUN ### # primer_results<-callPrimer3(seq='')
checkPrimer
checkPrimer(pp, genome, roi = NULL)
checkPrimer(pp, genome, roi = NULL)
pp |
data.frame defining primers, or output of callPrimer3 |
genome |
BSgenome object |
roi |
|
list of GRanges with primer locations
Diana Low
# create a primer pair roi primer_pair <- data.frame(PRIMER_LEFT_SEQUENCE="agctcttgaaattggagctgac", PRIMER_RIGHT_SEQUENCE="cttagaaagaacaggaaatcc", stringsAsFactors=FALSE)
# create a primer pair roi primer_pair <- data.frame(PRIMER_LEFT_SEQUENCE="agctcttgaaattggagctgac", PRIMER_RIGHT_SEQUENCE="cttagaaagaacaggaaatcc", stringsAsFactors=FALSE)
compatible_cds
data(compatible_cds) ## maybe str(compatible_cds) ; plot(compatible_cds) ...
data(compatible_cds) ## maybe str(compatible_cds) ; plot(compatible_cds) ...
compatible_tx
data(compatible_tx) ## maybe str(compatible_tx) ; plot(compatible_tx) ...
data(compatible_tx) ## maybe str(compatible_tx) ; plot(compatible_tx) ...
Donor site mammalian frequency matrices for GT-AG pairs from SpliceDB
data("donor.m")
data("donor.m")
The format is: num [1:4, 1:9] 34.1 36.2 18.3 11.4 60.4 ... - attr(*, "dimnames")=List of 2 ..$ : chr [1:4] "A" "C" "G" "T" ..$ : chr [1:9] "V1" "V2" "V3" "V4" ...
http://www.softberry.com/spldb/SpliceDB.html
Burset M., Seledtsov I., Solovyev V. (Nucl.Acids Res.,2000,28,4364-4375; Nucl. Acids Res.,2001,29,255-259)
data(donor.m)
data(donor.m)
Compares two sequences and gives differences if there's a switch from 1->2. If seq2 is NULL, assumes seq1 is a list of length 2 to compare.
eventOutcomeCompare( seq1, seq2 = NULL, genome, direction = TRUE, fullseq = TRUE, verbose = FALSE )
eventOutcomeCompare( seq1, seq2 = NULL, genome, direction = TRUE, fullseq = TRUE, verbose = FALSE )
seq1 |
GRangesList |
seq2 |
GRangesList |
genome |
BSGenome object |
direction |
logical. Report direction of sequence change. |
fullseq |
logical. Report full sequences. |
verbose |
logical. turn messages on/off. |
list containing
(1) tt : PairwiseAlignmentsSingleSubject pairwise alignment
(2) eventtypes : string detailing primary event classification
Diana LOW
suppressMessages(library(BSgenome.Mmusculus.UCSC.mm9)) bsgenome<-BSgenome.Mmusculus.UCSC.mm9 eventOutcomeCompare(seq1=compatible_cds$hits[[1]],seq2=region_minus_exon, genome=bsgenome,direction=TRUE)
suppressMessages(library(BSgenome.Mmusculus.UCSC.mm9)) bsgenome<-BSgenome.Mmusculus.UCSC.mm9 eventOutcomeCompare(seq1=compatible_cds$hits[[1]],seq2=region_minus_exon, genome=bsgenome,direction=TRUE)
translates sequences, reports if NMD or NTC
eventOutcomeTranslate( seq1, genome, direction = FALSE, fullseq = TRUE, verbose = FALSE )
eventOutcomeTranslate( seq1, genome, direction = FALSE, fullseq = TRUE, verbose = FALSE )
seq1 |
GRangesList |
genome |
BSGenome object |
direction |
logical. Report direction of sequence change. |
fullseq |
logical. Output full AA sequence. |
verbose |
logical. turn messages on/off. |
list of translated sequences
Diana LOW
suppressMessages(library(BSgenome.Mmusculus.UCSC.mm9)) bsgenome<-BSgenome.Mmusculus.UCSC.mm9 translation_results<-eventOutcomeTranslate(compatible_cds,genome=bsgenome, direction=TRUE)
suppressMessages(library(BSgenome.Mmusculus.UCSC.mm9)) bsgenome<-BSgenome.Mmusculus.UCSC.mm9 translation_results<-eventOutcomeTranslate(compatible_cds,genome=bsgenome, direction=TRUE)
eventPlot
eventPlot( transcripts, roi_plot = NULL, bams = c(), names = c(), annoLabel = c("Gene A"), rspan = 1000, pfam_dom = NULL, showAll = TRUE )
eventPlot( transcripts, roi_plot = NULL, bams = c(), names = c(), annoLabel = c("Gene A"), rspan = 1000, pfam_dom = NULL, showAll = TRUE )
transcripts |
GRanges object |
roi_plot |
GRanges object region to plot |
bams |
character vector of bam file locations |
names |
character vector of name labels |
annoLabel |
character. annotation label |
rspan |
integer or NULL. number of basepairs to span from roi. if NULL, will consider whole gene of roi |
pfam_dom |
optional GRanges object of PFAM domains from UCSC Tables. |
showAll |
logical. TRUE = display splice junctions of entire view or FALSE = just roi. |
a Gviz plot of genomic region
Diana Low
# define BAM files data_path<-system.file("extdata",package="SPLINTER") mt<-paste(data_path,"/mt_chr14.bam",sep="") wt<-paste(data_path,"/wt_chr14.bam",sep="") # plot results eventPlot(transcripts=valid_tx,roi_plot=roi,bams=c(wt,mt), names=c('wt','mt'),rspan=1000)
# define BAM files data_path<-system.file("extdata",package="SPLINTER") mt<-paste(data_path,"/mt_chr14.bam",sep="") wt<-paste(data_path,"/wt_chr14.bam",sep="") # plot results eventPlot(transcripts=valid_tx,roi_plot=roi,bams=c(wt,mt), names=c('wt','mt'),rspan=1000)
extend the span of the current ROI by n number of up/downstream exon(s) by modifying roi_range within the makeROI object while retaining legacy sites by keeping $roi and $flank
extendROI(roi, tx, up = 0, down = 0, type = 1)
extendROI(roi, tx, up = 0, down = 0, type = 1)
roi |
|
tx |
GRangesList transcript list to pull regions from |
up |
integer. number of exons to extend upstream |
down |
integer. number of exons to extend downstream |
type |
integer. 1=full cassette, 2=flank only |
makeROI
object with modified ranges
extendROI(roi,valid_tx,up=1)
extendROI(roi,valid_tx,up=1)
Extracts the location of target, upstream and downstream splice sites. Used for calculations and genome visualizations.
extractSpliceEvents( data = NULL, filetype = "mats", splicetype = "SE", fdr = 1, inclusion = 1, start0 = TRUE )
extractSpliceEvents( data = NULL, filetype = "mats", splicetype = "SE", fdr = 1, inclusion = 1, start0 = TRUE )
data |
character. path to file |
filetype |
character. type of splicing output. c('mats','custom'). see Details. |
splicetype |
character. c('SE', 'RI', 'MXE', 'A5SS', 'A3SS') |
fdr |
numeric. false discovery rate filter range [0,1] |
inclusion |
numeric. splicing inclusion range, takes absolute value |
start0 |
boolean 0-base start |
filetype 'custom' should provide a 9-column tab-delimited text file with the following columns: ID (Ensembl gene id), Symbol (gene name), chr, strand, exonStart, exonEnd, exon2Start, exon2End, upstreamStart, upstreamEnd, downstreamStart, downstreamEnd eg. ENSG0000012345 chr1 + 3 4 5 6 1 2 7 8
list containing information on
(1) original file type
(2) splice event type
(3) data.frame with splicing regions
Diana Low
http://rnaseq-mats.sourceforge.net/user_guide.htm for MATS file definition
data_path<-system.file("extdata",package="SPLINTER") splice_data<-extractSpliceEvents(data=paste(data_path,"/skipped_exons.txt",sep=""))
data_path<-system.file("extdata",package="SPLINTER") splice_data<-extractSpliceEvents(data=paste(data_path,"/skipped_exons.txt",sep=""))
Extracts and formats to bed the location of target, upstream and downstream splice sites
extractSpliceSites( df, target = "SE", site = "donor", motif_range = c(-3, 6), start0 = TRUE )
extractSpliceSites( df, target = "SE", site = "donor", motif_range = c(-3, 6), start0 = TRUE )
df |
extractSpliceEvents object |
target |
the target site to extract. See Details. |
site |
character donor or acceptor |
motif_range |
numeric vector of splice position to extract |
start0 |
boolean 0-base start |
target : the site to extract the sequence from. It can be either the event in question (SE, RI, MXE - first exon, MXE2 - second exon, A5SSlong, A5SSshort, A3SSlong, A3SSshort, upstream or downstream). If this function is used in conjunction with shapiroDonor or shapiroAcceptor to compute scores, then most likely it will be run twice - once for the event, and the other either up- or downstream as a comparison.
GRanges object
Diana Low
http://rnaseq-mats.sourceforge.net/user_guide.htm for MATS file definition
data_path<-system.file("extdata",package="SPLINTER") splice_data<-extractSpliceEvents(data=paste(data_path,"/skipped_exons.txt",sep="")) splice_sites<-extractSpliceSites(splice_data,target="SE")
data_path<-system.file("extdata",package="SPLINTER") splice_data<-extractSpliceEvents(data=paste(data_path,"/skipped_exons.txt",sep="")) splice_sites<-extractSpliceSites(splice_data,target="SE")
Which transcript contains the event? Each event has 2 possibilities; as long as the transcript fulfills one, it passes the test. Has to be exact (inner junctions).
findCompatibleEvents(tx, tx2 = NULL, roi, sequential = TRUE, verbose = FALSE)
findCompatibleEvents(tx, tx2 = NULL, roi, sequential = TRUE, verbose = FALSE)
tx |
GRangesList object of transcripts |
tx2 |
optional GRangesList object of transcripts if tx is list of cds |
roi |
|
sequential |
logical. Exons have to appear sequentially to be considered compatible |
verbose |
logical. printouts and messages. |
Separates into event/region1 and 2 for the alternative case
list of length 4
(1) GRangesList
(2) Hits status [c]=coding; [nc]=non-coding
(3) ct - compatible transcripts
(4) tt - total transcripts
Diana Low
compatible_cds <- findCompatibleEvents(valid_cds,roi=roi,verbose=TRUE)
compatible_cds <- findCompatibleEvents(valid_cds,roi=roi,verbose=TRUE)
Finds compatible exon in annotation with the one present in roi object
findCompatibleExon(tx, roi, verbose = FALSE)
findCompatibleExon(tx, roi, verbose = FALSE)
tx |
GRangesList object of transcripts |
roi |
|
verbose |
logical. printouts and messages. |
list of length 3
(1) GRangesList hits
(2) Number of transcripts
(3) Original number of input transcripts
Diana Low
compatible_exons <- findCompatibleExon(valid_cds,roi)
compatible_exons <- findCompatibleExon(valid_cds,roi)
Given an ENSEMBL id, find all transcripts that match the id
findTX(id, db, tx, valid = FALSE, verbose = FALSE)
findTX(id, db, tx, valid = FALSE, verbose = FALSE)
id |
character. transcript identification (currently ENSEMBL gene names) |
db |
TxDb object |
tx |
GRangesList |
valid |
logical. check if in multiples of 3 [TRUE] for CDS translation. |
verbose |
logical. turn messages on/off. |
GRangesList
Diana Low
valid_cds <-findTX(id=splice_data$data[2,]$ID,tx=thecds,db=txdb,valid=FALSE)
valid_cds <-findTX(id=splice_data$data[2,]$ID,tx=thecds,db=txdb,valid=FALSE)
returns length of product given a GRanges span and GRangesList of transcripts
getPCRsizes(pcr_span, txlist, verbose = FALSE)
getPCRsizes(pcr_span, txlist, verbose = FALSE)
pcr_span |
GRanges object |
txlist |
GRangesList object |
verbose |
logical. report intermediate output. |
data.frame of transcript names with detected sizes in basepairs
Diana Low
suppressMessages(library(BSgenome.Mmusculus.UCSC.mm9)) bsgenome<-BSgenome.Mmusculus.UCSC.mm9 ## create a primer pair ## for actual use, obtain primer pair from primer design (callPrimer3) primer_pair <- data.frame(PRIMER_LEFT_SEQUENCE="agctcttgaaattggagctgac", PRIMER_RIGHT_SEQUENCE="cttagaaagaacaggaaatcc", stringsAsFactors=FALSE) ## confirm location cp<-checkPrimer(primer_pair,bsgenome,roi) cp ## get the PCR sizes pcr_result1 <- getPCRsizes(cp,theexons)
suppressMessages(library(BSgenome.Mmusculus.UCSC.mm9)) bsgenome<-BSgenome.Mmusculus.UCSC.mm9 ## create a primer pair ## for actual use, obtain primer pair from primer design (callPrimer3) primer_pair <- data.frame(PRIMER_LEFT_SEQUENCE="agctcttgaaattggagctgac", PRIMER_RIGHT_SEQUENCE="cttagaaagaacaggaaatcc", stringsAsFactors=FALSE) ## confirm location cp<-checkPrimer(primer_pair,bsgenome,roi) cp ## get the PCR sizes pcr_result1 <- getPCRsizes(cp,theexons)
get DNA sequence given a region of interest
getRegionDNA(roi, genome, introns = FALSE)
getRegionDNA(roi, genome, introns = FALSE)
roi |
|
genome |
BSgenome object |
introns |
TRUE/FALSE. whether to include intronic (lowercase) DNA. By default returns only exonic (uppercase) DNA. |
list of
(1) DNA sequence
(2) Junction start (for primer design)
Diana Low
suppressMessages(library(BSgenome.Mmusculus.UCSC.mm9)) bsgenome<-BSgenome.Mmusculus.UCSC.mm9 getRegionDNA(roi,bsgenome)
suppressMessages(library(BSgenome.Mmusculus.UCSC.mm9)) bsgenome<-BSgenome.Mmusculus.UCSC.mm9 getRegionDNA(roi,bsgenome)
inserts a region (exon or intron) into roi object
insertRegion(subject, roi)
insertRegion(subject, roi)
subject |
GRangesList |
roi |
|
In the case of intron retention, replaces the exon with the intron retention range. Consider calling reduce() on the GRanges in question.
GRanges object
Diana Low
#Inserts the exon defined in roi GRanges object from a GRanges/GRangesList region_minus_exon region_with_exon<-insertRegion(region_minus_exon,roi)
#Inserts the exon defined in roi GRanges object from a GRanges/GRangesList region_minus_exon region_with_exon<-insertRegion(region_minus_exon,roi)
Creates an object to store information about the splice site (region of interest) including flanking regions and alternative splice outcome
makeROI(df, type = "SE")
makeROI(df, type = "SE")
df |
data.frame object from extractSpliceEvents (the $data element) |
type |
type of splicing event c("SE","RI","MXE","A5SS","A3SS") |
a list containing
(1) type : splice type
(2) name : ID of transcript
(3) roi : GRanges object of splice site
(4) flank : GRanges object of flanking exons of splice site
(5) roi_range : GRangesList of splice site and its alternative outcome based on type
Diana Low
single_record<-splice_data$data[which(grepl("Prmt5",splice_data$data$Symbol)),] roi <- makeROI(single_record,type="SE")
single_record<-splice_data$data[which(grepl("Prmt5",splice_data$data$Symbol)),] roi <- makeROI(single_record,type="SE")
Makes unique ID names from event location
makeUniqueIDs(ddata)
makeUniqueIDs(ddata)
ddata |
extractSpliceEvents object |
original extractSpliceEvents list object with unique ID appended to data accessor
Diana Low
data_with_id<-makeUniqueIDs(splice_data)
data_with_id<-makeUniqueIDs(splice_data)
Plots the sequence logo of a given set of FASTA sequences
plot_seqlogo(fasta_seq)
plot_seqlogo(fasta_seq)
fasta_seq |
DNAStringSet or path to fasta-formatted file |
sequence logo image
Diana Low
head(splice_fasta) plot_seqlogo(Biostrings::DNAStringSet(splice_fasta$V2))
head(splice_fasta) plot_seqlogo(Biostrings::DNAStringSet(splice_fasta$V2))
primers designed using Primer3 for sample data
data("primers")
data("primers")
A data frame with 5 observations on the following 28 variables.
i
a numeric vector
PRIMER_LEFT_SEQUENCE
a character vector
PRIMER_RIGHT_SEQUENCE
a character vector
PRIMER_LEFT_TM
a numeric vector
PRIMER_RIGHT_TM
a numeric vector
PRIMER_LEFT_pos
a numeric vector
PRIMER_LEFT_len
a numeric vector
PRIMER_RIGHT_pos
a numeric vector
PRIMER_RIGHT_len
a numeric vector
PRIMER_PAIR_PENALTY
a numeric vector
PRIMER_LEFT_PENALTY
a numeric vector
PRIMER_RIGHT_PENALTY
a numeric vector
PRIMER_LEFT_GC_PERCENT
a numeric vector
PRIMER_RIGHT_GC_PERCENT
a numeric vector
PRIMER_LEFT_SELF_ANY_TH
a numeric vector
PRIMER_RIGHT_SELF_ANY_TH
a numeric vector
PRIMER_LEFT_SELF_END_TH
a numeric vector
PRIMER_RIGHT_SELF_END_TH
a numeric vector
PRIMER_LEFT_HAIRPIN_TH
a numeric vector
PRIMER_RIGHT_HAIRPIN_TH
a numeric vector
PRIMER_LEFT_END_STABILITY
a numeric vector
PRIMER_RIGHT_END_STABILITY
a numeric vector
PRIMER_LEFT_TEMPLATE_MISPRIMING
a numeric vector
PRIMER_RIGHT_TEMPLATE_MISPRIMING
a numeric vector
PRIMER_PAIR_COMPL_ANY_TH
a numeric vector
PRIMER_PAIR_COMPL_END_TH
a numeric vector
PRIMER_PAIR_PRODUCT_SIZE
a numeric vector
PRIMER_PAIR_TEMPLATE_MISPRIMING
a numeric vector
Dataframe of primer design results
data(primers)
data(primers)
Plots percentage spliced in (PSI) values in terms of inclusion levels
psiPlot(df = NULL, type = "MATS", sample_labels = c("Sample 1", "Sample 2"))
psiPlot(df = NULL, type = "MATS", sample_labels = c("Sample 1", "Sample 2"))
df |
data.frame containing PSI values |
type |
character. either 'MATS' output (will read in MATS headers) or 'generic' (provide 4 or 6 column data.frame) |
sample_labels |
x-axis labels for the plot |
bar plot of PSI values
Diana Low
#we give inclusion and skipped numbers as reads #this will be converted into percentages df<-data.frame(inclusion1=c("6,4,6"),skipped1=c("10,12,12"),inclusion2=c("15,15,15"), skipped2=c("3,3,4"),stringsAsFactors = FALSE) psiPlot(df,type='generic')
#we give inclusion and skipped numbers as reads #this will be converted into percentages df<-data.frame(inclusion1=c("6,4,6"),skipped1=c("10,12,12"),inclusion2=c("15,15,15"), skipped2=c("3,3,4"),stringsAsFactors = FALSE) psiPlot(df,type='generic')
region_minus_exon
data(region_minus_exon) ## maybe str(region_minus_exon) ; plot(region_minus_exon) ...
data(region_minus_exon) ## maybe str(region_minus_exon) ; plot(region_minus_exon) ...
removes a region (exon) from a GRanges or GRangesList
removeRegion(subject, roi)
removeRegion(subject, roi)
subject |
GRanges or GRangesList object |
roi |
|
GRanges object
Diana Low
# Removes the exon defined in roi GRanges object from a GRanges/GRangesList compatible_cds$hits[[1]] region_minus_exon<-removeRegion(compatible_cds$hits[[1]],roi)
roi
data("roi")
data("roi")
List containing region of interest information
data(roi)
data(roi)
Shapiro's score of acceptor site (range is from -13 [intron] to +1 [exon]) is: 100 * ((t1 - l1)/(h1 - l1) + (t2 - l2)/(h2 - l2))/2, where t1 is the sum of the best 8 of 10 percentages at positions -13 to -4, l1 is the sum of the lowest 8 of 10 percentages at positions -13 to -4, h1 is the sum of the highest 8 of 10 percentages at positions -13 to -4, t2 is the sum of percentages at positions -3 to +1, l2 is the sum of the lowest percentages at positions -3 to +1, and h2 is the sum of the highest percentages at positions -3 to +1
shapiroAcceptor(reference_fasta, target_fasta)
shapiroAcceptor(reference_fasta, target_fasta)
reference_fasta |
vector of strings or DNAStringSet of reference splice list |
target_fasta |
vector of strings or DNAStringSet of fasta to score |
data.frame with Shapiro scores
Diana Low
http://www.softberry.com/spldb/SpliceDB.html
library(BSgenome.Mmusculus.UCSC.mm9) bsgenome <- BSgenome.Mmusculus.UCSC.mm9 data_path<-system.file("extdata",package="SPLINTER") splice_data<-extractSpliceEvents(data=paste(data_path,"/skipped_exons.txt",sep="")) splice_sites<-extractSpliceSites(splice_data,site="acceptor") acceptor.ss<-getSeq(bsgenome,splice_sites) ##sacceptor<-shapiroAcceptor(acceptor.m,acceptor.ss)
library(BSgenome.Mmusculus.UCSC.mm9) bsgenome <- BSgenome.Mmusculus.UCSC.mm9 data_path<-system.file("extdata",package="SPLINTER") splice_data<-extractSpliceEvents(data=paste(data_path,"/skipped_exons.txt",sep="")) splice_sites<-extractSpliceSites(splice_data,site="acceptor") acceptor.ss<-getSeq(bsgenome,splice_sites) ##sacceptor<-shapiroAcceptor(acceptor.m,acceptor.ss)
convenience function for plotting Shapiro score density
shapiroDensity(ctrl_scores, treat_scores, sample = c(1, 2))
shapiroDensity(ctrl_scores, treat_scores, sample = c(1, 2))
ctrl_scores |
output of shapiroDonor or shapiroAcceptor |
treat_scores |
output of shapiroDonor or shapiroAcceptor |
sample |
samplenames |
density plot of Shapiro scores
Diana Low
Shapiro and Senapathy (1987) have developed a method to score the strength of a splice site based on percentages of each nucleotide at each position. Shapiro's score of donor site (range is from -3 [exon] to +7 [intron]) is : 100 * (t - min)/ (max - min), where t is the sum of percentages at positions -3 to +7, min is the sum of the lowest percentages at positions -3 to +7, and max is the sum of the highest percentages at positions -3 to +7.
shapiroDonor(reference_fasta, target_fasta)
shapiroDonor(reference_fasta, target_fasta)
reference_fasta |
vector of strings or DNAStringSet of reference splice list |
target_fasta |
vector of strings or DNAStringSet of fasta to score |
data.frame with Shapiro scores
Diana Low
http://www.softberry.com/spldb/SpliceDB.html
library(BSgenome.Mmusculus.UCSC.mm9) bsgenome <- BSgenome.Mmusculus.UCSC.mm9 data_path<-system.file("extdata",package="SPLINTER") splice_data<-extractSpliceEvents(data=paste(data_path,"/skipped_exons.txt",sep="")) splice_sites<-extractSpliceSites(splice_data) donor.ss<-getSeq(bsgenome,splice_sites) ##sdonor<-shapiroDonor(donor.m,donor.ss)
library(BSgenome.Mmusculus.UCSC.mm9) bsgenome <- BSgenome.Mmusculus.UCSC.mm9 data_path<-system.file("extdata",package="SPLINTER") splice_data<-extractSpliceEvents(data=paste(data_path,"/skipped_exons.txt",sep="")) splice_sites<-extractSpliceSites(splice_data) donor.ss<-getSeq(bsgenome,splice_sites) ##sdonor<-shapiroDonor(donor.m,donor.ss)
splice_data
data("splice_data")
data("splice_data")
List containing splice event file information
data(splice_data)
data(splice_data)
splice_fasta
data("splice_fasta")
data("splice_fasta")
A data frame with 0 observations on the following 2 variables.
V1
a numeric vector
V2
a numeric vector
Dataframe of region and fasta sequence
data(splice_fasta)
data(splice_fasta)
splits the PCR alignment into the two AS conditions
splitPCRhit(res, hitlist)
splitPCRhit(res, hitlist)
res |
result from getPCRsizes |
hitlist |
|
list of 2 data.frame objects with isoform name (ID) and length of PCR product (bp) matching Type 1 or Type 2 transcripts
Diana Low
## as getPCRsizes gives you all PCR bands when the primers are used, ## splitPCRhit will determine which bands are relevant to the target relevant_pcr_bands<-splitPCRhit(pcr_result1,compatible_tx)
## as getPCRsizes gives you all PCR bands when the primers are used, ## splitPCRhit will determine which bands are relevant to the target relevant_pcr_bands<-splitPCRhit(pcr_result1,compatible_tx)
thecds
data("thecds")
data("thecds")
List containing GRanges info
data(thecds)
data(thecds)
theexons
data("theexons")
data("theexons")
List containing GRanges info
data(theexons)
data(theexons)
valid_cds
data("valid_cds")
data("valid_cds")
GRangesList
data(valid_cds)
data(valid_cds)
valid_tx
GRangesList
data(valid_tx) ## maybe str(valid_tx) ; plot(valid_tx) ...
data(valid_tx) ## maybe str(valid_tx) ; plot(valid_tx) ...