Title: | Predict Combined Function of Transcription Factors |
---|---|
Description: | Implement the BETA algorithm for inferring direct target genes from DNA-binding and perturbation expression data, Wang et al. (2013) <doi: 10.1038/nprot.2013.150>. Extend the algorithm to predict the combined function of two DNA-binding elements from comparable binding and expression data. |
Authors: | Mahmoud Ahmed [aut, cre] |
Maintainer: | Mahmoud Ahmed <[email protected]> |
License: | GPL-3 |
Version: | 1.21.0 |
Built: | 2025-01-13 04:04:54 UTC |
Source: | https://github.com/bioc/target |
This function selects overlapping peaks and regions, calculates the distance between them and scores each peak.
associated_peaks(peaks, regions, regions_col, base = 1e+05)
associated_peaks(peaks, regions, regions_col, base = 1e+05)
peaks |
A GRanges object |
regions |
A GRanges object |
regions_col |
A character string |
base |
An integer to calculate distances relative to. |
A GRanges object. A similar object to peaks with three added metadata columns.
# load peaks and transcripts data data("real_peaks") data("real_transcripts") # associated peaks ap <- associated_peaks(real_peaks, real_transcripts, 'name2')
# load peaks and transcripts data data("real_peaks") data("real_transcripts") # associated peaks ap <- associated_peaks(real_peaks, real_transcripts, 'name2')
This function selects overlapping peaks and regions, calculates the distance between them, scores each peak and region, and calculates the rank products of the regions.
direct_targets(peaks, regions, regions_col, stats_col, base = 1e+05)
direct_targets(peaks, regions, regions_col, stats_col, base = 1e+05)
peaks |
A GRanges object |
regions |
A GRanges object |
regions_col |
A character string |
stats_col |
A character string |
base |
An integer to calculate distances relative to. |
A GRanges object. A similar object to regions with several added metadata columns.
# load peaks and transcripts data data("real_peaks") data("real_transcripts") # direct targets dt <- direct_targets(real_peaks, real_transcripts, 'name2', 't')
# load peaks and transcripts data data("real_peaks") data("real_transcripts") # direct targets dt <- direct_targets(real_peaks, real_transcripts, 'name2', 't')
Calculate the distance between the elements of two GRanges objects.
find_distance(peaks, regions, how = "center")
find_distance(peaks, regions, how = "center")
peaks |
A GRanges object |
regions |
A GRanges object |
how |
A character string, default 'center' |
A vector of integers
library(IRanges) query <- IRanges(c(1, 4, 9), c(5, 7, 10)) subject <- IRanges(c(2, 2, 10), c(2, 3, 12)) find_distance(query, subject)
library(IRanges) query <- IRanges(c(1, 4, 9), c(5, 7, 10)) subject <- IRanges(c(2, 2, 10), c(2, 3, 12)) find_distance(query, subject)
Merge two GRanges objects by overlaps
merge_ranges(peaks, regions)
merge_ranges(peaks, regions)
peaks |
A GRanges object |
regions |
A GRanges object |
A DataFrame
library(IRanges) query <- IRanges(c(1, 4, 9), c(5, 7, 10)) subject <- IRanges(c(2, 2, 10), c(2, 3, 12)) mergeByOverlaps(query, subject)
library(IRanges) query <- IRanges(c(1, 4, 9), c(5, 7, 10)) subject <- IRanges(c(2, 2, 10), c(2, 3, 12)) mergeByOverlaps(query, subject)
Plot the cumulative distribution function of a chosen value (e.g. ranks) by a factor of the same length, group. Each group is given a color and a label.
plot_predictions(rank, group, colors, labels, ...)
plot_predictions(rank, group, colors, labels, ...)
rank |
A numeric vector |
group |
A factor of length equal to that of rank |
colors |
A character vector of colors for each group |
labels |
A character vector of length equal to the number of unique values in group |
... |
Other arguments passed to points |
NULL.
# generate random values rn1 <- rnorm(100) rn2 <- rnorm(100, 2) e <- c(rn1, rn2) # generate grouping variable g <- rep(c('up', 'down'), times = c(length(rn1), length(rn2))) plot_predictions(e, group = g, colors = c('red', 'green'), labels = c('up', 'down'))
# generate random values rn1 <- rnorm(100) rn2 <- rnorm(100, 2) e <- c(rn1, rn2) # generate grouping variable g <- rep(c('up', 'down'), times = c(length(rn1), length(rn2))) plot_predictions(e, group = g, colors = c('red', 'green'), labels = c('up', 'down'))
Calculate the rank products of the rank of the distances and the statistics.
rank_product(region_score, region_stat, region_id)
rank_product(region_score, region_stat, region_id)
region_score |
A vector of numerics |
region_stat |
A vector of numerics |
region_id |
A vector of characters |
A vector of numerics
library(IRanges) query <- IRanges(c(1, 4, 9), c(5, 7, 10)) subject <- IRanges(c(2, 2, 10), c(2, 3, 12)) distance <- find_distance(query, subject) peak_score <- score_peaks(distance, 100000) region_id <- c('region1', 'region1', 'region2') region_score <- score_regions(peak_score, region_id) region_stat <- c(30, 30, -40) rank_product(region_score, region_stat, region_id)
library(IRanges) query <- IRanges(c(1, 4, 9), c(5, 7, 10)) subject <- IRanges(c(2, 2, 10), c(2, 3, 12)) distance <- find_distance(query, subject) peak_score <- score_peaks(distance, 100000) region_id <- c('region1', 'region1', 'region2') region_score <- score_regions(peak_score, region_id) region_stat <- c(30, 30, -40) rank_product(region_score, region_stat, region_id)
Androgen receptor peaks from a ChIP-Seq experiment in the LNCaP cell line.
real_peaks
real_peaks
A GRanges
https://github.com/suwangbio/BETA/blob/master/BETA_test_data/3656_peaks.bed
# load data data('real_peaks') # locate the raw data system.file('extdata', '3656_peaks.bed.gz', package = 'target') # locate the source code for preparing the data system.file('extdata', 'make-data.R', package = 'target')
# load data data('real_peaks') # locate the raw data system.file('extdata', '3656_peaks.bed.gz', package = 'target') # locate the source code for preparing the data system.file('extdata', 'make-data.R', package = 'target')
The differential expression analysis output of LNCaP cell line treated with DHT for 16 hours compared to non-treated cells. The REFSEQ transcript identifiers were used to merge the data.frame with the transcript coordinates from the hg19 reference genome.
real_transcripts
real_transcripts
A GRanges
https://github.com/suwangbio/BETA/blob/master/BETA_test_data/AR_diff_expr.xls
https://github.com/suwangbio/BETA/blob/master/BETA_1.0.7/BETA/references/hg19.refseq
# load data data('real_transcripts') # locate the raw data system.file('extdata', 'AR_diff_expr.tsv.gz', package = 'target') system.file('extdata', 'hg19.refseq', package = 'target') # locate the source code for preparing the data system.file('extdata', 'make-data.R', package = 'target')
# load data data('real_transcripts') # locate the raw data system.file('extdata', 'AR_diff_expr.tsv.gz', package = 'target') system.file('extdata', 'hg19.refseq', package = 'target') # locate the source code for preparing the data system.file('extdata', 'make-data.R', package = 'target')
Calculate the peak score based on the distance to a region of interest.
score_peaks(distance, base)
score_peaks(distance, base)
distance |
A vector of integers |
base |
An integer to calculate distances relative to. |
A vector of integers
library(IRanges) query <- IRanges(c(1, 4, 9), c(5, 7, 10)) subject <- IRanges(c(2, 2, 10), c(2, 3, 12)) distance <- find_distance(query, subject) score_peaks(distance, 100000)
library(IRanges) query <- IRanges(c(1, 4, 9), c(5, 7, 10)) subject <- IRanges(c(2, 2, 10), c(2, 3, 12)) distance <- find_distance(query, subject) score_peaks(distance, 100000)
Calculate the region score based on the distance to their assigned peaks.
score_regions(peak_score, region_id)
score_regions(peak_score, region_id)
peak_score |
A vector of integers |
region_id |
A vector of characters |
A vector of numerics
library(IRanges) query <- IRanges(c(1, 4, 9), c(5, 7, 10)) subject <- IRanges(c(2, 2, 10), c(2, 3, 12)) distance <- find_distance(query, subject) peak_score <- score_peaks(distance, 100000) region_id <- c('region1', 'region1', 'region2') region_score <- score_regions(peak_score, region_id)
library(IRanges) query <- IRanges(c(1, 4, 9), c(5, 7, 10)) subject <- IRanges(c(2, 2, 10), c(2, 3, 12)) distance <- find_distance(query, subject) peak_score <- score_peaks(distance, 100000) region_id <- c('region1', 'region1', 'region2') region_score <- score_regions(peak_score, region_id)
Randomly generated peaks with random distances from the transcription start sites (TSS) of chromosome 1 of the mm10 mouse genome.
sim_peaks
sim_peaks
A GRanges
# load data data('sim_peaks') # locate the source code for preparing the data system.file('extdata', 'make-data.R', package = 'target')
# load data data('sim_peaks') # locate the source code for preparing the data system.file('extdata', 'make-data.R', package = 'target')
Simulated transcripts. The transcripts of chromosome 1 of the mm10 mouse genome with randomly signed statistics assigned to each.
sim_transcripts
sim_transcripts
A GRanges
# load data data('sim_transcripts') # locate the source code for preparing the data system.file('extdata', 'make-data.R', package = 'target')
# load data data('sim_transcripts') # locate the source code for preparing the data system.file('extdata', 'make-data.R', package = 'target')
target
: Predict Combined Function of Transcription Factors. Implement the BETA algorithm for inferring direct target genes from DNA-binding and perturbation expression data, Wang et al. (2013) <doi: 10.1038/nprot.2013.150>. Extend the algorithm to predict the combined effect of two DNA-binding elements from comparable binding and expression data.
Predicting associated peaks and direct targets
associated_peaks
direct_targets
Plotting and testing predictions
plot_predictions
test_predictions
Internal target
functions:
merge_ranges
find_distance
score_peaks
score_regions
rank_product
Run the shiny App
target_app()
target_app()
Runs the shiny app
Test whether the cumulative distribution functions of two groups are drawn from the same distribution.
test_predictions(rank, group, compare, ...)
test_predictions(rank, group, compare, ...)
rank |
A numeric vector |
group |
A factor of length equal to that of rank |
compare |
A character vector of length two |
... |
Other arguments passed to ks.test |
An htest object
# generate random values rn1 <- rnorm(100) rn2 <- rnorm(100, 2) e <- c(rn1, rn2) # generate grouping variable g <- rep(c('up', 'down'), times = c(length(rn1), length(rn2))) # test test_predictions(e, group = g, compare = c('up', 'down'))
# generate random values rn1 <- rnorm(100) rn2 <- rnorm(100, 2) e <- c(rn1, rn2) # generate grouping variable g <- rep(c('up', 'down'), times = c(length(rn1), length(rn2))) # test test_predictions(e, group = g, compare = c('up', 'down'))