Title: | a tool to perform statistical analysis of differential protein expression for quantitative proteomics data. |
---|---|
Description: | DEqMS is developped on top of Limma. However, Limma assumes same prior variance for all genes. In proteomics, the accuracy of protein abundance estimates varies by the number of peptides/PSMs quantified in both label-free and labelled data. Proteins quantification by multiple peptides or PSMs are more accurate. DEqMS package is able to estimate different prior variances for proteins quantified by different number of PSMs/peptides, therefore acchieving better accuracy. The package can be applied to analyze both label-free and labelled proteomics data. |
Authors: | Yafeng Zhu |
Maintainer: | Yafeng Zhu <[email protected]> |
License: | LGPL |
Version: | 1.25.0 |
Built: | 2024-10-30 05:25:08 UTC |
Source: | https://github.com/bioc/DEqMS |
This function is to normaliza out the differences of protein medians in different samples
equalMedianNormalization(dat)
equalMedianNormalization(dat)
dat |
an numeric data frame or matrix containing protein relative abundance in log2 scale |
a data frame or matrix with normalized protein relative abundance
Yafeng Zhu
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) # use the 3 ctrl samples as reference channels to calculate log2 ratio dat.gene = medianSummary(dat.psm.log,group_col = 2,ref_col =c(3,7,10)) dat.gene.nm = equalMedianNormalization(dat.gene)
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) # use the 3 ctrl samples as reference channels to calculate log2 ratio dat.gene = medianSummary(dat.psm.log,group_col = 2,ref_col =c(3,7,10)) dat.gene.nm = equalMedianNormalization(dat.gene)
This function is to calculate proteins'relative abundance by median method
medianSummary(dat,group_col=2,ref_col)
medianSummary(dat,group_col=2,ref_col)
dat |
an data frame with peptide/psm intensities in log2 scale |
group_col |
the column by which peptides/psm intensity are grouped. Usually the gene/protein id column. Default is 2 |
ref_col |
an integer vector indicating the column(s) used as denominator to calcualte relative petide ratio. |
a data frame containing protein relative abundance estimate in log2 scale
Yafeng Zhu
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) # use the 3 ctrl samples as reference channels to calculate log2 ratio dat.gene = medianSummary(dat.psm.log,group_col = 2,ref_col =c(3,7,10))
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) # use the 3 ctrl samples as reference channels to calculate log2 ratio dat.gene = medianSummary(dat.psm.log,group_col = 2,ref_col =c(3,7,10))
This function is to calculate proteins'relative abundance by median sweeping method
medianSweeping(dat,group_col=2)
medianSweeping(dat,group_col=2)
dat |
an data frame with peptide/PSM intensities in log2 scale |
group_col |
the column by which peptides/PSM intensity are grouped. Usually the gene/protein id column. Default is 2 |
a data frame with protein relative abundance estimate in log2 scale
Yafeng Zhu
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2)
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2)
This function is to calculate proteins'relative abundance by Turkey median polish
medpolishSummary(dat,group_col=2)
medpolishSummary(dat,group_col=2)
dat |
an data frame containing peptide/psm intensities in log2 scale |
group_col |
the column by which peptides/psm intensity are grouped. Usually the gene/protein column. Default is 2 |
a data frame containing protein relative abundance estimate in log2 scale
Yafeng Zhu
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene = medpolishSummary(dat.psm.log,group_col=2)
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene = medpolishSummary(dat.psm.log,group_col=2)
This function is to generate DEqMS outputs in a data frame.
outputResult(fit, coef_col=1)
outputResult(fit, coef_col=1)
fit |
an list object produced by spectraCounteBayes function |
coef_col |
is an integer indicating the column of fit$coefficients for which corresponding t-statistics and p-values are extracted in the output |
a data frame object with the last three columns being: sca.t - Peptide or Spectra Count Adjusted posterior t-value sca.P.Value - Adjusted posterior p-value sca.adj - sca.P.Value adjusted by BH method
Yafeng Zhu
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2) psm.count.table = as.data.frame(table(dat.psm$gene)) # generate PSM count table rownames(psm.count.table)=psm.count.table$Var1 cond = c("ctrl","miR191","miR372","miR519","ctrl", "miR372","miR519","ctrl","miR191","miR372") sampleTable <- data.frame( row.names = colnames(dat.psm)[3:12], cond = as.factor(cond) ) gene.matrix = as.matrix(dat.gene.nm) design = model.matrix(~cond,sampleTable) fit1 <- eBayes(lmFit(gene.matrix,design)) # add PSM count for each gene fit1$count <- psm.count.table[rownames(fit1$coefficients),2] fit2 = spectraCounteBayes(fit1) DEqMS.results = outputResult(fit2, coef_col=3)
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2) psm.count.table = as.data.frame(table(dat.psm$gene)) # generate PSM count table rownames(psm.count.table)=psm.count.table$Var1 cond = c("ctrl","miR191","miR372","miR519","ctrl", "miR372","miR519","ctrl","miR191","miR372") sampleTable <- data.frame( row.names = colnames(dat.psm)[3:12], cond = as.factor(cond) ) gene.matrix = as.matrix(dat.gene.nm) design = model.matrix(~cond,sampleTable) fit1 <- eBayes(lmFit(gene.matrix,design)) # add PSM count for each gene fit1$count <- psm.count.table[rownames(fit1$coefficients),2] fit2 = spectraCounteBayes(fit1) DEqMS.results = outputResult(fit2, coef_col=3)
This function is to plot log2 intensities of all peptides for one gene in different samples.
peptideProfilePlot(dat, col=2, gene)
peptideProfilePlot(dat, col=2, gene)
dat |
a data frame with peptide/psm log2 intensities |
col |
an integer indicates the column number where the gene protein id is. default is 2, asumming the gene/protein is in the second column |
gene |
an character indicates the gene name/id to be plotted |
return a ggplot2 object
Yafeng Zhu
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) peptideProfilePlot(dat.psm.log,col=2,gene="TGFBR2")
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) peptideProfilePlot(dat.psm.log,col=2,gene="TGFBR2")
This function is to plot the residuals of fit model on the vertical axis and the peptide or PSM count on the horizontal axis.
Residualplot(fit, xlab="log2(count)", ylab="Variance(fitted - observed)", main="")
Residualplot(fit, xlab="log2(count)", ylab="Variance(fitted - observed)", main="")
fit |
an object returned from |
xlab |
the title for x axis |
ylab |
the title for y axis |
main |
the title for the figure |
return a plot graphic
Yafeng Zhu
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2) psm.count.table = as.data.frame(table(dat.psm$gene)) # generate PSM count table rownames(psm.count.table)=psm.count.table$Var1 cond = c("ctrl","miR191","miR372","miR519","ctrl", "miR372","miR519","ctrl","miR191","miR372") sampleTable <- data.frame( row.names = colnames(dat.psm)[3:12], cond = as.factor(cond) ) gene.matrix = as.matrix(dat.gene.nm) design = model.matrix(~cond,sampleTable) fit1 <- eBayes(lmFit(gene.matrix,design)) # add PSM count for each gene fit1$count <- psm.count.table[rownames(fit1$coefficients),2] fit2 = spectraCounteBayes(fit1) Residualplot(fit2,xlab="log2(PSM count)",main="TMT data PXD004163")
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2) psm.count.table = as.data.frame(table(dat.psm$gene)) # generate PSM count table rownames(psm.count.table)=psm.count.table$Var1 cond = c("ctrl","miR191","miR372","miR519","ctrl", "miR372","miR519","ctrl","miR191","miR372") sampleTable <- data.frame( row.names = colnames(dat.psm)[3:12], cond = as.factor(cond) ) gene.matrix = as.matrix(dat.gene.nm) design = model.matrix(~cond,sampleTable) fit1 <- eBayes(lmFit(gene.matrix,design)) # add PSM count for each gene fit1$count <- psm.count.table[rownames(fit1$coefficients),2] fit2 = spectraCounteBayes(fit1) Residualplot(fit2,xlab="log2(PSM count)",main="TMT data PXD004163")
This function is to calculate peptide/PSM count adjusted t-statistics, p-values.
spectraCounteBayes(fit, fit.method="loess", coef_col)
spectraCounteBayes(fit, fit.method="loess", coef_col)
fit |
an list object produced by Limma |
fit.method |
the method used to fit variance against the number of
peptides/PSM count quantified. Two available methods: "loess","nls" and
"spline". default "loess"."loess" uses |
coef_col |
an integer vector indicating the column(s) of fit$coefficients for which the function is to be performed. if not specified, all coefficients are used. |
This function adjusts the T-statistics and p-values for quantitative MS proteomics experiment according to the number of peptides/PSMs used for quantification. The method is similar in nature to intensity-based Bayes method (Maureen A. Sartor et al BMC Bioinformatics 2006).
a list object with the following components
count |
Peptide or PSM count used for quantification |
sca.t |
Spectra Count Adjusted posterior t-value |
sca.p |
Spectra Count Adjusted posterior p-value |
sca.dfprior |
Spectra Count Adjusted prior degrees of freedom |
sca.priorvar |
Spectra Count Adjusted prior variance |
sca.postvar |
Spectra Count Adjusted posterior variance |
model |
fitted model |
fit.method |
The method used to fit the model |
Yafeng Zhu
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2) psm.count.table = as.data.frame(table(dat.psm$gene)) # generate PSM count table rownames(psm.count.table)=psm.count.table$Var1 cond = c("ctrl","miR191","miR372","miR519","ctrl", "miR372","miR519","ctrl","miR191","miR372") sampleTable <- data.frame( row.names = colnames(dat.psm)[3:12], cond = as.factor(cond) ) gene.matrix = as.matrix(dat.gene.nm) design = model.matrix(~cond,sampleTable) fit1 <- eBayes(lmFit(gene.matrix,design)) # add PSM count for each gene fit1$count <- psm.count.table[rownames(fit1$coefficients),2] fit2 = spectraCounteBayes(fit1)
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2) psm.count.table = as.data.frame(table(dat.psm$gene)) # generate PSM count table rownames(psm.count.table)=psm.count.table$Var1 cond = c("ctrl","miR191","miR372","miR519","ctrl", "miR372","miR519","ctrl","miR191","miR372") sampleTable <- data.frame( row.names = colnames(dat.psm)[3:12], cond = as.factor(cond) ) gene.matrix = as.matrix(dat.gene.nm) design = model.matrix(~cond,sampleTable) fit1 <- eBayes(lmFit(gene.matrix,design)) # add PSM count for each gene fit1$count <- psm.count.table[rownames(fit1$coefficients),2] fit2 = spectraCounteBayes(fit1)
This function is to draw a boxplot of the variance of genes quantified by different number of peptides/PSMs. Red curve indicate DEqMS prior variance.
VarianceBoxplot(fit,n=20, xlab="count", ylab = "log(Variance)", main="")
VarianceBoxplot(fit,n=20, xlab="count", ylab = "log(Variance)", main="")
fit |
an object returned from |
n |
set a number to plot only the genes with count value smaller or equal to n |
xlab |
the title for x axis |
ylab |
the title for y axis |
main |
the title for the figure |
return a plot graphic
Yafeng Zhu
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2) psm.count.table = as.data.frame(table(dat.psm$gene)) # generate PSM count table rownames(psm.count.table)=psm.count.table$Var1 cond = c("ctrl","miR191","miR372","miR519","ctrl", "miR372","miR519","ctrl","miR191","miR372") sampleTable <- data.frame( row.names = colnames(dat.psm)[3:12], cond = as.factor(cond) ) gene.matrix = as.matrix(dat.gene.nm) design = model.matrix(~cond,sampleTable) fit1 <- eBayes(lmFit(gene.matrix,design)) # add PSM count for each gene fit1$count <- psm.count.table[rownames(fit1$coefficients),2] fit2 = spectraCounteBayes(fit1) VarianceBoxplot(fit2,xlab="PSM count",main="TMT data PXD004163")
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2) psm.count.table = as.data.frame(table(dat.psm$gene)) # generate PSM count table rownames(psm.count.table)=psm.count.table$Var1 cond = c("ctrl","miR191","miR372","miR519","ctrl", "miR372","miR519","ctrl","miR191","miR372") sampleTable <- data.frame( row.names = colnames(dat.psm)[3:12], cond = as.factor(cond) ) gene.matrix = as.matrix(dat.gene.nm) design = model.matrix(~cond,sampleTable) fit1 <- eBayes(lmFit(gene.matrix,design)) # add PSM count for each gene fit1$count <- psm.count.table[rownames(fit1$coefficients),2] fit2 = spectraCounteBayes(fit1) VarianceBoxplot(fit2,xlab="PSM count",main="TMT data PXD004163")
This function is to draw a scatter plot of the variance against the number of quantified peptides/PSMs.Red curve indicate DEqMS prior variance.
VarianceScatterplot(fit, xlab="log2(count)", ylab = "log(Variance)", main="")
VarianceScatterplot(fit, xlab="log2(count)", ylab = "log(Variance)", main="")
fit |
an object returned from |
xlab |
the title for x axis |
ylab |
the title for y axis |
main |
the title for the figure |
return a plot graphic
Yafeng Zhu
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2) psm.count.table = as.data.frame(table(dat.psm$gene)) # generate PSM count table rownames(psm.count.table)=psm.count.table$Var1 cond = c("ctrl","miR191","miR372","miR519","ctrl", "miR372","miR519","ctrl","miR191","miR372") sampleTable <- data.frame( row.names = colnames(dat.psm)[3:12], cond = as.factor(cond) ) gene.matrix = as.matrix(dat.gene.nm) design = model.matrix(~cond,sampleTable) fit1 <- eBayes(lmFit(gene.matrix,design)) # add PSM count for each gene fit1$count <- psm.count.table[rownames(fit1$coefficients),2] fit2 = spectraCounteBayes(fit1) VarianceScatterplot(fit2,xlab="log2(PSM count)",main="TMT data PXD004163")
library(ExperimentHub) eh = ExperimentHub(localHub=TRUE) query(eh, "DEqMS") dat.psm = eh[["EH1663"]] dat.psm.log = dat.psm dat.psm.log[,3:12] = log2(dat.psm[,3:12]) dat.gene.nm = medianSweeping(dat.psm.log,group_col = 2) psm.count.table = as.data.frame(table(dat.psm$gene)) # generate PSM count table rownames(psm.count.table)=psm.count.table$Var1 cond = c("ctrl","miR191","miR372","miR519","ctrl", "miR372","miR519","ctrl","miR191","miR372") sampleTable <- data.frame( row.names = colnames(dat.psm)[3:12], cond = as.factor(cond) ) gene.matrix = as.matrix(dat.gene.nm) design = model.matrix(~cond,sampleTable) fit1 <- eBayes(lmFit(gene.matrix,design)) # add PSM count for each gene fit1$count <- psm.count.table[rownames(fit1$coefficients),2] fit2 = spectraCounteBayes(fit1) VarianceScatterplot(fit2,xlab="log2(PSM count)",main="TMT data PXD004163")