| Title: | scQTLtools: an R/Bioconductor package for comprehensive identification and visualization of single-cell eQTLs |
|---|---|
| Description: | scQTLtools is a comprehensive R/Bioconductor package that facilitates end-to-end single-cell eQTL analysis, from preprocessing to visualization |
| Authors: | Xiaofeng Wu [aut, cre, cph] (ORCID: <https://orcid.org/0009-0003-6254-5575>), Xin Huang [aut, cph] (ORCID: <https://orcid.org/0009-0005-2755-0357>), Jingtong Kang [com] (ORCID: <https://orcid.org/0009-0008-8343-3456>), Siwen Xu [aut, cph] (ORCID: <https://orcid.org/0000-0001-7936-0639>) |
| Maintainer: | Xiaofeng Wu <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 1.5.0 |
| Built: | 2026-05-31 06:30:59 UTC |
| Source: | https://github.com/bioc/scQTLtools |
Adjust p-values and filter SNP–gene pairs based on the adjusted p-values.
adjust_pvalues(result, pAdjustMethod = "bonferroni", pAdjustThreshold = 0.05)adjust_pvalues(result, pAdjustMethod = "bonferroni", pAdjustThreshold = 0.05)
result |
A data frame that contains information of SNP–gene pairs and raw p values. |
pAdjustMethod |
Method for p-value adjustment. One of "bonferroni", "holm", "hochberg", "hommel" or "BH". The default option is "bonferroni". |
pAdjustThreshold |
Only SNP–gene pairs with adjusted p-values meeting the threshold will be displayed. Default by 0.05. |
A data frame with adjusted p-values, filtered by threshold, containing information on SNP–gene pairs.
example_data <- data.frame( gene = c("Gene1", "Gene2", "Gene3", "Gene4"), SNP = c("SNP1", "SNP2", "SNP3", "SNP4"), pvalue = c(0.001, 0.04, 0.03, 0.0005)) pAdjustMethod <- "BH" pAdjustThreshold <- 0.05 adjusted_result <- adjust_pvalues(example_data, pAdjustMethod, pAdjustThreshold)example_data <- data.frame( gene = c("Gene1", "Gene2", "Gene3", "Gene4"), SNP = c("SNP1", "SNP2", "SNP3", "SNP4"), pvalue = c(0.001, 0.04, 0.03, 0.0005)) pAdjustMethod <- "BH" pAdjustThreshold <- 0.05 adjusted_result <- adjust_pvalues(example_data, pAdjustMethod, pAdjustThreshold)
Build a ZINB model.
buildZINB(counts)buildZINB(counts)
counts |
A numeric vector of gene expression values. |
A list containing four parameters estimated from the ZINB model.
data(GeneData) gene <- unlist(GeneData[1, ]) result <-buildZINB(gene)data(GeneData) gene <- unlist(GeneData[1, ]) result <-buildZINB(gene)
This function detects eQTLs using scRNA-seq data and genotype data.
callQTL( eQTLObject, gene_ids = NULL, downstream = NULL, upstream = NULL, gene_mart = NULL, snp_mart = NULL, pAdjustMethod = "bonferroni", useModel = "zinb", pAdjustThreshold = 0.05, logfcThreshold = 0.1 )callQTL( eQTLObject, gene_ids = NULL, downstream = NULL, upstream = NULL, gene_mart = NULL, snp_mart = NULL, pAdjustMethod = "bonferroni", useModel = "zinb", pAdjustThreshold = 0.05, logfcThreshold = 0.1 )
eQTLObject |
An S4 object of class |
gene_ids |
A gene ID or a list of gene IDS. |
downstream |
Distance (in base pairs) downstream of the gene start site to search for associated SNPs. |
upstream |
Distance (in base pairs) upstream of the gene end site to search for associated SNPs. |
gene_mart |
A Mart object representing the BioMart gene database.
If |
snp_mart |
A Mart object representing the BioMart SNP database.
If |
pAdjustMethod |
Method used for multiple testing correction. One of
|
useModel |
Model used for fitting. One of "poisson", "zinb", or "linear." |
pAdjustThreshold |
Only SNP–gene pairs with adjusted p-values below the threshold will be retained. Default is 0.05. |
logfcThreshold |
The minimum beta coefficient required to report a SNP–gene pair as an eQTL. |
A data frame in which each row corresponds to a detected SNP–gene eQTL pair, including statistical and model fitting results.
data(EQTL_obj) library(biomaRt) gene_mart <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", mirror = 'asia') snp_mart <- useEnsembl(biomart = "snps", dataset = "hsapiens_snp", mirror = 'asia') eqtl <- callQTL( eQTLObject = EQTL_obj, gene_ids = NULL, downstream = NULL, upstream = NULL, gene_mart = gene_mart, snp_mart = snp_mart, pAdjustMethod = 'bonferroni', useModel = 'linear', pAdjustThreshold = 0.05, logfcThreshold = 0.025 )data(EQTL_obj) library(biomaRt) gene_mart <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", mirror = 'asia') snp_mart <- useEnsembl(biomart = "snps", dataset = "hsapiens_snp", mirror = 'asia') eqtl <- callQTL( eQTLObject = EQTL_obj, gene_ids = NULL, downstream = NULL, upstream = NULL, gene_mart = gene_mart, snp_mart = snp_mart, pAdjustMethod = 'bonferroni', useModel = 'linear', pAdjustThreshold = 0.05, logfcThreshold = 0.025 )
Validate SNP IDs in the input genotype matrix.
checkSNPList(snpList, snp_mart = NULL, snpDataset = "hsapiens_snp")checkSNPList(snpList, snp_mart = NULL, snpDataset = "hsapiens_snp")
snpList |
A list of SNPs IDs. |
snp_mart |
An object of class Mart representing the BioMart SNP
database to connect to. If provided, this should be a Mart object obtained
by calling |
snpDataset |
A character string specifying the SNP dataset to use from
Ensembl. Default is |
A data frame containing the genomic locations of the valid SNPs.
data(SNPData2) snpList <- rownames(SNPData2) snpDataset <- 'hsapiens_snp' snps_loc <- checkSNPList(snpList = snpList, snpDataset = snpDataset)data(SNPData2) snpList <- rownames(SNPData2) snpDataset <- 'hsapiens_snp' snps_loc <- checkSNPList(snpList = snpList, snpDataset = snpDataset)
CPM_normalize() applies Counts Per Million (CPM) normalization to a
raw gene expression matrix.
CPM_normalize(expressionMatrix)CPM_normalize(expressionMatrix)
expressionMatrix |
A numeric matrix of raw gene expression counts, with genes as rows and cells as columns. |
A normalized gene expression matrix after applying CPM normalization.
data(GeneData) CPM_normalize(GeneData)data(GeneData) CPM_normalize(GeneData)
Create a gene location data frame.
createGeneLoc( geneList, gene_mart = NULL, geneDataset = "hsapiens_gene_ensembl", OrgDb )createGeneLoc( geneList, gene_mart = NULL, geneDataset = "hsapiens_gene_ensembl", OrgDb )
geneList |
A gene ID or a list of genes IDs. |
gene_mart |
An object of class Mart representing the BioMart gene
database to connect to. If provided, this should be a Mart object obtained
by calling |
geneDataset |
A character string specifying the gene dataset to use
from Ensembl. Default is |
OrgDb |
The name of the OrgDb package to use for gene annotation.
Supported values include |
A data.frame containing gene location information.
data(GeneData) geneList <- rownames(GeneData) library(yulab.utils) library(biomaRt) OrgDb <- load_OrgDb("org.Hs.eg.db") gene_mart <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", mirror = 'asia') gene_loc <- createGeneLoc(geneList = geneList, gene_mart = gene_mart, OrgDb = OrgDb)data(GeneData) geneList <- rownames(GeneData) library(yulab.utils) library(biomaRt) OrgDb <- load_OrgDb("org.Hs.eg.db") gene_mart <- useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", mirror = 'asia') gene_loc <- createGeneLoc(geneList = geneList, gene_mart = gene_mart, OrgDb = OrgDb)
This function creates an S4 object to store genotype and expression data, along with sample grouping and metadata for eQTL analysis.
createQTLObject( snpMatrix, genedata, biClassify = FALSE, species = NULL, group = NULL, ... )createQTLObject( snpMatrix, genedata, biClassify = FALSE, species = NULL, group = NULL, ... )
snpMatrix |
A genotype matrix where each row is a snp and each column is a cell. Encoding should be 0, 1, 2, 3. |
genedata |
A gene expression matrix, or a Seurat object or SingleCellExperiment object. |
biClassify |
Logical; whether to convert genotype encoding in snpMatrix
to 0, 1, and 2. |
species |
The species that the user wants to select, human or mouse. |
group |
A column name in the metadata of the Seurat or SingleCellExperiment object indicating cell groups (e.g., celltype or condition). |
... |
other parameters |
An S4 object of class eQTLObject.
data(SNPData) data(GeneData) eqtl <- createQTLObject(snpMatrix = SNPData, genedata = GeneData, biClassify = FALSE, species = 'human', group = NULL)data(SNPData) data(GeneData) eqtl <- createQTLObject(snpMatrix = SNPData, genedata = GeneData, biClassify = FALSE, species = 'human', group = NULL)
Create SNP location dataframe.
createSNPsLoc(snpList, snp_mart = NULL, snpDataset = "hsapiens_snp")createSNPsLoc(snpList, snp_mart = NULL, snpDataset = "hsapiens_snp")
snpList |
A list of SNPs IDs. |
snp_mart |
An object of class Mart representing the BioMart SNP
database to connect to. If provided, this should be a Mart object obtained
by calling |
snpDataset |
A character string specifying the SNP dataset to use from
Ensembl. Default is |
A data frame containing the SNP genomic locations.
snpList <- c('rs546', 'rs549', 'rs568', 'rs665', 'rs672') library(biomaRt) snp_mart <- useEnsembl(biomart = "snps", dataset = "hsapiens_snp", mirror = 'asia') snp_loc <- createSNPsLoc(snpList = snpList, snp_mart = snp_mart)snpList <- c('rs546', 'rs549', 'rs568', 'rs665', 'rs672') library(biomaRt) snp_mart <- useEnsembl(biomart = "snps", dataset = "hsapiens_snp", mirror = 'asia') snp_loc <- createSNPsLoc(snpList = snpList, snp_mart = snp_mart)
GeneData: A dataset containing example gene expression data. Each row represents a gene and each column represents a cell.
SNPData: A dataset containing single nucleotide (SNP) data. Each row represents a variant and each column represents a cell.
Seurat_obj: A Seurat object containing GeneData along with associated
metadata stored in the meta.data slot.
SNPData2: A smaller SNP dataset containing fewer variants and cells. Each row represents a variant and each column represents a cell.
EQTL_obj: An eQTLObject created using
createQTLObject, where the raw expression matrix is
normalized using normalizeGene, and both the genotype matrix
and normalized expression matrix were filtered using
filterGeneSNP.
A numeric matrix with 100 rows (genes) and 2705 columns (cells).
A numeric matrix with 1000 rows (SNPs) and 2705 columns (cells).
An object of class Seurat.
A numeric matrix with 500 rows (SNPs) and 500 columns (cells).
createQTLObject, normalizeGene and
filterGeneSNP, for the underlying functions that do the work.
An object of class eQTLObject.
DESeq_normalize() normalizes a raw gene expression matrix using the
DESeq2 package.
DESeq_normalize(expressionMatrix)DESeq_normalize(expressionMatrix)
expressionMatrix |
A numeric matrix of raw gene expression counts, with genes as rows and cells as columns. |
A normalized gene expression matrix after applying DESeq normalization.
data(GeneData) DESeq_normalize(GeneData)data(GeneData) DESeq_normalize(GeneData)
draw_boxplot() visualizes gene expression levels across different SNP
genotypes using a boxplot.
draw_boxplot(df, unique_group)draw_boxplot(df, unique_group)
df |
A data frame containing gene expression values, SNP genotypes, and group labels. |
unique_group |
A character string indicating the unique group name. |
ggplot
set.seed(123) counts_Ref <- rnorm(50, mean = 10, sd = 2) counts_Alt <- rnorm(50, mean = 12, sd = 2) i <- rep("GroupA", 100) unique_group <- unique(i) dataframe <- data.frame( expression = c(counts_Ref, counts_Alt), snp = c(rep("REF", length(counts_Ref)), rep("ALT", length(counts_Alt))), group = i) dataframe$snp <- factor(dataframe$snp, levels = c("REF", "ALT")) draw_boxplot(df = dataframe, unique_group = unique_group)set.seed(123) counts_Ref <- rnorm(50, mean = 10, sd = 2) counts_Alt <- rnorm(50, mean = 12, sd = 2) i <- rep("GroupA", 100) unique_group <- unique(i) dataframe <- data.frame( expression = c(counts_Ref, counts_Alt), snp = c(rep("REF", length(counts_Ref)), rep("ALT", length(counts_Alt))), group = i) dataframe$snp <- factor(dataframe$snp, levels = c("REF", "ALT")) draw_boxplot(df = dataframe, unique_group = unique_group)
draw_histplot() shows the distribution of expression values for each
SNP genotype using histograms.
draw_histplot(df, unique_group)draw_histplot(df, unique_group)
df |
A data frame containing gene expression values, SNP genotypes, and group labels. |
unique_group |
A character string indicating the unique group name. |
ggplot
set.seed(123) counts_Ref <- rnorm(50, mean = 10, sd = 2) counts_Alt <- rnorm(50, mean = 12, sd = 2) i <- rep("GroupA", 100);unique_group <- unique(i) dataframe <- data.frame(expression = c(counts_Ref, counts_Alt), snp = c(rep("REF", length(counts_Ref)), rep("ALT", length(counts_Alt))), group = i) dataframe$snp <- factor(dataframe$snp, levels = c("REF", "ALT")) draw_histplot(df = dataframe, unique_group = unique_group)set.seed(123) counts_Ref <- rnorm(50, mean = 10, sd = 2) counts_Alt <- rnorm(50, mean = 12, sd = 2) i <- rep("GroupA", 100);unique_group <- unique(i) dataframe <- data.frame(expression = c(counts_Ref, counts_Alt), snp = c(rep("REF", length(counts_Ref)), rep("ALT", length(counts_Alt))), group = i) dataframe$snp <- factor(dataframe$snp, levels = c("REF", "ALT")) draw_histplot(df = dataframe, unique_group = unique_group)
draw_QTLplot() creates a composite plot that overlays violin plots,
boxplots, and scatter points to illustrate the distribution and variability
of gene expression across SNP groups.
draw_QTLplot(df, unique_group)draw_QTLplot(df, unique_group)
df |
A data frame containing gene expression values, SNP genotypes, and group labels. |
unique_group |
A character string indicating the unique group name. |
ggplot
set.seed(123) counts_Ref <- rnorm(50, mean = 10, sd = 2) counts_Alt <- rnorm(50, mean = 12, sd = 2) i <- rep("GroupA", 100);unique_group <- unique(i) dataframe <- data.frame(expression = c(counts_Ref, counts_Alt), snp = c(rep("REF", length(counts_Ref)), rep("ALT", length(counts_Alt))), group = i) dataframe$snp <- factor(dataframe$snp, levels = c("REF", "ALT")) draw_QTLplot(df = dataframe, unique_group = unique_group)set.seed(123) counts_Ref <- rnorm(50, mean = 10, sd = 2) counts_Alt <- rnorm(50, mean = 12, sd = 2) i <- rep("GroupA", 100);unique_group <- unique(i) dataframe <- data.frame(expression = c(counts_Ref, counts_Alt), snp = c(rep("REF", length(counts_Ref)), rep("ALT", length(counts_Alt))), group = i) dataframe$snp <- factor(dataframe$snp, levels = c("REF", "ALT")) draw_QTLplot(df = dataframe, unique_group = unique_group)
draw_violinplot() visualizes gene expression levels across different
SNP genotypes using violin plots.
draw_violinplot(df, unique_group)draw_violinplot(df, unique_group)
df |
A data frame containing gene expression values, SNP genotypes, and group labels. |
unique_group |
A character string indicating the unique group name. |
ggplot
set.seed(123) counts_Ref <- rnorm(50, mean = 10, sd = 2) counts_Alt <- rnorm(50, mean = 12, sd = 2) i <- rep("GroupA", 100);unique_group <- unique(i) dataframe <- data.frame(expression = c(counts_Ref, counts_Alt), snp = c(rep("REF", length(counts_Ref)), rep("ALT", length(counts_Alt))), group = i) dataframe$snp <- factor(dataframe$snp, levels = c("REF", "ALT")) draw_violinplot(df = dataframe, unique_group = unique_group)set.seed(123) counts_Ref <- rnorm(50, mean = 10, sd = 2) counts_Alt <- rnorm(50, mean = 12, sd = 2) i <- rep("GroupA", 100);unique_group <- unique(i) dataframe <- data.frame(expression = c(counts_Ref, counts_Alt), snp = c(rep("REF", length(counts_Ref)), rep("ALT", length(counts_Alt))), group = i) dataframe$snp <- factor(dataframe$snp, levels = c("REF", "ALT")) draw_violinplot(df = dataframe, unique_group = unique_group)
eQTLObject
The eQTLObject class is designed to store data related to eQTL analysis, including data lists, result data frames, and additional metadata such as classification, species, and grouping information.
An S4 object of class eQTLObject.
rawDataA gene expression dataframe, the row names represent gene IDs and the column names represent cell IDs.
filterDataGene expression matrix after normalizing.
eQTLResultThe result dataframe obtained the sc-eQTL results.
biClassifyLogical; whether to convert genotype encoding in snpMatrix
to 0, 1, and 2. TRUE indicates conversion; FALSE indicates no
conversion (default).
speciesThe species that the user wants to select, human or mouse.
groupByOptions for cell grouping, users can choose celltype, cellstatus, etc., depending on metadata.
useModelmodel for fitting dataframe.
Filter a data frame of SNP–gene pairs by absolute beta.
filter_by_abs_b(result, logfcThreshold)filter_by_abs_b(result, logfcThreshold)
result |
A data frame that containing SNP–gene pairs and their beta values (b-value). |
logfcThreshold |
A numeric value specifying the minimum absolute b-value to retain. Default is 0.1. |
A data frame containing only rows with absolute b-values above the specified threshold.
example_result <- data.frame( gene = c("Gene1", "Gene2", "Gene3", "Gene4"), SNP = c("SNP1", "SNP2", "SNP3", "SNP4"), b = c(-2.5, 1.0, -0.5, 3.0)) logfcThreshold <- 0.1 filtered_result <- filter_by_abs_b(example_result, logfcThreshold)example_result <- data.frame( gene = c("Gene1", "Gene2", "Gene3", "Gene4"), SNP = c("SNP1", "SNP2", "SNP3", "SNP4"), b = c(-2.5, 1.0, -0.5, 3.0)) logfcThreshold <- 0.1 filtered_result <- filter_by_abs_b(example_result, logfcThreshold)
Filter gene expression and genotype matrices by cell percentage thresholds.
filterGeneSNP( eQTLObject, snpNumOfCellsPercent = 10, expressionMin = 0, expressionNumOfCellsPercent = 10 )filterGeneSNP( eQTLObject, snpNumOfCellsPercent = 10, expressionMin = 0, expressionNumOfCellsPercent = 10 )
eQTLObject |
An S4 object of class |
snpNumOfCellsPercent |
Numeric. Minimum percentage of cells required for each SNP genotype (e.g., AA, AG, and GG). Only SNPs where each genotype occurs in at least this proportion of cells are retained. Default is 10. |
expressionMin |
Numeric. Expression threshold used in combination with
|
expressionNumOfCellsPercent |
Numeric. Minimum percentage of cells in
which a gene's expression must exceed |
An updated eQTLObject with filtered gene expression and SNP
matrices.
data(SNPData) data(GeneData) eqtl <- createQTLObject(snpMatrix = SNPData, genedata = GeneData) eqtl <- normalizeGene(eqtl) eqtl <- filterGeneSNP(eqtl, snpNumOfCellsPercent = 2, expressionMin = 0, expressionNumOfCellsPercent = 2)data(SNPData) data(GeneData) eqtl <- createQTLObject(snpMatrix = SNPData, genedata = GeneData) eqtl <- normalizeGene(eqtl) eqtl <- filterGeneSNP(eqtl, snpNumOfCellsPercent = 2, expressionMin = 0, expressionNumOfCellsPercent = 2)
This function extracts the names of cells from a SNP matrix that correspond to a specified value for a given SNP.
get_cell_groups(snpMatrix, SNPid, biClassify)get_cell_groups(snpMatrix, SNPid, biClassify)
snpMatrix |
A genotype matrix where each row is a snp and each column is a cell. Encoding should be 0, 1, 2, 3. |
SNPid |
A character string or numeric index representing the specific SNP of interest in the SNP matrix. |
biClassify |
Logical; whether to convert genotype encoding in snpMatrix
to 0, 1, and 2. |
A list of character vectors. Each vector contains the names of cells
(i.e., column names of snpMatrix) corresponding to a specific
genotype value at the given SNP.
data(SNPData) biClassify <- FALSE get_cell_groups(SNPData, "1:632445", biClassify)data(SNPData) biClassify <- FALSE get_cell_groups(SNPData, "1:632445", biClassify)
This function retrieves expression counts for a specified gene from an expression matrix, based on the provided list of cells.
get_counts(expressionMatrix, Geneid, cells)get_counts(expressionMatrix, Geneid, cells)
expressionMatrix |
A numeric matrix of gene expression counts, with genes as rows and cells as columns. |
Geneid |
A character string or numeric index representing the specific
gene of interest in |
cells |
A character vector of cell names (column names of
|
A numeric vector containing the expression counts of the specified gene in the selected cells.
data(GeneData) get_counts(GeneData, "CNN2", c("CGGCAGTGTAGCCCTG", "GGAGGATTCCCGTTCA"))data(GeneData) get_counts(GeneData, "CNN2", c("CGGCAGTGTAGCCCTG", "GGAGGATTCCCGTTCA"))
Access filtered data stored in an eQTLObject.
Method to access eQTLObject filter data.
get_filter_data(x) ## S4 method for signature 'eQTLObject' get_filter_data(x)get_filter_data(x) ## S4 method for signature 'eQTLObject' get_filter_data(x)
x |
An |
Filtered matrices.
Filtered matrices.
data(EQTL_obj) get_filter_data(EQTL_obj)data(EQTL_obj) get_filter_data(EQTL_obj)
Access model specification from an eQTLObject
Method to access eQTLObject used model information.
get_model_info(x) ## S4 method for signature 'eQTLObject' get_model_info(x)get_model_info(x) ## S4 method for signature 'eQTLObject' get_model_info(x)
x |
An |
A character string indicating the model.
used model information of eQTLObject.
data(EQTL_obj) get_model_info(EQTL_obj)data(EQTL_obj) get_model_info(EQTL_obj)
Access raw data stored in an eQTLObject.
Method to access eQTLObject raw data.
get_raw_data(x) ## S4 method for signature 'eQTLObject' get_raw_data(x)get_raw_data(x) ## S4 method for signature 'eQTLObject' get_raw_data(x)
x |
An |
raw data matrix.
raw data matrix.
data(EQTL_obj) get_raw_data(EQTL_obj)data(EQTL_obj) get_raw_data(EQTL_obj)
Access eQTLs results from an eQTLObject.
Method to access the result of identifying eQTLs.
get_result_info(x) ## S4 method for signature 'eQTLObject' get_result_info(x)get_result_info(x) ## S4 method for signature 'eQTLObject' get_result_info(x)
x |
An |
A data frame where each row corresponds to an identified SNP–gene pair.
A data frame with eQTL results.
data(EQTL_obj) get_result_info(EQTL_obj)data(EQTL_obj) get_result_info(EQTL_obj)
Initializes a progress bar to track the computation progress during model
fitting procedures such as linearModel, poissonModel, and
zinbModel. This helps users monitor the status of long-running
analyses.
initialize_progress_bar(total, k)initialize_progress_bar(total, k)
total |
Integer. The total number of iterations or steps to be completed in the analysis. |
k |
Character. An identifier or label for the specific group or subset being analyzed, used to annotate progress messages. |
An object of class progress_bar from the progress
package, which can be updated using the $tick() method.
unique_group <- c("CMP", "GMP") total_snp_count <- 10 # assume each group have 100 SNP. pb_model <- lapply(unique_group, function(k) { pb <- initialize_progress_bar(total = total_snp_count, k) for (i in seq_len(total_snp_count)) { Sys.sleep(0.1) # assume progress time pb$tick() # update pb } })unique_group <- c("CMP", "GMP") total_snp_count <- 10 # assume each group have 100 SNP. pb_model <- lapply(unique_group, function(k) { pb <- initialize_progress_bar(total = total_snp_count, k) for (i in seq_len(total_snp_count)) { Sys.sleep(0.1) # assume progress time pb$tick() # update pb } })
limma_normalize() normalizes an expression matrix using the quantile
normalization method provided by the limma package.
limma_normalize(expressionMatrix)limma_normalize(expressionMatrix)
expressionMatrix |
A numeric matrix of raw gene expression counts, with genes as rows and cells as columns. |
A normalized gene expression matrix after applying limma normalization.
data(GeneData) limma_normalize(GeneData)data(GeneData) limma_normalize(GeneData)
This function performs linear regression to identify gene–SNP associations
based on single-cell expression and genotype data stored in an
eQTLObject.
linearModel( eQTLObject, geneIDs, snpIDs, biClassify = FALSE, pAdjustMethod = "bonferroni", pAdjustThreshold = 0.05, logfcThreshold = 0.1 )linearModel( eQTLObject, geneIDs, snpIDs, biClassify = FALSE, pAdjustMethod = "bonferroni", pAdjustThreshold = 0.05, logfcThreshold = 0.1 )
eQTLObject |
An S4 object of class |
geneIDs |
Character vector of gene IDs to include in the model fitting. |
snpIDs |
Character vector of SNP IDs to include in the model fitting. |
biClassify |
Logical; whether to convert genotype encoding in snpMatrix
to 0, 1, and 2. |
pAdjustMethod |
Method used for multiple testing correction. One of
|
pAdjustThreshold |
Only SNP–gene pairs with adjusted p-values below the threshold will be retained. Default is 0.05. |
logfcThreshold |
The minimum beta coefficient (effect size) required to report a SNP–gene pair as an eQTL. |
A data frame of gene–SNP pairs that pass the filtering criteria, including beta coefficients, p-values, adjusted p-values, and group labels.
data(EQTL_obj) Gene <- rownames(slot(EQTL_obj, "filterData")$expMat) SNP <- rownames(slot(EQTL_obj, "filterData")$snpMat) linearResult <- linearModel( eQTLObject = EQTL_obj, geneIDs = Gene, snpIDs = SNP, biClassify = FALSE, pAdjustMethod = "bonferroni", pAdjustThreshold = 0.05, logfcThreshold = 0.025)data(EQTL_obj) Gene <- rownames(slot(EQTL_obj, "filterData")$expMat) SNP <- rownames(slot(EQTL_obj, "filterData")$snpMat) linearResult <- linearModel( eQTLObject = EQTL_obj, geneIDs = Gene, snpIDs = SNP, biClassify = FALSE, pAdjustMethod = "bonferroni", pAdjustThreshold = 0.05, logfcThreshold = 0.025)
Access biclassification information from an eQTLObject.
Method to access eQTLObject biclassify information.
load_biclassify_info(x) ## S4 method for signature 'eQTLObject' load_biclassify_info(x)load_biclassify_info(x) ## S4 method for signature 'eQTLObject' load_biclassify_info(x)
x |
An |
A character or list containing biclassification information.
biclassify information of eQTLObject.
data(EQTL_obj) load_biclassify_info(EQTL_obj)data(EQTL_obj) load_biclassify_info(EQTL_obj)
Access cell grouping information from an eQTLObject
Method to access eQTLObject cell grouping information.
load_group_info(x) ## S4 method for signature 'eQTLObject' load_group_info(x)load_group_info(x) ## S4 method for signature 'eQTLObject' load_group_info(x)
x |
An |
A data frame with grouping information.
A data frame with cell group assignments.
data(EQTL_obj) load_group_info(EQTL_obj)data(EQTL_obj) load_group_info(EQTL_obj)
Access species information from an eQTLObject.
Method to access eQTLObject species information.
load_species_info(x) ## S4 method for signature 'eQTLObject' load_species_info(x)load_species_info(x) ## S4 method for signature 'eQTLObject' load_species_info(x)
x |
An |
A character string indicating the species.
species information of eQTLObject.
data(EQTL_obj) load_species_info(EQTL_obj)data(EQTL_obj) load_species_info(EQTL_obj)
log_normalize() transforms an expression matrix by applying logarithm
and scaling operations to normalize data.
log_normalize(expressionMatrix)log_normalize(expressionMatrix)
expressionMatrix |
A numeric matrix of raw gene expression counts, with genes as rows and cells as columns. |
A normalized gene expression matrix after applying logNormalize normalization.
data(GeneData) log_normalize(GeneData)data(GeneData) log_normalize(GeneData)
This function performs normalization of the gene expression matrix stored
within an eQTLObject, aiming to remove technical biases and
improve comparability across cells. Multiple normalization methods are
supported, including "logNormalize", "CPM", "TPM", "DESeq", and "limma".
normalizeGene(eQTLObject, method = "logNormalize")normalizeGene(eQTLObject, method = "logNormalize")
eQTLObject |
An S4 object of class |
method |
Character string specifying the normalization method to use.
Must be one of |
An eQTLObject with the normalized gene expression matrix
stored in the slot named "normExpMat".
data(EQTL_obj) eqtl <- normalizeGene(EQTL_obj, method = "logNormalize")data(EQTL_obj) eqtl <- normalizeGene(EQTL_obj, method = "logNormalize")
Customized ggplot2 Theme for Plots
plots_theme_opts()plots_theme_opts()
A ggplot2 theme object for styling plots.
library(ggplot2) data <- data.frame( x = c("A", "B", "C", "D", "E"), y = c(10, 20, 30, 40, 50)) ggplot(data, aes(x, y)) + geom_point() + plots_theme_opts()library(ggplot2) data <- data.frame( x = c("A", "B", "C", "D", "E"), y = c(10, 20, 30, 40, 50)) ggplot(data, aes(x, y)) + geom_point() + plots_theme_opts()
Poisson model fitting the gene expression matrix and genotype matrix.
poissonModel( eQTLObject, geneIDs, snpIDs, biClassify = FALSE, pAdjustMethod = "bonferroni", pAdjustThreshold = 0.05, logfcThreshold = 0.1 )poissonModel( eQTLObject, geneIDs, snpIDs, biClassify = FALSE, pAdjustMethod = "bonferroni", pAdjustThreshold = 0.05, logfcThreshold = 0.1 )
eQTLObject |
An S4 object of class |
geneIDs |
Character vector of gene IDs to include in the model fitting. |
snpIDs |
Character vector of SNP IDs to include in the model fitting. |
biClassify |
Logical; whether to convert genotype encoding in snpMatrix
to 0, 1, and 2. |
pAdjustMethod |
Method used for multiple testing correction. One of
|
pAdjustThreshold |
Only SNP–gene pairs with adjusted p-values below the threshold will be retained. Default is 0.05. |
logfcThreshold |
The minimum beta coefficient (effect size) required to report a SNP–gene pair as an eQTL. |
A data frame of gene–SNP pairs that pass the filtering criteria, including beta coefficients, p-values, adjusted p-values, and group labels.
data(EQTL_obj) Gene <- rownames(slot(EQTL_obj, "filterData")$expMat) SNP <- rownames(slot(EQTL_obj, "filterData")$snpMat) poissonResult <- poissonModel( eQTLObject = EQTL_obj, geneIDs = Gene, snpIDs = SNP, biClassify = FALSE, pAdjustMethod = "bonferroni", pAdjustThreshold = 0.05, logfcThreshold = 0.025 )data(EQTL_obj) Gene <- rownames(slot(EQTL_obj, "filterData")$expMat) SNP <- rownames(slot(EQTL_obj, "filterData")$snpMat) poissonResult <- poissonModel( eQTLObject = EQTL_obj, geneIDs = Gene, snpIDs = SNP, biClassify = FALSE, pAdjustMethod = "bonferroni", pAdjustThreshold = 0.05, logfcThreshold = 0.025 )
Process a matrix to extract a row and convert it to a data frame.
process_matrix(id, matrix, name)process_matrix(id, matrix, name)
id |
Character string specifying the row name to extract from the matrix. |
matrix |
A matrix from which the row will be extracted. |
name |
Character string specifying the column name for the extracted values. |
A data frame containing the extracted row and a column with the row names.
rownames <- c("CNN2", "TIGD2", "DTD2") colnames <- c("Col1", "Col2", "Col3", "Col4") matrix_data <- matrix(1:12, nrow = 3, ncol = 4, dimnames = list(rownames, colnames)) geneid <- "CNN2" gene_mat <- process_matrix(geneid, matrix_data, "gene_mat")rownames <- c("CNN2", "TIGD2", "DTD2") colnames <- c("Col1", "Col2", "Col3", "Col4") matrix_data <- matrix(1:12, nrow = 3, ncol = 4, dimnames = list(rownames, colnames)) geneid <- "CNN2" gene_mat <- process_matrix(geneid, matrix_data, "gene_mat")
This function identifies and removes outlier expression values for a specified gene based on the Median Absolute Deviation (MAD) method. It then filters provided genotype-based cell groups, returning only cells with non-outlier expression values.
remove_outliers(exprsMat, Geneid, A_cells, B_cells, C_cells = NULL)remove_outliers(exprsMat, Geneid, A_cells, B_cells, C_cells = NULL)
exprsMat |
Input gene expression matrix with genes as rows and cells as columns. |
Geneid |
Character string specifying the gene ID to examine. |
A_cells |
Character vector of cell names belonging to genotype group A. |
B_cells |
Character vector of cell names belonging to genotype group B. |
C_cells |
Optional character vector of cell names belonging to genotype
group C; if |
A named list of filtered cell vectors.
## Mock expression matrix set.seed(123) exprsMat <- matrix(rnorm(200), nrow = 5) rownames(exprsMat) <- paste0("Gene", 1:nrow(exprsMat)) colnames(exprsMat) <- paste0("cell", 1:ncol(exprsMat)) A_cells <- colnames(exprsMat)[1:13] # Example A cell list B_cells <- colnames(exprsMat)[14:26] # Example B cell list C_cells <- colnames(exprsMat)[27:40] # Example C cell list remove_outliers(exprsMat, "Gene1", A_cells, B_cells, C_cells)## Mock expression matrix set.seed(123) exprsMat <- matrix(rnorm(200), nrow = 5) rownames(exprsMat) <- paste0("Gene", 1:nrow(exprsMat)) colnames(exprsMat) <- paste0("cell", 1:ncol(exprsMat)) A_cells <- colnames(exprsMat)[1:13] # Example A cell list B_cells <- colnames(exprsMat)[14:26] # Example B cell list C_cells <- colnames(exprsMat)[27:40] # Example C cell list remove_outliers(exprsMat, "Gene1", A_cells, B_cells, C_cells)
Set filtered data in an eQTLObject.
Method to set eQTLObject filter data.
set_filter_data(x, value, name) ## S4 method for signature 'eQTLObject' set_filter_data(x, value, name)set_filter_data(x, value, name) ## S4 method for signature 'eQTLObject' set_filter_data(x, value, name)
x |
An |
value |
A matrix to be stored as filtered data. |
name |
A character string indicating the key under which the matrix is
stored in |
An updated eQTLObject.
An updated eQTLObject.
data(EQTL_obj) data123 <- matrix(0, nrow = 3, ncol = 3) set_filter_data(EQTL_obj, data123, "expMat") data(EQTL_obj) data123 <- matrix(0, nrow = 3, ncol = 3) set_filter_data(EQTL_obj, data123, "expMat")data(EQTL_obj) data123 <- matrix(0, nrow = 3, ncol = 3) set_filter_data(EQTL_obj, data123, "expMat") data(EQTL_obj) data123 <- matrix(0, nrow = 3, ncol = 3) set_filter_data(EQTL_obj, data123, "expMat")
Set model specification in an eQTLObject
Method to set eQTLObject used model information.
set_model_info(x, value) ## S4 method for signature 'eQTLObject' set_model_info(x, value)set_model_info(x, value) ## S4 method for signature 'eQTLObject' set_model_info(x, value)
x |
An |
value |
A character string specifying the model (e.g., "zinb", "poisson", "linear"). |
An updated eQTLObject with the new model specification.
An updated eQTLObject.
data(EQTL_obj) useModel <- "zinb" set_model_info(EQTL_obj, useModel) data(EQTL_obj) useModel <- "zinb" set_model_info(EQTL_obj, useModel)data(EQTL_obj) useModel <- "zinb" set_model_info(EQTL_obj, useModel) data(EQTL_obj) useModel <- "zinb" set_model_info(EQTL_obj, useModel)
Set raw data in an eQTLObject.
Method to set eQTLObject raw data.
set_raw_data(x, value, name) ## S4 method for signature 'eQTLObject' set_raw_data(x, value, name)set_raw_data(x, value, name) ## S4 method for signature 'eQTLObject' set_raw_data(x, value, name)
x |
An |
value |
The raw data. |
name |
A character string indicating the key under which the matrix is
stored in |
eQTLObject.
An updated eQTLObject.
data(EQTL_obj) data123 <- matrix(0, nrow = 3, ncol = 3) set_raw_data(EQTL_obj, data123, "rawExpMat") data(EQTL_obj) data123 <- matrix(0, nrow = 3, ncol = 3) set_raw_data(EQTL_obj, data123, "rawExpMat")data(EQTL_obj) data123 <- matrix(0, nrow = 3, ncol = 3) set_raw_data(EQTL_obj, data123, "rawExpMat") data(EQTL_obj) data123 <- matrix(0, nrow = 3, ncol = 3) set_raw_data(EQTL_obj, data123, "rawExpMat")
Set eQTL results in an eQTLObject.
Method to set the result of identifying eQTLs from scRNA-seq data.
set_result_info(x, value) ## S4 method for signature 'eQTLObject' set_result_info(x, value)set_result_info(x, value) ## S4 method for signature 'eQTLObject' set_result_info(x, value)
x |
An |
value |
A data frame where each row corresponds to a SNP–gene pair. |
An updated eQTLObject.
An updated eQTLObject.
data(EQTL_obj) result <- data.frame(0, nrow = 3, ncol = 3) set_result_info(EQTL_obj, result) data(EQTL_obj) result <- matrix(0, nrow = 3, ncol = 3) set_result_info(EQTL_obj, result)data(EQTL_obj) result <- data.frame(0, nrow = 3, ncol = 3) set_result_info(EQTL_obj, result) data(EQTL_obj) result <- matrix(0, nrow = 3, ncol = 3) set_result_info(EQTL_obj, result)
TPM_normalize() scales an expression matrix using Transcripts Per
Million (TPM) normalization, applying logarithm and scaling operations to
adjust data based on library size.
TPM_normalize(expressionMatrix)TPM_normalize(expressionMatrix)
expressionMatrix |
A numeric matrix of raw gene expression counts, with genes as rows and cells as columns. |
A normalized gene expression matrix after applying TPM normalization.
data(GeneData) TPM_normalize(GeneData)data(GeneData) TPM_normalize(GeneData)
Visualize eQTL results by SNP–gene pair across groups
visualizeQTL( eQTLObject, SNPid, Geneid, groupName = NULL, plottype = "QTLplot", removeoutlier = FALSE )visualizeQTL( eQTLObject, SNPid, Geneid, groupName = NULL, plottype = "QTLplot", removeoutlier = FALSE )
eQTLObject |
An S4 object of class |
SNPid |
ID of SNP. |
Geneid |
ID of Gene. |
groupName |
A |
plottype |
A |
removeoutlier |
Logical; whether to identify and remove outliers.
Default is |
list
data(EQTL_obj) ## We have to call the eQTLs firstly using \code{\link{callQTL()}}. eqtl <- callQTL(eQTLObject = EQTL_obj, useModel = "linear") visualizeQTL(eQTLObject = eqtl, SNPid = "1:632647", Geneid = "RPS27", groupName = NULL, plottype = "QTLplot", removeoutlier = FALSE)data(EQTL_obj) ## We have to call the eQTLs firstly using \code{\link{callQTL()}}. eqtl <- callQTL(eQTLObject = EQTL_obj, useModel = "linear") visualizeQTL(eQTLObject = eqtl, SNPid = "1:632647", Geneid = "RPS27", groupName = NULL, plottype = "QTLplot", removeoutlier = FALSE)
Zinb model fitting the gene expression matrix.
zinbModel( eQTLObject, geneIDs, snpIDs, biClassify = FALSE, pAdjustMethod = "bonferroni", pAdjustThreshold = 0.05 )zinbModel( eQTLObject, geneIDs, snpIDs, biClassify = FALSE, pAdjustMethod = "bonferroni", pAdjustThreshold = 0.05 )
eQTLObject |
An S4 object of class |
geneIDs |
Character vector of gene IDs to include in the model fitting. |
snpIDs |
Character vector of SNP IDs to include in the model fitting. |
biClassify |
Logical; whether to convert genotype encoding in snpMatrix
to 0, 1, and 2. |
pAdjustMethod |
Method used for multiple testing correction. One of
|
pAdjustThreshold |
Only gene-SNP pairs with adjusted p-values below the threshold will be retained. Default is 0.05. |
A data frame of SNP–gene pairs that pass the filtering criteria, including p-values, adjusted p-values, and group labels.
data(EQTL_obj) Gene <- rownames(slot(EQTL_obj, 'filterData')$expMat) SNP <- rownames(slot(EQTL_obj, 'filterData')$snpMat) zinbResult <- zinbModel( eQTLObject = EQTL_obj, geneIDs = Gene, snpIDs = SNP, biClassify = FALSE, pAdjustMethod = 'bonferroni', pAdjustThreshold = 0.05)data(EQTL_obj) Gene <- rownames(slot(EQTL_obj, 'filterData')$expMat) SNP <- rownames(slot(EQTL_obj, 'filterData')$snpMat) zinbResult <- zinbModel( eQTLObject = EQTL_obj, geneIDs = Gene, snpIDs = SNP, biClassify = FALSE, pAdjustMethod = 'bonferroni', pAdjustThreshold = 0.05)