| Title: | Evaluating SNV-Induced Disruption of G-Quadruplex Structures |
|---|---|
| Description: | G-quadruplexes (G4s) are unique nucleic acid secondary structures predominantly found in guanine-rich regions and have been shown to be involved in various biological regulatory processes. G4SNVHunter is an R package designed to rapidly identify genomic sequences with G4-forming propensity and to accurately screen user-provided single nucleotide variants—as well as other small-scale variants such as indels and MNVs—for their potential to destabilize these structures. This allows researchers to then screen these critical variants for deeper study, digging into how they might influence biological functions—think gene regulation, for instance—by impairing G4 formation propensity. |
| Authors: | Rongxin Zhang [cre, aut] (ORCID: <https://orcid.org/0000-0002-1643-1185>) |
| Maintainer: | Rongxin Zhang <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 1.5.0 |
| Built: | 2026-05-30 08:22:46 UTC |
| Source: | https://github.com/bioc/G4SNVHunter |
This function is deprecated and will be removed in a future version.
checkSNV(snv_gr = NULL, mode = "wra", ref_col = NULL, alt_col = NULL)checkSNV(snv_gr = NULL, mode = "wra", ref_col = NULL, alt_col = NULL)
snv_gr |
A |
mode |
A character string specifying the checks to be performed.
|
ref_col |
Column name for the ref bases in |
alt_col |
Column name for the alt bases in |
This function checks whether the user-provided SNVs are single nucleotide substitutions.
A logical value indicating whether the user-provided SNVs passed all checks.
This function is no longer supported.
if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("GenomicRanges") } library(GenomicRanges) gr1 <- GRanges("chr1", IRanges(start = 100, width = 1)) # check width ('w') checkSNV(gr1, mode = "w") gr2 <- GRanges( seqnames = Rle("seq1"), ranges = IRanges(c(100, 200, 300), width = 1), ref = c("A", "C", "G"), alt = c("T", "T", "A") ) # check width ('w'), ref ('r'), and alt ('a') checkSNV(gr2, mode = "wra", ref_col = "ref", alt_col = "alt") # check width ('w') and alt ('a') checkSNV(gr2, mode = "wa", alt_col = "alt") gr3 <- GRanges("chr1", IRanges(start = 100, width = 10)) # widths should be all one checkSNV(gr3, mode = "w") gr4 <- GRanges( seqnames = Rle("seq1"), ranges = IRanges(start = 100, width = 1), ref = "AG", alt = "T" ) # ref should be all one checkSNV(gr4, mode = "wr", ref_col = "ref")if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("GenomicRanges") } library(GenomicRanges) gr1 <- GRanges("chr1", IRanges(start = 100, width = 1)) # check width ('w') checkSNV(gr1, mode = "w") gr2 <- GRanges( seqnames = Rle("seq1"), ranges = IRanges(c(100, 200, 300), width = 1), ref = c("A", "C", "G"), alt = c("T", "T", "A") ) # check width ('w'), ref ('r'), and alt ('a') checkSNV(gr2, mode = "wra", ref_col = "ref", alt_col = "alt") # check width ('w') and alt ('a') checkSNV(gr2, mode = "wa", alt_col = "alt") gr3 <- GRanges("chr1", IRanges(start = 100, width = 10)) # widths should be all one checkSNV(gr3, mode = "w") gr4 <- GRanges( seqnames = Rle("seq1"), ranges = IRanges(start = 100, width = 1), ref = "AG", alt = "T" ) # ref should be all one checkSNV(gr4, mode = "wr", ref_col = "ref")
This function exports a GRanges object containing predicted G4s
generated by the G4HunterDetect function from the G4SNVHunter
package, to a file in TXT, CSV, or XLSX format.
exportG4( G4 = NULL, filename = NULL, include_metadata = TRUE, revcomp_antisense = TRUE )exportG4( G4 = NULL, filename = NULL, include_metadata = TRUE, revcomp_antisense = TRUE )
G4 |
A |
filename |
A |
include_metadata |
A |
revcomp_antisense |
A |
Invisibly returns a data.frame object.
fa_path <- system.file("extdata", "seq.fa", package = "G4SNVHunter") seq <- loadSequence(seq_path = fa_path) # Predict G4s G4_detected <- G4HunterDetect(seq) out_xlsx <- file.path(tempdir(), "results.xlsx") out_txt <- file.path(tempdir(), "results.txt") out_csv <- file.path(tempdir(), "results.csv") exportG4(G4_detected, out_xlsx) exportG4(G4_detected, out_txt, include_metadata = FALSE) exportG4(G4_detected, out_csv, revcomp_antisense = FALSE) unlink(c(out_xlsx, out_txt, out_csv))fa_path <- system.file("extdata", "seq.fa", package = "G4SNVHunter") seq <- loadSequence(seq_path = fa_path) # Predict G4s G4_detected <- G4HunterDetect(seq) out_xlsx <- file.path(tempdir(), "results.xlsx") out_txt <- file.path(tempdir(), "results.txt") out_csv <- file.path(tempdir(), "results.csv") exportG4(G4_detected, out_xlsx) exportG4(G4_detected, out_txt, include_metadata = FALSE) exportG4(G4_detected, out_csv, revcomp_antisense = FALSE) unlink(c(out_xlsx, out_txt, out_csv))
This function exports a GRanges object containing predicted G4
regions affected by variants to a TXT, CSV, or XLSX file. The output
includes both the original and mutated G-rich sequences, along with
detailed information about the associated variants.
exportMutG4( mut_G4 = NULL, filename = NULL, include_metadata = TRUE, revcomp_G4_seq = TRUE, revcomp_mutG4_seq = TRUE )exportMutG4( mut_G4 = NULL, filename = NULL, include_metadata = TRUE, revcomp_G4_seq = TRUE, revcomp_mutG4_seq = TRUE )
mut_G4 |
A |
filename |
A |
include_metadata |
A |
revcomp_G4_seq |
A |
revcomp_mutG4_seq |
A |
Invisibly returns a data.frame object.
library(GenomicRanges) fa_path <- system.file("extdata", "seq.fa", package = "G4SNVHunter") seq <- loadSequence(seq_path = fa_path) # Predict G4s G4_detected <- G4HunterDetect(seq) # create mut granges object mut <- data.frame( chr = c("seq1", "seq5"), pos = c(81, 11), ref = c("GGGTAGGG", "A"), alt = c("G", "AGGGGGGGGGGGGGGGG") ) mut <- GRanges( seqnames = mut$chr, ranges = IRanges(start = mut$pos, end = mut$pos), strand = "*", ref = mut$ref, alt = mut$alt ) mut_G4 <- G4VarImpact(G4_detected, mut, ref_col = "ref", alt_col = "alt") filtered_mut_G4 <- filterVarImpact(mut_G4, score_diff_threshold = -0.2) exportMutG4(filtered_mut_G4, "./result_mut.txt") exportMutG4(filtered_mut_G4, "./result_mut.xlsx", include_metadata = FALSE) exportMutG4(filtered_mut_G4, "./result_mut.csv", revcomp_mutG4_seq = FALSE) # remove all exported files unlink("./result_mut.txt") unlink("./result_mut.xlsx") unlink("./result_mut.csv")library(GenomicRanges) fa_path <- system.file("extdata", "seq.fa", package = "G4SNVHunter") seq <- loadSequence(seq_path = fa_path) # Predict G4s G4_detected <- G4HunterDetect(seq) # create mut granges object mut <- data.frame( chr = c("seq1", "seq5"), pos = c(81, 11), ref = c("GGGTAGGG", "A"), alt = c("G", "AGGGGGGGGGGGGGGGG") ) mut <- GRanges( seqnames = mut$chr, ranges = IRanges(start = mut$pos, end = mut$pos), strand = "*", ref = mut$ref, alt = mut$alt ) mut_G4 <- G4VarImpact(G4_detected, mut, ref_col = "ref", alt_col = "alt") filtered_mut_G4 <- filterVarImpact(mut_G4, score_diff_threshold = -0.2) exportMutG4(filtered_mut_G4, "./result_mut.txt") exportMutG4(filtered_mut_G4, "./result_mut.xlsx", include_metadata = FALSE) exportMutG4(filtered_mut_G4, "./result_mut.csv", revcomp_mutG4_seq = FALSE) # remove all exported files unlink("./result_mut.txt") unlink("./result_mut.xlsx") unlink("./result_mut.csv")
This function is deprecated and will be removed in a future version.
filterSNVImpact( gr, raw_score_threshold = NULL, mut_score_threshold = NULL, score_diff_threshold = NULL )filterSNVImpact( gr, raw_score_threshold = NULL, mut_score_threshold = NULL, score_diff_threshold = NULL )
gr |
A |
raw_score_threshold |
A positive numeric value no greater than 4 used
as the threshold for the absolute value of |
mut_score_threshold |
A positive numeric value no greater than 4 used
as the threshold for the absolute value of |
score_diff_threshold |
A negative numeric value no less than -4 used as
the threshold for |
This function filters the SNV Impact GRanges object returned by the
SNVImpactG4 function based on user-defined thresholds for the
G4.info.score, mut.score, and score.diff parameters.
This function filters SNVs that may significantly impair the formation of G4
structures using customizable filtering criteria. You are not required to
specify all three threshold parameters. However, at least one threshold
parameter must be provided.
A filtered GRanges object, containing only the records that
meet the specified threshold criteria.
This function is no longer supported.
Use filterVarImpact instead.
SNVImpactG4 for assessing the impact of SNVs on
G4 formation.
if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" G4 <- G4HunterDetect(chr21_seq) data(snp_gr) res_snp <- SNVImpactG4(G4, snp_gr, alt_col = "alt") filtered_snv_eff <- filterSNVImpact(res_snp, mut_score_threshold = 1.2, score_diff_threshold = -0.2 ) print(filtered_snv_eff)if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" G4 <- G4HunterDetect(chr21_seq) data(snp_gr) res_snp <- SNVImpactG4(G4, snp_gr, alt_col = "alt") filtered_snv_eff <- filterSNVImpact(res_snp, mut_score_threshold = 1.2, score_diff_threshold = -0.2 ) print(filtered_snv_eff)
This function filters the G4 (GRanges) object returned by the
G4VarImpact function based on user-defined thresholds for the
G4.info.max_score, mutated.max_score, and score.diff
columns.
Users are not required to specify all three thresholds;
however, at least one must be provided.
filterVarImpact( mut_G4, raw_score_threshold = NULL, mut_score_threshold = NULL, score_diff_threshold = NULL )filterVarImpact( mut_G4, raw_score_threshold = NULL, mut_score_threshold = NULL, score_diff_threshold = NULL )
mut_G4 |
A |
raw_score_threshold |
A positive numeric value (no greater than 4)
used as the threshold for the absolute value of |
mut_score_threshold |
A positive numeric value (no greater than 4)
used as the threshold for the absolute value of |
score_diff_threshold |
A negative numeric value (no less than -4)
used as the threshold for |
A filtered GRanges object, containing only the records that
meet the specified threshold criteria.
G4VarImpact for assessing the impact of variants on
G4 formation.
if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" G4 <- G4HunterDetect(chr21_seq) data(snp_gr) res_snp <- G4VarImpact(G4, snp_gr, ref_col = "ref", alt_col = "alt") filtered_var_eff <- filterVarImpact(res_snp, mut_score_threshold = 1.2, score_diff_threshold = -0.2 ) print(filtered_var_eff)if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" G4 <- G4HunterDetect(chr21_seq) data(snp_gr) res_snp <- G4VarImpact(G4, snp_gr, ref_col = "ref", alt_col = "alt") filtered_var_eff <- filterVarImpact(res_snp, mut_score_threshold = 1.2, score_diff_threshold = -0.2 ) print(filtered_var_eff)
This function detects G4 sequences from a given DNAStringSet object
using the G4Hunter algorithm.
G4HunterDetect( sequences = NULL, threshold = 1.5, window_size = 25, include_sequences = TRUE, strands = "b" )G4HunterDetect( sequences = NULL, threshold = 1.5, window_size = 25, include_sequences = TRUE, strands = "b" )
sequences |
A |
threshold |
A numeric value specifying the threshold for the G4Hunter score (absolute value). Default is 1.5. It is not recommended to set the threshold below 1.2. |
window_size |
An integer specifying the window size (bp) used for prediction. Default is 25. Another commonly used window size is 20. However, 25 is generally preferred. |
include_sequences |
A logical value ( |
strands |
A character string specifying which strand(s) to consider:
|
A GRanges object containing the predicted G4 sequences.
The GRanges object includes the following metadata columns:
scoreThe final G4Hunter score of the predicted G4 sequence after merging and pruning.
max_scoreThe maximum G4Hunter score observed across all sliding windows covering the G4.
sequenceThe sequence of the predicted G4 (if
include_sequences = TRUE).
Additionally, the following parameters used during detection are stored
in the metadata() of the returned GRanges object:
thresholdThe G4Hunter score threshold used.
window_sizeThe window size used.
include_sequencesWhether sequences were included.
strandsThe strand(s) considered.
If no G4 sequences are detected, an empty GRanges object is returned.
loadSequence for loading genome sequences into a
DNAStringSet object.
if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("Biostrings", quietly = TRUE)) { BiocManager::install("Biostrings") } library(Biostrings) sequences <- DNAStringSet(c( "AGTGAATGGGATGGGAGGAGGGACGGGGTAGTACAGCATAGCATG", "TAGGTAGCTACGACACCCTGCCCTACCCCTACCCCTATCTA" )) names(sequences) <- c("seq1", "seq2") G4s <- G4HunterDetect(sequences, threshold = 1.5, window_size = 25) print(G4s) seq_path <- system.file("extdata", "seq.fa", package = "G4SNVHunter") G4s <- G4HunterDetect(loadSequence(seq_path = seq_path)) print(G4s) seq_path <- system.file("extdata", "seq.txt", package = "G4SNVHunter") G4s <- G4HunterDetect(loadSequence(seq_path = seq_path)) print(G4s)if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("Biostrings", quietly = TRUE)) { BiocManager::install("Biostrings") } library(Biostrings) sequences <- DNAStringSet(c( "AGTGAATGGGATGGGAGGAGGGACGGGGTAGTACAGCATAGCATG", "TAGGTAGCTACGACACCCTGCCCTACCCCTACCCCTATCTA" )) names(sequences) <- c("seq1", "seq2") G4s <- G4HunterDetect(sequences, threshold = 1.5, window_size = 25) print(G4s) seq_path <- system.file("extdata", "seq.fa", package = "G4SNVHunter") G4s <- G4HunterDetect(loadSequence(seq_path = seq_path)) print(G4s) seq_path <- system.file("extdata", "seq.txt", package = "G4SNVHunter") G4s <- G4HunterDetect(loadSequence(seq_path = seq_path)) print(G4s)
This function calculates the G4Hunter score for a given nucleotide sequence, which reflects the ability of that sequence to form a G4 structure.
G4HunterScore(seq = NULL)G4HunterScore(seq = NULL)
seq |
A single character string representing the nucleotide sequence.
Must contain only the characters |
A numeric value representing the G4Hunter score for the provided sequence.
G4HunterDetect for detecting the G4 sequences in a
given DNAStringSet object.
sequence <- "GGGTAAGGGATGGGTCGGG" score <- G4HunterScore(sequence) print(score) # A negative value indicates that the G4 sequence # is located on the reverse strand sequence <- "GGGTAAGGGATGGGTCGGG" score <- G4HunterScore(sequence) print(score)sequence <- "GGGTAAGGGATGGGTCGGG" score <- G4HunterScore(sequence) print(score) # A negative value indicates that the G4 sequence # is located on the reverse strand sequence <- "GGGTAAGGGATGGGTCGGG" score <- G4HunterScore(sequence) print(score)
This function evaluates the impact of variants (SNVs, indels, and MNVs) on G4 formation.
G4VarImpact( G4 = NULL, variants = NULL, ref_col = NULL, alt_col = NULL, mode = "s", sampleid_col = NULL )G4VarImpact( G4 = NULL, variants = NULL, ref_col = NULL, alt_col = NULL, mode = "s", sampleid_col = NULL )
G4 |
A |
variants |
A |
ref_col |
A |
alt_col |
A |
mode |
A |
sampleid_col |
A |
A GRanges object with variant impact results:
For each variant-G4 overlap:
Original G4 metadata (G4.info.*)
Variant information (variant.info.*)
Mutated sequence (mutated.G4.seq)
Annotated mutation sequence (mutated.G4.anno.seq)
New G4Hunter max_score (mutated.max_score)
Score difference (score.diff)
For each sample-G4 combination:
Original G4 metadata (G4.info.*)
Combined variant information (variant.info.*)
Mutated sequence with all variants incorporated
(mutated.G4.seq)
Annotated mutation sequence (mutated.G4.anno.seq)
New G4Hunter max_score (mutated.max_score)
Score difference (score.diff)
G4HunterDetect for detecting the G4 sequences in a
given DNAStringSet object.
filterVarImpact for filtering out variants with
significant impact.
if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) # Load sequence for chromosome 21 (hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" # Detect G4s in human chromosome 21 G4 <- G4HunterDetect(chr21_seq) # Load variants data(snv_gr) # 's' mode; single-variant mode ('s') # evaluating each variant individually. res_snv_s <- G4VarImpact(G4, snv_gr, ref_col = "ref", alt_col = "alt") print(res_snv_s) # 'm' mode; multi-variant mode ('m') # evaluating the combined impact of variants on G4s. # Grouped by the sample IDs specified in the 'sampleid_col' column. res_snv_m <- G4VarImpact(G4, snv_gr, ref_col = "ref", alt_col = "alt", mode = "m", sampleid_col = "sampleid" ) print(res_snv_m)if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) # Load sequence for chromosome 21 (hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" # Detect G4s in human chromosome 21 G4 <- G4HunterDetect(chr21_seq) # Load variants data(snv_gr) # 's' mode; single-variant mode ('s') # evaluating each variant individually. res_snv_s <- G4VarImpact(G4, snv_gr, ref_col = "ref", alt_col = "alt") print(res_snv_s) # 'm' mode; multi-variant mode ('m') # evaluating the combined impact of variants on G4s. # Grouped by the sample IDs specified in the 'sampleid_col' column. res_snv_m <- G4VarImpact(G4, snv_gr, ref_col = "ref", alt_col = "alt", mode = "m", sampleid_col = "sampleid" ) print(res_snv_m)
This function loads genomic sequences from multiple sources, including a FASTA file, a text file with sequence identifiers and corresponding sequences, or a data frame object.
loadSequence(genome_seq = NULL, seq_path = NULL)loadSequence(genome_seq = NULL, seq_path = NULL)
genome_seq |
A data frame containing sequence identifiers and corresponding sequences. |
seq_path |
A character string specifying the file path to a FASTA file
(suffixed with |
A DNAStringSet object containing the genome sequences.
# File path for sequences in fasta format fa_path <- system.file("extdata", "seq.fa", package = "G4SNVHunter") seq <- loadSequence(seq_path = fa_path) print(seq) # Another example # Load sequences from data.frame seq_df <- data.frame( chr = c("seq1", "seq2"), sequence = c( paste0(rep("G", 100), collapse = ""), paste0(rep("A", 100), collapse = "") ) ) seq <- loadSequence(genome_seq = seq_df) print(seq)# File path for sequences in fasta format fa_path <- system.file("extdata", "seq.fa", package = "G4SNVHunter") seq <- loadSequence(seq_path = fa_path) print(seq) # Another example # Load sequences from data.frame seq_df <- data.frame( chr = c("seq1", "seq2"), sequence = c( paste0(rep("G", 100), collapse = ""), paste0(rep("A", 100), collapse = "") ) ) seq <- loadSequence(genome_seq = seq_df) print(seq)
This function loads variant data from either a standard VCF or MAF file and filters for small variants, such as SNVs, INDELs, and DELINs.
loadVariant(variant_file, file_type = c("vcf", "maf"), keep_vcf_id = TRUE)loadVariant(variant_file, file_type = c("vcf", "maf"), keep_vcf_id = TRUE)
variant_file |
A |
file_type |
A |
keep_vcf_id |
A |
A GRanges object containing the variants loaded from the
specified VCF or MAF file. For VCF files, only the ID, REF,
and ALT metadata columns are included in the output. For MAF files,
all available MAF columns are retained.
# load the vcf file, please do not forget to specify the file type vcf_path <- system.file("extdata", "example_variants_chr16.vcf", package = "G4SNVHunter") variants <- loadVariant(vcf_path, file_type = "vcf") # load the maf file maf_path <- system.file("extdata", "example_variants_chr16.maf", package = "G4SNVHunter") variants <- loadVariant(maf_path, file_type = "maf")# load the vcf file, please do not forget to specify the file type vcf_path <- system.file("extdata", "example_variants_chr16.vcf", package = "G4SNVHunter") variants <- loadVariant(vcf_path, file_type = "vcf") # load the maf file maf_path <- system.file("extdata", "example_variants_chr16.maf", package = "G4SNVHunter") variants <- loadVariant(maf_path, file_type = "maf")
This function generates a series of plots to visualize basic statistics of G4 sequences predicted by the G4Hunter algorithm. The function produces the following plots:
Distribution of max scores (absolute values).
Distribution of max scores, split by strand.
Distribution of scores (absolute values).
Distribution of scores, split by strand.
Distribution of sequence lengths (with lengths greater than 50 bp grouped into a single bin).
Distribution of sequence lengths, split by strand (with lengths greater than 50 bp grouped into a single bin).
plotG4Info( G4, p_colors = c("#6B9ECC", "#D91117", "#0E619C", "#58AC7B", "#D91117", "#0E619C", "#F39C40", "#D91117", "#0E619C") )plotG4Info( G4, p_colors = c("#6B9ECC", "#D91117", "#0E619C", "#58AC7B", "#D91117", "#0E619C", "#F39C40", "#D91117", "#0E619C") )
G4 |
A GRanges object containing G4 sequences. The object must include at least the following metadata columns:
The GRanges object must include |
p_colors |
A vector of colors to be used for the plots. It should contain nine color values corresponding to the different plot elements. |
A combined plot displays all the generated plots arranged in a grid layout.
G4HunterDetect for detecting the G4 sequences in a
given DNAStringSet object.
seq_path <- system.file("extdata", "seq.txt", package = "G4SNVHunter") G4 <- G4HunterDetect(loadSequence(seq_path = seq_path)) plotG4Info(G4)seq_path <- system.file("extdata", "seq.txt", package = "G4SNVHunter") G4 <- G4HunterDetect(loadSequence(seq_path = seq_path)) plotG4Info(G4)
This function visualizes and compares the G4 and its mutant counterpart using sequence logos.
plotImpactedG4(mut_G4 = NULL, keep_gstrand = TRUE)plotImpactedG4(mut_G4 = NULL, keep_gstrand = TRUE)
mut_G4 |
A |
keep_gstrand |
Logical. If |
A ggplot2-based object containing the G4 sequence logo.
if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" G4 <- G4HunterDetect(chr21_seq) # Load SNVs data(snp_gr) res_snp <- G4VarImpact(G4, snp_gr, ref_col = "ref", alt_col = "alt") plotImpactedG4(res_snp[1])if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" G4 <- G4HunterDetect(chr21_seq) # Load SNVs data(snp_gr) res_snp <- G4VarImpact(G4, snp_gr, ref_col = "ref", alt_col = "alt") plotImpactedG4(res_snp[1])
This function is deprecated and will be removed in a future version.
plotImpactSeq(filtered_gr, ncol = 1)plotImpactSeq(filtered_gr, ncol = 1)
filtered_gr |
A |
ncol |
An integer specifying the number of columns in the output plot grid. Default is 1. |
This function plot sequence logos to visualize sequence variants caused by SNVs or SNPs, with the location of the variants highlighted by rectangles and arrows.
A plot that displays the grid of sequence logos, showing the differences between the original and mutated sequences.
This function is no longer supported.
Use plotImpactedG4 instead.
SNVImpactG4 for evaluating the impact of SNVs on G4
formation, and filterSNVImpact for filtering G4s that are
significantly affected by SNVs.
if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } library(GenomicRanges) seq <- data.frame(chr = c("seq1", "seq2"), seq = c("ATTTGGGAGGGAGGGAGGGATGATGAAAATTTTATTTATTTTATTTA", "TTTATACTATTCCCTTACCCTCCCATCCCCATACGGCATCTAGATC")) seq_gr <- loadSequence(seq) G4 <- G4HunterDetect(seq_gr) snv_gr <- GRanges(seqnames = c("seq1", "seq2"), ranges = IRanges(start = c(18, 23), width = 1), ref = c("G", "C"), alt = c("C", "G")) effect <- SNVImpactG4(G4, snv_gr, alt_col = "alt") plotImpactSeq(effect, ncol = 2)if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } library(GenomicRanges) seq <- data.frame(chr = c("seq1", "seq2"), seq = c("ATTTGGGAGGGAGGGAGGGATGATGAAAATTTTATTTATTTTATTTA", "TTTATACTATTCCCTTACCCTCCCATCCCCATACGGCATCTAGATC")) seq_gr <- loadSequence(seq) G4 <- G4HunterDetect(seq_gr) snv_gr <- GRanges(seqnames = c("seq1", "seq2"), ranges = IRanges(start = c(18, 23), width = 1), ref = c("G", "C"), alt = c("C", "G")) effect <- SNVImpactG4(G4, snv_gr, alt_col = "alt") plotImpactSeq(effect, ncol = 2)
This function is deprecated and will be removed in a future version.
plotSNVImpact(gr, p_colors = c("#b22d2d", "#6ca4d6", "#2d69b0", "#1f77b4"))plotSNVImpact(gr, p_colors = c("#b22d2d", "#6ca4d6", "#2d69b0", "#1f77b4"))
gr |
A
|
p_colors |
A vector of four colors used for plotting. |
This function generates two plots for visualizing the impact of SNVs on G4 formation: 1. A scatter plot with density shading comparing the original G4Hunter score and the mutant G4Hunter score. 2. A density plot showing the distribution of score changes between the original and mutant G4 sequences.
A combined plot (using plot_grid) containing two subplots:
- Density scatter plot comparing original vs mutant G4Hunter scores.
- Density plot of the G4Hunter score differences.
This function is no longer supported.
Use plotVarImpact instead.
SNVImpactG4 for evaluating the impact of SNVs on G4
formation, and filterSNVImpact for filtering G4s that are
significantly affected by SNVs.
if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" G4 <- G4HunterDetect(chr21_seq) # Load SNVs data(snp_gr) res_snp <- SNVImpactG4(G4, snp_gr, alt_col = "alt") plotSNVImpact(res_snp)if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" G4 <- G4HunterDetect(chr21_seq) # Load SNVs data(snp_gr) res_snp <- SNVImpactG4(G4, snp_gr, alt_col = "alt") plotSNVImpact(res_snp)
This function generates two panels to visualize the impact of variants on G4 formation:
A 2D density plot of absolute maximum G4Hunter scores before and after mutation.
A density plot showing the distribution of score differences.
plotVarImpact( gr, p_colors = c(fill_2d = "#376597", diagonal = "#b22d2d", density_fill = "#6ca4d6", density_line = "#2d69b0", vline = "#1f77b4") )plotVarImpact( gr, p_colors = c(fill_2d = "#376597", diagonal = "#b22d2d", density_fill = "#6ca4d6", density_line = "#2d69b0", vline = "#1f77b4") )
gr |
A
|
p_colors |
A character vector of five colors used for plotting. The vector must include:
|
A ggplot object combining two subplots:
2D density plot of the absolute maximum G4Hunter scores before and after mutation.
Density plot of the maximum G4Hunter score differences.
G4VarImpact for computing the effects of variants
on G4 formation,
and filterVarImpact for screening G4s whose formation
propensity may be impaired by variants.
if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" G4 <- G4HunterDetect(chr21_seq) # Load SNVs data(snp_gr) res_snp <- G4VarImpact(G4, snp_gr, ref_col = "ref", alt_col = "alt") plotVarImpact(res_snp)if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" G4 <- G4HunterDetect(chr21_seq) # Load SNVs data(snp_gr) res_snp <- G4VarImpact(G4, snp_gr, ref_col = "ref", alt_col = "alt") plotVarImpact(res_snp)
This dataset contains a GRanges object storing single nucleotide
polymorphisms (SNPs).
data(snp_gr)data(snp_gr)
A GRanges object with 30,000 SNPs.
This dataset contains a GRanges object storing single nucleotide
variants (SNVs).
data(snv_gr)data(snv_gr)
A GRanges object with 50,000 SNVs.
This function is deprecated and will be removed in a future version.
SNVImpactG4( G4 = NULL, snvs = NULL, alt_col = "alt", mode = "s", sampleid_col = NULL, snvid_col = NULL )SNVImpactG4( G4 = NULL, snvs = NULL, alt_col = "alt", mode = "s", sampleid_col = NULL, snvid_col = NULL )
G4 |
A |
snvs |
A |
alt_col |
A character string specifying the name of the column in
|
mode |
A character string indicating the mode of operation.
Set to |
sampleid_col |
A character string specifying the name of the column in
|
snvid_col |
A character string specifying the column name in
|
This function evaluates the impact of SNVs on G4 formation.
A GRanges object depending on the mode parameter:
For mode = "s", the returned GRanges object includes the
following metadata columns:
seqnames Identifiers for SNVs.
ranges Position of the SNVs.
strand Strand of the SNVs.
SNV.info.* Metadata columns related to each SNV.
G4.info.* Metadata columns from the original G4 object.
mut.G4.seq The mutated G4 sequence after applying the
SNV change.
mut.G4.anno.seq The mutated G4 sequence, with the
mutated bases annotated using square brackets.
mut.score The G4Hunter score of the mutated sequence.
score.diff The difference between the mutated G4Hunter
score and the original G4Hunter score.
The value is calculated as the absolute value of the mutated G4Hunter
score minus the absolute value of the original G4Hunter score.
For mode = "m", the returned GRanges object includes the
following metadata columns:
seqnames Identifiers for G4 sequences.
ranges Position of the G4 sequences (start and end).
strand Strand of the G4 sequences.
G4.info.* Metadata columns from the original G4 object.
snv.ids Concatenated SNV IDs for all SNVs affecting the
G4 region.
sample.ids A semicolon-separated list of sample IDs
overlapping each G4 region.
mut.G4.seq The mutated G4 sequence after applying the
combined SNV changes.
mut.G4.anno.seq The mutated G4 sequence, with the
mutated bases annotated using square brackets.
mut.score The G4Hunter score of the mutated sequence.
score.diff The difference between the mutated G4Hunter
score and the original G4Hunter score. This value is calculated as the
absolute value of the mutated G4Hunter score minus the absolute value
of the original G4Hunter score.
This function is no longer supported.
Use G4VarImpact instead.
G4HunterDetect for detecting the G4 sequences in a
given DNAStringSet object.
G4HunterScore for calculating the G4Hunter scores
for a given sequence.
filterSNVImpact for filtering out SNVs with
significant impact.
if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) # Load sequence for chromosome 21 (hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" # Detect G4s in human chromosome 21 G4 <- G4HunterDetect(chr21_seq) # 's' mode # Load SNPs data(snp_gr) # Obtain SNPs that overlap with G4 regions and assess their impact on G4. # In variant-centric mode ('s'), evaluating each SNP individually. res_snp <- SNVImpactG4(G4, snp_gr, alt_col = "alt") print(res_snp) # 'm' mode # Load SNVs data(snv_gr) # Obtain SNVs that overlap with G4 regions and assess their impact on G4. # In sample-centric mode ('m'), evaluate the combined impact of SNVs on G4s. # Grouped by the sample IDs specified in 'sampleid_col'. res_snv <- SNVImpactG4(G4, snv_gr, alt_col = "alt", mode = "m", sampleid_col = "sampleid", snvid_col = "snv_id" ) print(res_snv)if (!requireNamespace("BiocManager", quietly = TRUE)) { install.packages("BiocManager") } if (!requireNamespace("GenomicRanges", quietly = TRUE)) { BiocManager::install("GenomicRanges") } if (!requireNamespace("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)) { BiocManager::install("BSgenome.Hsapiens.UCSC.hg19") } library(GenomicRanges) library(BSgenome.Hsapiens.UCSC.hg19) # Load sequence for chromosome 21 (hg19) hg19 <- BSgenome.Hsapiens.UCSC.hg19 chr21_seq <- DNAStringSet(hg19$chr21) # Chromosome name is needed names(chr21_seq) <- "chr21" # Detect G4s in human chromosome 21 G4 <- G4HunterDetect(chr21_seq) # 's' mode # Load SNPs data(snp_gr) # Obtain SNPs that overlap with G4 regions and assess their impact on G4. # In variant-centric mode ('s'), evaluating each SNP individually. res_snp <- SNVImpactG4(G4, snp_gr, alt_col = "alt") print(res_snp) # 'm' mode # Load SNVs data(snv_gr) # Obtain SNVs that overlap with G4 regions and assess their impact on G4. # In sample-centric mode ('m'), evaluate the combined impact of SNVs on G4s. # Grouped by the sample IDs specified in 'sampleid_col'. res_snv <- SNVImpactG4(G4, snv_gr, alt_col = "alt", mode = "m", sampleid_col = "sampleid", snvid_col = "snv_id" ) print(res_snv)