Title: | Pathway Association Study Tool (PAST) |
---|---|
Description: | PAST takes GWAS output and assigns SNPs to genes, uses those genes to find pathways associated with the genes, and plots pathways based on significance. Implements methods for reading GWAS input data, finding genes associated with SNPs, calculating enrichment score and significance of pathways, and plotting pathways. |
Authors: | Thrash Adam [cre, aut], DeOrnellis Mason [aut] |
Maintainer: | Thrash Adam <[email protected]> |
License: | GPL (>=3) + file LICENSE |
Version: | 1.23.0 |
Built: | 2024-11-24 06:29:07 UTC |
Source: | https://github.com/bioc/PAST |
Assign SNPs in a chunk to genes
assign_chunk(gff, chunk, window)
assign_chunk(gff, chunk, window)
gff |
The GFF data for the chromosome being parsed |
chunk |
The dataframe containing SNP data |
window |
The search window around the SNPs |
tagSNPs labeled with gene names
Assign SNPs to genes
assign_SNPs_to_genes( gwas_data, LD, gff_file, filter_type, window, r_squared_cutoff, num_cores )
assign_SNPs_to_genes( gwas_data, LD, gff_file, filter_type, window, r_squared_cutoff, num_cores )
gwas_data |
Merged association and effects data from merge_data() |
LD |
Linkage disequilibrium data from parse_LD() |
gff_file |
The path to a GFF file |
filter_type |
The GFF feature type(s) to consider when assigning SNPs to genes, e.g. c("gene") |
window |
The search window for genes around the SNP |
r_squared_cutoff |
The R^2 value used to determine SNP significance |
num_cores |
The number of cores to use in parallelizing PAST |
A dataframe of genes from the SNP data
example("load_GWAS_data") example("load_LD") demo_genes_file = system.file("extdata", "genes.gff", package = "PAST", mustWork = TRUE) filter_type = c("gene") genes <-assign_SNPs_to_genes(gwas_data, LD, demo_genes_file, filter_type, 1000, 0.8, 2)
example("load_GWAS_data") example("load_LD") demo_genes_file = system.file("extdata", "genes.gff", package = "PAST", mustWork = TRUE) filter_type = c("gene") genes <-assign_SNPs_to_genes(gwas_data, LD, demo_genes_file, filter_type, 1000, 0.8, 2)
Determine Linkage
determine_linkage(chunk, r_squared_cutoff)
determine_linkage(chunk, r_squared_cutoff)
chunk |
A chunk of data to be processed |
r_squared_cutoff |
The R^2 value to check against |
Either the first unlinked SNP or a set of linked SNPs
Find Pathway Significance
find_pathway_significance( genes, pathways_file, gene_number_cutoff = 5, mode, sample_size = 1000, num_cores )
find_pathway_significance( genes, pathways_file, gene_number_cutoff = 5, mode, sample_size = 1000, num_cores )
genes |
Genes from assign_SNPs_to_genes() |
pathways_file |
A file containing the pathway IDs, their names, and the genes in the pathway |
gene_number_cutoff |
A cut-off for the minimum number of genes in a pathway |
mode |
The analysis mode, either "increasing" or "decreasing" |
sample_size |
How many times to sample the effects data during random sampling |
num_cores |
The number of cores to use in parallelizing PAST |
Rugplots data to be passed to plot_pathways()
example("assign_SNPs_to_genes") demo_pathways_file = system.file("extdata", "pathways.txt.xz", package = "PAST", mustWork = TRUE) rugplots_data <- find_pathway_significance(genes, demo_pathways_file, 5, "increasing", 1000, 2)
example("assign_SNPs_to_genes") demo_pathways_file = system.file("extdata", "pathways.txt.xz", package = "PAST", mustWork = TRUE) rugplots_data <- find_pathway_significance(genes, demo_pathways_file, 5, "increasing", 1000, 2)
Find representative SNP for a chunk of SNPs
find_representative_SNP(chunk, r_squared_cutoff)
find_representative_SNP(chunk, r_squared_cutoff)
chunk |
A chunk of data to parse |
r_squared_cutoff |
The R^2 value to check against when counting SNPs |
A single SNP representing the whole chunk
Find the SNP-gene assignment that represents SNPs assigned to a gene
find_representative_SNP_gene_pairing(chunk)
find_representative_SNP_gene_pairing(chunk)
chunk |
A chunk of gene assignments |
A single SNP-gene assignment representing all SNPs assigned to the same gene
Load GWAS data
load_GWAS_data( association_file, effects_file, association_columns = c("Trait", "Marker", "Locus", "Site", "p", "marker_R2"), effects_columns = c("Trait", "Marker", "Locus", "Site", "Effect") )
load_GWAS_data( association_file, effects_file, association_columns = c("Trait", "Marker", "Locus", "Site", "p", "marker_R2"), effects_columns = c("Trait", "Marker", "Locus", "Site", "Effect") )
association_file |
The association file |
effects_file |
The effects file |
association_columns |
The names of the columns in your association data for Trait, Marker, Chromosome (Locus), Site, p, and marker_Rsquared (marker_R2) |
effects_columns |
The names of the columns in your effects data for Trait, Marker, Chromosome, Site, and effect |
The association data and the effects data merged into a dataframe with one row for each SNP
demo_association_file = system.file("extdata", "association.txt.xz", package = "PAST", mustWork = TRUE) demo_effects_file = system.file("extdata", "effects.txt.xz", package = "PAST", mustWork = TRUE) gwas_data <- load_GWAS_data(demo_association_file, demo_effects_file)
demo_association_file = system.file("extdata", "association.txt.xz", package = "PAST", mustWork = TRUE) demo_effects_file = system.file("extdata", "effects.txt.xz", package = "PAST", mustWork = TRUE) gwas_data <- load_GWAS_data(demo_association_file, demo_effects_file)
Load Linkage Disequilibrium
load_LD( LD_file, LD_columns = c("Locus1", "Position1", "Site1", "Position2", "Site2", "Dist_bp", "R.2") )
load_LD( LD_file, LD_columns = c("Locus1", "Position1", "Site1", "Position2", "Site2", "Dist_bp", "R.2") )
LD_file |
The file containing linkage disequilibrium data |
LD_columns |
The names of the columns in your linkage disequilibrium data for the chromosome of the first SNP, the position of the first SNP, the site of the first SNP, the position of the second SNP, the site of the second SNP, the distance between the two SNPs, and the R.2 |
The linkage disequilibrium data in a list containing dataframes for each chromosome.
demo_LD_file = system.file("extdata","LD.txt.xz", package = "PAST", mustWork = TRUE) LD <- load_LD(demo_LD_file)
demo_LD_file = system.file("extdata","LD.txt.xz", package = "PAST", mustWork = TRUE) LD <- load_LD(demo_LD_file)
Plot Rugplots for Selected Pathways
plot_pathways( rugplots_data, filter_type, filter_parameter, mode, output_directory )
plot_pathways( rugplots_data, filter_type, filter_parameter, mode, output_directory )
rugplots_data |
The data to be plotted (returned from find_pathway_significance()) |
filter_type |
The parameter to be used for filtering |
filter_parameter |
The cut-off value of the filtering parameter |
mode |
The mode used to create the data (increasing/decreasing) |
output_directory |
An existing directory to save results in |
Does not return a value
example("find_pathway_significance") plot_pathways(rugplots_data, "pvalue", "0.03", "decreasing", tempdir())
example("find_pathway_significance") plot_pathways(rugplots_data, "pvalue", "0.03", "decreasing", tempdir())