Title: | Phenotype Consensus ANalysis (PCAN) |
---|---|
Description: | Phenotypes comparison based on a pathway consensus approach. Assess the relationship between candidate genes and a set of phenotypes based on additional genes related to the candidate (e.g. Pathways or network neighbors). |
Authors: | Matthew Page and Patrice Godard |
Maintainer: | Matthew Page <[email protected]> and Patrice Godard <[email protected]> |
License: | CC BY-NC-ND 4.0 |
Version: | 1.35.0 |
Built: | 2024-11-22 06:25:59 UTC |
Source: | https://github.com/bioc/PCAN |
This function compares 2 HP terms based on provided Information Content and ancestors
calcHpSim(term1, term2, method = c("Resnik"), IC, ancestors)
calcHpSim(term1, term2, method = c("Resnik"), IC, ancestors)
term1 |
one of the HP term to compare |
term2 |
the other HP term to compare |
method |
the method for computing semantic simalirity (default: "Resnik" returns the IC of the MICA: Most Informative common ancestor) |
IC |
a named vector of Information Content by HP term |
ancestors |
a named list of ancestors by HP term |
A numeric value
Patrice Godard
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ## Compute similarity between different couples of HP terms data(hp_ancestors, hpDef, package="PCAN") hp1 <- "HP:0000518" hp2 <- "HP:0030084" hp3 <- "HP:0002119" hp4 <- "HP:0001305" hpDef[which(hpDef$id %in% c(hp1, hp2)), 1:2] calcHpSim(hp1, hp2, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp2, hp3)), 1:2] calcHpSim(hp2, hp3, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp2, hp4)), 1:2] calcHpSim(hp2, hp4, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp3, hp4)), 1:2] calcHpSim(hp3, hp4, IC=ic, ancestors=hp_ancestors)
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ## Compute similarity between different couples of HP terms data(hp_ancestors, hpDef, package="PCAN") hp1 <- "HP:0000518" hp2 <- "HP:0030084" hp3 <- "HP:0002119" hp4 <- "HP:0001305" hpDef[which(hpDef$id %in% c(hp1, hp2)), 1:2] calcHpSim(hp1, hp2, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp2, hp3)), 1:2] calcHpSim(hp2, hp3, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp2, hp4)), 1:2] calcHpSim(hp2, hp4, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp3, hp4)), 1:2] calcHpSim(hp3, hp4, IC=ic, ancestors=hp_ancestors)
This function compares each couple of HP terms from each of the 2 provided sets based on Information Content (IC)
compareHPSets(hpSet1, hpSet2, IC, ancestors, method = "Resnik", BPPARAM = bpparam())
compareHPSets(hpSet1, hpSet2, IC, ancestors, method = "Resnik", BPPARAM = bpparam())
hpSet1 |
a set of HP terms |
hpSet2 |
another set of HP terms |
IC |
a named vector of Information Content by HP term |
ancestors |
a named list of ancestors by HP term |
method |
the method for computing semantic simalirity among
those available in |
BPPARAM |
An optional |
A matrix of semantic similarity
Patrice Godard
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease data(traitDef, geneDef, hp_ancestors, hpDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease data(hpByTrait, package="PCAN") hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## Comparison of the two sets of HP terms compMat <- compareHPSets( hpSet1=geneHps, hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) ## Get the symmetric semantic similarity score hpSetCompSummary(compMat, method="bma", direction="symSim") bm <- hpSetCompBestMatch(compMat, "b") hpDef[match(c(bm$compared, bm$candidate), hpDef$id),] ## Assessing the significance of this score by comparing to all other genes hpGeneResnik <- compareHPSets( hpSet1=names(ic), hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) hpMatByGene <- lapply( hpByGene, function(x){ hpGeneResnik[x, , drop=FALSE] } ) resnSss <- unlist(lapply( hpMatByGene, hpSetCompSummary, method="bma", direction="symSim" )) candScore <- resnSss[entrez] hist( resnSss, breaks=100, col="grey", ylim=c(0,300), xlab=expression(Sim[sym]), ylab="Number of genes", main=paste( "Distribution of symmetric semantic similarity scores\nfor all the", length(resnSss), "genes" ) ) polygon( x=c(candScore, 10, 10, candScore), y=c(-10, -10, 1000, 1000), border="#BE0000", col="#BE000080", lwd=3 ) withHigher <- sum(resnSss >= candScore) text( x=candScore, y=mean(par()$usr[3:4]), paste0( withHigher, " genes (", signif(withHigher*100/length(resnSss), 2), "%)\n", "show a symmetric semantic\n", "similarity score greater than\n", "the gene candidate for\n", "for the HPs under focus." ), pos=4, cex=0.6 )
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease data(traitDef, geneDef, hp_ancestors, hpDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease data(hpByTrait, package="PCAN") hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## Comparison of the two sets of HP terms compMat <- compareHPSets( hpSet1=geneHps, hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) ## Get the symmetric semantic similarity score hpSetCompSummary(compMat, method="bma", direction="symSim") bm <- hpSetCompBestMatch(compMat, "b") hpDef[match(c(bm$compared, bm$candidate), hpDef$id),] ## Assessing the significance of this score by comparing to all other genes hpGeneResnik <- compareHPSets( hpSet1=names(ic), hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) hpMatByGene <- lapply( hpByGene, function(x){ hpGeneResnik[x, , drop=FALSE] } ) resnSss <- unlist(lapply( hpMatByGene, hpSetCompSummary, method="bma", direction="symSim" )) candScore <- resnSss[entrez] hist( resnSss, breaks=100, col="grey", ylim=c(0,300), xlab=expression(Sim[sym]), ylab="Number of genes", main=paste( "Distribution of symmetric semantic similarity scores\nfor all the", length(resnSss), "genes" ) ) polygon( x=c(candScore, 10, 10, candScore), y=c(-10, -10, 1000, 1000), border="#BE0000", col="#BE000080", lwd=3 ) withHigher <- sum(resnSss >= candScore) text( x=candScore, y=mean(par()$usr[3:4]), paste0( withHigher, " genes (", signif(withHigher*100/length(resnSss), 2), "%)\n", "show a symmetric semantic\n", "similarity score greater than\n", "the gene candidate for\n", "for the HPs under focus." ), pos=4, cex=0.6 )
Compute Information Content (IC) for each HP based on genes by HP
computeHpIC(content, hp.descendants)
computeHpIC(content, hp.descendants)
content |
a list providing the content associated to each HP |
hp.descendants |
a list providing for each HP all its descendant HP terms |
This function assumes that all the HP terms taken into account belong to the same family of terms(i.e they are all descendants of the same term).
a vector of IC named with HP terms
########################################### ## Compute information content of each HP according to associated genes data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) hist( ic, breaks=100, col="grey", main="Distribution of Information Content", xlab="IC base on genes associated to HP" )
########################################### ## Compute information content of each HP according to associated genes data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) hist( ic, breaks=100, col="grey", main="Distribution of Information Content", xlab="IC base on genes associated to HP" )
Each entrez gene IDs is associated to one or several HP terms
A data frame with 67989 rows and 2 columns:
entrez gene IDs
HP terms
These data are used to examplify the different functions of the package. More data are available in the MultiHumanPhenoDB package.
Two ressources were used in May 27 2015:
ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2015-05.xml.gz was used to find genes associated to each OMIM disease with a "Pathogenic" clinical status and one of the follwing origins: "germline", "de novo", "inherited", "maternal", "paternal", "biparental", "uniparental".
http://compbio.charite.de/hudson/job/hpo.annotations/1039/artifact/misc/phenotype_annotation.tab was used to find HP associated to each OMIM disease.
########################################### ## Compute information content of each HP according to associated genes data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) hist( ic, breaks=100, col="grey", main="Distribution of Information Content", xlab="IC base on genes associated to HP" )
########################################### ## Compute information content of each HP according to associated genes data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) hist( ic, breaks=100, col="grey", main="Distribution of Information Content", xlab="IC base on genes associated to HP" )
Each trait is associated to one or several genes. Only genes associated to OMIM disease with a "Pathogenic" clinical status and one of the follwing origins: "germline", "de novo", "inherited", "maternal", "paternal", "biparental", "uniparental".
A data frame with 4569 rows and 3 columns:
Entrez gene IDs.
Trait database: always "OMIM" here.
Trait ID: OMI IDs here
These data are used to examplify the different functions of the package. More data are available in the MultiHumanPhenoDB package.
ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2015-05.xml.gz
data(geneByTrait, traitDef, geneDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] # Gene associated to this disease entrez <- geneByTrait[which(geneByTrait$id==omim), "entrez"] geneDef[which(geneDef$entrez %in% entrez),] # All diseases associated to this gene traitDef[ which( traitDef$id %in% geneByTrait[which(geneByTrait$entrez==entrez), "id"] ), ]
data(geneByTrait, traitDef, geneDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] # Gene associated to this disease entrez <- geneByTrait[which(geneByTrait$id==omim), "entrez"] geneDef[which(geneDef$entrez %in% entrez),] # All diseases associated to this gene traitDef[ which( traitDef$id %in% geneByTrait[which(geneByTrait$entrez==entrez), "id"] ), ]
Basic information about genes Only genes associated to at least one OMIM disease are taken into account.
A data frame with 3265 rows and 3 columns:
Entrez gene ID.
Gene name.
Gene symbol.
These data are used to examplify the different functions of the package. More data are available in the MultiHumanPhenoDB package.
ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2015-05.xml.gz
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease data(traitDef, geneDef, hp_ancestors, hpDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease data(hpByTrait, package="PCAN") hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## Comparison of the two sets of HP terms compMat <- compareHPSets( hpSet1=geneHps, hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) ## Get the symmetric semantic similarity score hpSetCompSummary(compMat, method="bma", direction="symSim") bm <- hpSetCompBestMatch(compMat, "b") hpDef[match(c(bm$compared, bm$candidate), hpDef$id),]
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease data(traitDef, geneDef, hp_ancestors, hpDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease data(hpByTrait, package="PCAN") hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## Comparison of the two sets of HP terms compMat <- compareHPSets( hpSet1=geneHps, hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) ## Get the symmetric semantic similarity score hpSetCompSummary(compMat, method="bma", direction="symSim") bm <- hpSetCompBestMatch(compMat, "b") hpDef[match(c(bm$compared, bm$candidate), hpDef$id),]
HP terms which are ancestors of each HP term (including itself) in the Human Phenotype Ontology (http://www.human-phenotype-ontology.org/). Only descendants of 'Phenotypic abnormality' were taken into account.
A named list of 10962 character vectors.
These data are used to examplify the different functions of the package. More data are available in the MultiHumanPhenoDB package.
http://compbio.charite.de/hudson/job/hpo/1529/artifact/hp/hp.obo
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ## Compute similarity between different couples of HP terms data(hp_ancestors, hpDef, package="PCAN") hp1 <- "HP:0000518" hp2 <- "HP:0030084" hp3 <- "HP:0002119" hp4 <- "HP:0001305" hpDef[which(hpDef$id %in% c(hp1, hp2)), 1:2] calcHpSim(hp1, hp2, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp2, hp3)), 1:2] calcHpSim(hp2, hp3, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp2, hp4)), 1:2] calcHpSim(hp2, hp4, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp3, hp4)), 1:2] calcHpSim(hp3, hp4, IC=ic, ancestors=hp_ancestors)
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ## Compute similarity between different couples of HP terms data(hp_ancestors, hpDef, package="PCAN") hp1 <- "HP:0000518" hp2 <- "HP:0030084" hp3 <- "HP:0002119" hp4 <- "HP:0001305" hpDef[which(hpDef$id %in% c(hp1, hp2)), 1:2] calcHpSim(hp1, hp2, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp2, hp3)), 1:2] calcHpSim(hp2, hp3, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp2, hp4)), 1:2] calcHpSim(hp2, hp4, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp3, hp4)), 1:2] calcHpSim(hp3, hp4, IC=ic, ancestors=hp_ancestors)
Each HP term can be of one or several classes. Classes are HP terms direct descendants of the 'Phenotypic abnormality' term.
A named list of 10962 character vectors.
These data are used to examplify the different functions of the package. More data are available in the MultiHumanPhenoDB package.
http://compbio.charite.de/hudson/job/hpo/1529/artifact/hp/hp.obo
data(hpDef, hp_class, package="PCAN") hp <- "HP:0100089" hpDef[which(hpDef$id==hp),] # This term has 2 classes: hpDef[which(hpDef$id %in% hp_class[[hp]]),]
data(hpDef, hp_class, package="PCAN") hp <- "HP:0100089" hpDef[which(hpDef$id==hp),] # This term has 2 classes: hpDef[which(hpDef$id %in% hp_class[[hp]]),]
HP terms which are descendants of each HP term (including itself) in the Human Phenotype Ontology (http://www.human-phenotype-ontology.org/). Only descendants of 'Phenotypic abnormality' were taken into account.
A named list of 10962 character vectors.
These data are used to examplify the different functions of the package. More data are available in the MultiHumanPhenoDB package.
http://compbio.charite.de/hudson/job/hpo/1529/artifact/hp/hp.obo
########################################### ## Compute information content of each HP according to associated genes data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) hist( ic, breaks=100, col="grey", main="Distribution of Information Content", xlab="IC base on genes associated to HP" )
########################################### ## Compute information content of each HP according to associated genes data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) hist( ic, breaks=100, col="grey", main="Distribution of Information Content", xlab="IC base on genes associated to HP" )
Each trait is associated to one or several HP terms.
A data frame with 55311 rows and 3 columns:
HP terms.
Trait database: always "OMIM" here.
Trait ID: OMI IDs here
These data are used to examplify the different functions of the package. More data are available in the MultiHumanPhenoDB package.
http://compbio.charite.de/hudson/job/hpo.annotations/1039/artifact/misc/phenotype_annotation.tab
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease data(traitDef, geneDef, hp_ancestors, hpDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease data(hpByTrait, package="PCAN") hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## Comparison of the two sets of HP terms compMat <- compareHPSets( hpSet1=geneHps, hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) ## Get the symmetric semantic similarity score hpSetCompSummary(compMat, method="bma", direction="symSim") bm <- hpSetCompBestMatch(compMat, "b") hpDef[match(c(bm$compared, bm$candidate), hpDef$id),]
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease data(traitDef, geneDef, hp_ancestors, hpDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease data(hpByTrait, package="PCAN") hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## Comparison of the two sets of HP terms compMat <- compareHPSets( hpSet1=geneHps, hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) ## Get the symmetric semantic similarity score hpSetCompSummary(compMat, method="bma", direction="symSim") bm <- hpSetCompBestMatch(compMat, "b") hpDef[match(c(bm$compared, bm$candidate), hpDef$id),]
HP terms basic information. Only descendants of 'Phenotypic abnormality' were taken into account.
A data frame with 10962 rows and 2 columns:
HP term IDs
HP term names
These data are used to examplify the different functions of the package. More data are available in the MultiHumanPhenoDB package.
http://compbio.charite.de/hudson/job/hpo/1529/artifact/hp/hp.obo
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ## Compute similarity between different couples of HP terms data(hp_ancestors, hpDef, package="PCAN") hp1 <- "HP:0000518" hp2 <- "HP:0030084" hp3 <- "HP:0002119" hp4 <- "HP:0001305" hpDef[which(hpDef$id %in% c(hp1, hp2)), 1:2] calcHpSim(hp1, hp2, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp2, hp3)), 1:2] calcHpSim(hp2, hp3, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp2, hp4)), 1:2] calcHpSim(hp2, hp4, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp3, hp4)), 1:2] calcHpSim(hp3, hp4, IC=ic, ancestors=hp_ancestors)
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ## Compute similarity between different couples of HP terms data(hp_ancestors, hpDef, package="PCAN") hp1 <- "HP:0000518" hp2 <- "HP:0030084" hp3 <- "HP:0002119" hp4 <- "HP:0001305" hpDef[which(hpDef$id %in% c(hp1, hp2)), 1:2] calcHpSim(hp1, hp2, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp2, hp3)), 1:2] calcHpSim(hp2, hp3, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp2, hp4)), 1:2] calcHpSim(hp2, hp4, IC=ic, ancestors=hp_ancestors) hpDef[which(hpDef$id %in% c(hp3, hp4)), 1:2] calcHpSim(hp3, hp4, IC=ic, ancestors=hp_ancestors)
This function draw a heatmap corresponding to the result of the pathway consensus method. For each gene of the pathway under focus and each HP of interest it shows the best score.
hpGeneHeatmap(hpGeneListRes, genesOfInterest = NULL, geneLabels = NULL, hpLabels = NULL, clustByGene = TRUE, clustByHp = TRUE, palFun = colorRampPalette(c("white", "red")), goiCol = "blue", ...)
hpGeneHeatmap(hpGeneListRes, genesOfInterest = NULL, geneLabels = NULL, hpLabels = NULL, clustByGene = TRUE, clustByHp = TRUE, palFun = colorRampPalette(c("white", "red")), goiCol = "blue", ...)
hpGeneListRes |
the result of the |
genesOfInterest |
a list of gene to highlight. |
geneLabels |
a named vector of gene labels (all the genes id found in hpGeneListRes should be in names(geneLabels)). |
hpLabels |
a named vector of HP labels (all the HP id found in hpGeneListRes should be in names(hpLabels)). |
clustByGene |
should the heatmap be clustered according to genes (default: TRUE). |
clustByHp |
should the heatmap be clustered according to HP (default: TRUE). |
palFun |
the palette function for the heatmap. |
goiCol |
the color used to highlight genes of interest. |
... |
parameters for the codeheatmap function |
A list of 2 matrix (invisible return):
For each gene and each HP of interest the best match value.
The gene associated HP best matching the HP of interest.
hpGeneListComp
, hpSetCompBestMatch
data(geneByHp, hp_descendants, package="PCAN") data(hp_ancestors, hpDef, package="PCAN") data(traitDef, geneDef, package="PCAN") data(hpByTrait, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ########################################### ## Compute information content of each HP according to associated genes ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## HP Comparison hpGeneResnik <- compareHPSets( hpSet1=names(ic), hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) hpMatByGene <- lapply( hpByGene, function(x){ hpGeneResnik[x, , drop=FALSE] } ) resnSss <- unlist(lapply( hpMatByGene, hpSetCompSummary, method="bma", direction="symSim" )) candScore <- resnSss[entrez] ########################################### ## The pathway consensus approach ## What about genes belonging to the same pathways as the candidate data(rPath, hsEntrezByRPath, package="PCAN") candPath <- names(hsEntrezByRPath)[which(unlist(lapply( hsEntrezByRPath, function(x) entrez %in% x )))] rPath[which(rPath$Pathway %in% candPath),] rPathRes <- hpGeneListComp( geneList=hsEntrezByRPath[[candPath]], ssMatByGene = hpMatByGene, geneSSScore = resnSss ) hist( resnSss, breaks=100, col="grey", ylim=c(0,5), xlab=expression(Sim[sym]), ylab="Density", main=paste( "Distribution of symmetric semantic similarity scores for all the", length(resnSss), "genes" ), probability=TRUE ) toAdd <- hist( rPathRes$scores, breaks=100, plot=FALSE ) for(i in 1:length(toAdd$density)){ polygon( x=toAdd$breaks[c(i, i+1, i+1, i)], y=c(0, 0, rep(toAdd$density[i], 2)), col="#BE000040", border="#800000FF" ) } legend( "topright", paste0( "Genes belonging to the ", candPath," pathway:\n'", rPath[which(rPath$Pathway %in% candPath),"Pathway_name"], "'\nand with a symmetric semantic similarity score (", sum(!is.na(rPathRes$scores)), "/", length(rPathRes$scores), ")\n", "p-value: ", signif(rPathRes$p.value, 2) ), pch=15, col="#BE000040", bty="n", cex=0.6 ) ## Assessing the symmetric semantic similarity for each gene in the pathway pathSss <- rPathRes$scores[which(!is.na(rPathRes$scores))] names(pathSss) <- geneDef[match(names(pathSss), geneDef$entrez), "symbol"] opar <- par(mar=c(7.1, 4.1, 4.1, 2.1)) barplot( sort(pathSss), las=2, ylab=expression(Sim[sym]), main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"] ) p <- c(0.25, 0.5, 0.75, 0.95) abline( h=quantile(resnSss, probs=p), col="#BE0000", lty=c(2, 1, 2, 2), lwd=c(2, 2, 2, 1) ) text( rep(0,4), quantile(resnSss, probs=p), p, pos=3, offset=0, col="#BE0000", cex=0.6 ) legend( "topleft", paste0( "Quantiles of the distribution of symmetric semantic similarity\n", "scores for all the genes." ), lty=1, col="#BE0000", cex=0.6 ) par(opar) ## A heatmap showing the best HP match for each gene in the pathway geneLabels <- geneDef$symbol[which(!duplicated(geneDef$entrez))] names(geneLabels) <- geneDef$entrez[which(!duplicated(geneDef$entrez))] hpLabels <- hpDef$name names(hpLabels) <- hpDef$id hpGeneHeatmap( rPathRes, genesOfInterest=entrez, geneLabels=geneLabels, hpLabels=hpLabels, clustByGene=TRUE, clustByHp=TRUE, palFun=colorRampPalette(c("white", "red")), goiCol="blue", main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"] ) ########################################### ## What about genes interacting with the candidate (including itself) data(hqStrNw, package="PCAN") neighbors <- unique(c( hqStrNw$gene1[which(hqStrNw$gene2==entrez)], hqStrNw$gene2[which(hqStrNw$gene1==entrez)], entrez )) neighRes <- hpGeneListComp( geneList=neighbors, ssMatByGene = hpMatByGene, geneSSScore = resnSss ) hist( resnSss, breaks=100, col="grey", ylim=c(0,10), xlab=expression(Sim[sym]), ylab="Density", main=paste( "Distribution of symmetric semantic similarity scores for all the", length(resnSss), "genes" ), probability=TRUE ) toAdd <- hist( neighRes$scores, breaks=100, plot=FALSE ) for(i in 1:length(toAdd$density)){ polygon( x=toAdd$breaks[c(i, i+1, i+1, i)], y=c(0, 0, rep(toAdd$density[i], 2)), col="#BE000040", border="#800000FF" ) } legend( "topright", paste0( "Genes interacting with ", geneDef[which(geneDef$entrez==entrez),"symbol"], " (", entrez, ")", "\nand with a symmetric semantic similarity score (", sum(!is.na(neighRes$scores)), "/", length(neighRes$scores), ")\n", "p-value: ", signif(neighRes$p.value, 2) ), pch=15, col="#BE000040", bty="n", cex=0.6 ) ## Assessing the symmetric semantic similarity score for each interacting gene neighSss <- neighRes$scores[which(!is.na(neighRes$scores))] names(neighSss) <- geneDef[match(names(neighSss), geneDef$entrez), "symbol"] opar <- par(mar=c(7.1, 4.1, 4.1, 2.1)) barplot( sort(neighSss), las=2, ylab=expression(Sim[sym]), main=paste0( "Genes interacting with ", geneDef[which(geneDef$entrez==entrez),"symbol"], " (", entrez, ")" ) ) p <- c(0.25, 0.5, 0.75, 0.95) abline( h=quantile(resnSss, probs=p), col="#BE0000", lty=c(2, 1, 2, 2), lwd=c(2, 2, 2, 1) ) text( rep(0,4), quantile(resnSss, probs=p), p, pos=3, offset=0, col="#BE0000", cex=0.6 ) legend( "topleft", paste0( "Quantiles of the distribution of symmetric semantic similarity\n", "scores for all the genes." ), lty=1, col="#BE0000", cex=0.6 ) par(opar) ## A heatmap showing the best HP match for each neighbor gene hpGeneHeatmap( neighRes, genesOfInterest=entrez, geneLabels=geneLabels, hpLabels=hpLabels, clustByGene=TRUE, clustByHp=TRUE, palFun=colorRampPalette(c("white", "red")), goiCol="blue", main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"] )
data(geneByHp, hp_descendants, package="PCAN") data(hp_ancestors, hpDef, package="PCAN") data(traitDef, geneDef, package="PCAN") data(hpByTrait, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ########################################### ## Compute information content of each HP according to associated genes ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## HP Comparison hpGeneResnik <- compareHPSets( hpSet1=names(ic), hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) hpMatByGene <- lapply( hpByGene, function(x){ hpGeneResnik[x, , drop=FALSE] } ) resnSss <- unlist(lapply( hpMatByGene, hpSetCompSummary, method="bma", direction="symSim" )) candScore <- resnSss[entrez] ########################################### ## The pathway consensus approach ## What about genes belonging to the same pathways as the candidate data(rPath, hsEntrezByRPath, package="PCAN") candPath <- names(hsEntrezByRPath)[which(unlist(lapply( hsEntrezByRPath, function(x) entrez %in% x )))] rPath[which(rPath$Pathway %in% candPath),] rPathRes <- hpGeneListComp( geneList=hsEntrezByRPath[[candPath]], ssMatByGene = hpMatByGene, geneSSScore = resnSss ) hist( resnSss, breaks=100, col="grey", ylim=c(0,5), xlab=expression(Sim[sym]), ylab="Density", main=paste( "Distribution of symmetric semantic similarity scores for all the", length(resnSss), "genes" ), probability=TRUE ) toAdd <- hist( rPathRes$scores, breaks=100, plot=FALSE ) for(i in 1:length(toAdd$density)){ polygon( x=toAdd$breaks[c(i, i+1, i+1, i)], y=c(0, 0, rep(toAdd$density[i], 2)), col="#BE000040", border="#800000FF" ) } legend( "topright", paste0( "Genes belonging to the ", candPath," pathway:\n'", rPath[which(rPath$Pathway %in% candPath),"Pathway_name"], "'\nand with a symmetric semantic similarity score (", sum(!is.na(rPathRes$scores)), "/", length(rPathRes$scores), ")\n", "p-value: ", signif(rPathRes$p.value, 2) ), pch=15, col="#BE000040", bty="n", cex=0.6 ) ## Assessing the symmetric semantic similarity for each gene in the pathway pathSss <- rPathRes$scores[which(!is.na(rPathRes$scores))] names(pathSss) <- geneDef[match(names(pathSss), geneDef$entrez), "symbol"] opar <- par(mar=c(7.1, 4.1, 4.1, 2.1)) barplot( sort(pathSss), las=2, ylab=expression(Sim[sym]), main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"] ) p <- c(0.25, 0.5, 0.75, 0.95) abline( h=quantile(resnSss, probs=p), col="#BE0000", lty=c(2, 1, 2, 2), lwd=c(2, 2, 2, 1) ) text( rep(0,4), quantile(resnSss, probs=p), p, pos=3, offset=0, col="#BE0000", cex=0.6 ) legend( "topleft", paste0( "Quantiles of the distribution of symmetric semantic similarity\n", "scores for all the genes." ), lty=1, col="#BE0000", cex=0.6 ) par(opar) ## A heatmap showing the best HP match for each gene in the pathway geneLabels <- geneDef$symbol[which(!duplicated(geneDef$entrez))] names(geneLabels) <- geneDef$entrez[which(!duplicated(geneDef$entrez))] hpLabels <- hpDef$name names(hpLabels) <- hpDef$id hpGeneHeatmap( rPathRes, genesOfInterest=entrez, geneLabels=geneLabels, hpLabels=hpLabels, clustByGene=TRUE, clustByHp=TRUE, palFun=colorRampPalette(c("white", "red")), goiCol="blue", main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"] ) ########################################### ## What about genes interacting with the candidate (including itself) data(hqStrNw, package="PCAN") neighbors <- unique(c( hqStrNw$gene1[which(hqStrNw$gene2==entrez)], hqStrNw$gene2[which(hqStrNw$gene1==entrez)], entrez )) neighRes <- hpGeneListComp( geneList=neighbors, ssMatByGene = hpMatByGene, geneSSScore = resnSss ) hist( resnSss, breaks=100, col="grey", ylim=c(0,10), xlab=expression(Sim[sym]), ylab="Density", main=paste( "Distribution of symmetric semantic similarity scores for all the", length(resnSss), "genes" ), probability=TRUE ) toAdd <- hist( neighRes$scores, breaks=100, plot=FALSE ) for(i in 1:length(toAdd$density)){ polygon( x=toAdd$breaks[c(i, i+1, i+1, i)], y=c(0, 0, rep(toAdd$density[i], 2)), col="#BE000040", border="#800000FF" ) } legend( "topright", paste0( "Genes interacting with ", geneDef[which(geneDef$entrez==entrez),"symbol"], " (", entrez, ")", "\nand with a symmetric semantic similarity score (", sum(!is.na(neighRes$scores)), "/", length(neighRes$scores), ")\n", "p-value: ", signif(neighRes$p.value, 2) ), pch=15, col="#BE000040", bty="n", cex=0.6 ) ## Assessing the symmetric semantic similarity score for each interacting gene neighSss <- neighRes$scores[which(!is.na(neighRes$scores))] names(neighSss) <- geneDef[match(names(neighSss), geneDef$entrez), "symbol"] opar <- par(mar=c(7.1, 4.1, 4.1, 2.1)) barplot( sort(neighSss), las=2, ylab=expression(Sim[sym]), main=paste0( "Genes interacting with ", geneDef[which(geneDef$entrez==entrez),"symbol"], " (", entrez, ")" ) ) p <- c(0.25, 0.5, 0.75, 0.95) abline( h=quantile(resnSss, probs=p), col="#BE0000", lty=c(2, 1, 2, 2), lwd=c(2, 2, 2, 1) ) text( rep(0,4), quantile(resnSss, probs=p), p, pos=3, offset=0, col="#BE0000", cex=0.6 ) legend( "topleft", paste0( "Quantiles of the distribution of symmetric semantic similarity\n", "scores for all the genes." ), lty=1, col="#BE0000", cex=0.6 ) par(opar) ## A heatmap showing the best HP match for each neighbor gene hpGeneHeatmap( neighRes, genesOfInterest=entrez, geneLabels=geneLabels, hpLabels=hpLabels, clustByGene=TRUE, clustByHp=TRUE, palFun=colorRampPalette(c("white", "red")), goiCol="blue", main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"] )
This function compare a whole gene list to a set of HP terms using a matrix of semantic similarity.
hpGeneListComp(geneList, ssMatByGene, geneSSScore = NULL, ...)
hpGeneListComp(geneList, ssMatByGene, geneSSScore = NULL, ...)
geneList |
a vector providing the genes of interest. |
ssMatByGene |
a list (one element per gene) of matrix
of semantic similarity between HP terms as returned by
|
geneSSScore |
a vector of semantic similarity scores for all the genes in ssMatByGene list. If not provided these scores are computed from ssMatByGene. |
... |
parameters for |
A list with the following elements:
The original HP of interest.
The distribution of scores for all genes for the HP of interest.
The semantic similarity by gene.
For each gene which related HP terms best fits with the HP of interest (colnames of the elements of ssMatByGene).
The median of scores.
According to a wilcox.test
comparing genes
of interest to all the other genes.
Gene with the highest score among the genes of interest.
Maximum score.
Quantile of the scores compared to the whole list of gene.
Adjusted quantiles according Benjamini Hochberg
(link{p.adjust}
).
Patrice Godard
hpGeneHeatmap
, compareHPSets
,
hpSetCompSummary
and hpSetCompBestMatch
data(geneByHp, hp_descendants, package="PCAN") data(hp_ancestors, hpDef, package="PCAN") data(traitDef, geneDef, package="PCAN") data(hpByTrait, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ########################################### ## Compute information content of each HP according to associated genes ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## HP Comparison hpGeneResnik <- compareHPSets( hpSet1=names(ic), hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) hpMatByGene <- lapply( hpByGene, function(x){ hpGeneResnik[x, , drop=FALSE] } ) resnSss <- unlist(lapply( hpMatByGene, hpSetCompSummary, method="bma", direction="symSim" )) candScore <- resnSss[entrez] ########################################### ## The pathway consensus approach ## What about genes belonging to the same pathways as the candidate data(rPath, hsEntrezByRPath, package="PCAN") candPath <- names(hsEntrezByRPath)[which(unlist(lapply( hsEntrezByRPath, function(x) entrez %in% x )))] rPath[which(rPath$Pathway %in% candPath),] rPathRes <- hpGeneListComp( geneList=hsEntrezByRPath[[candPath]], ssMatByGene = hpMatByGene, geneSSScore = resnSss ) hist( resnSss, breaks=100, col="grey", ylim=c(0,5), xlab=expression(Sim[sym]), ylab="Density", main=paste( "Distribution of symmetric semantic similarity scores for all the", length(resnSss), "genes" ), probability=TRUE ) toAdd <- hist( rPathRes$scores, breaks=100, plot=FALSE ) for(i in 1:length(toAdd$density)){ polygon( x=toAdd$breaks[c(i, i+1, i+1, i)], y=c(0, 0, rep(toAdd$density[i], 2)), col="#BE000040", border="#800000FF" ) } legend( "topright", paste0( "Genes belonging to the ", candPath," pathway:\n'", rPath[which(rPath$Pathway %in% candPath),"Pathway_name"], "'\nand with a symmetric semantic similarity score (", sum(!is.na(rPathRes$scores)), "/", length(rPathRes$scores), ")\n", "p-value: ", signif(rPathRes$p.value, 2) ), pch=15, col="#BE000040", bty="n", cex=0.6 ) ## Assessing the symmetric semantic similarity for each gene in the pathway pathSss <- rPathRes$scores[which(!is.na(rPathRes$scores))] names(pathSss) <- geneDef[match(names(pathSss), geneDef$entrez), "symbol"] opar <- par(mar=c(7.1, 4.1, 4.1, 2.1)) barplot( sort(pathSss), las=2, ylab=expression(Sim[sym]), main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"] ) p <- c(0.25, 0.5, 0.75, 0.95) abline( h=quantile(resnSss, probs=p), col="#BE0000", lty=c(2, 1, 2, 2), lwd=c(2, 2, 2, 1) ) text( rep(0,4), quantile(resnSss, probs=p), p, pos=3, offset=0, col="#BE0000", cex=0.6 ) legend( "topleft", paste0( "Quantiles of the distribution of symmetric semantic similarity\n", "scores for all the genes." ), lty=1, col="#BE0000", cex=0.6 ) par(opar) ## A heatmap showing the best HP match for each gene in the pathway geneLabels <- geneDef$symbol[which(!duplicated(geneDef$entrez))] names(geneLabels) <- geneDef$entrez[which(!duplicated(geneDef$entrez))] hpLabels <- hpDef$name names(hpLabels) <- hpDef$id hpGeneHeatmap( rPathRes, genesOfInterest=entrez, geneLabels=geneLabels, hpLabels=hpLabels, clustByGene=TRUE, clustByHp=TRUE, palFun=colorRampPalette(c("white", "red")), goiCol="blue", main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"] ) ########################################### ## What about genes interacting with the candidate (including itself) data(hqStrNw, package="PCAN") neighbors <- unique(c( hqStrNw$gene1[which(hqStrNw$gene2==entrez)], hqStrNw$gene2[which(hqStrNw$gene1==entrez)], entrez )) neighRes <- hpGeneListComp( geneList=neighbors, ssMatByGene = hpMatByGene, geneSSScore = resnSss ) hist( resnSss, breaks=100, col="grey", ylim=c(0,10), xlab=expression(Sim[sym]), ylab="Density", main=paste( "Distribution of symmetric semantic similarity scores for all the", length(resnSss), "genes" ), probability=TRUE ) toAdd <- hist( neighRes$scores, breaks=100, plot=FALSE ) for(i in 1:length(toAdd$density)){ polygon( x=toAdd$breaks[c(i, i+1, i+1, i)], y=c(0, 0, rep(toAdd$density[i], 2)), col="#BE000040", border="#800000FF" ) } legend( "topright", paste0( "Genes interacting with ", geneDef[which(geneDef$entrez==entrez),"symbol"], " (", entrez, ")", "\nand with a symmetric semantic similarity score (", sum(!is.na(neighRes$scores)), "/", length(neighRes$scores), ")\n", "p-value: ", signif(neighRes$p.value, 2) ), pch=15, col="#BE000040", bty="n", cex=0.6 ) ## Assessing the symmetric semantic similarity score for each interacting gene neighSss <- neighRes$scores[which(!is.na(neighRes$scores))] names(neighSss) <- geneDef[match(names(neighSss), geneDef$entrez), "symbol"] opar <- par(mar=c(7.1, 4.1, 4.1, 2.1)) barplot( sort(neighSss), las=2, ylab=expression(Sim[sym]), main=paste0( "Genes interacting with ", geneDef[which(geneDef$entrez==entrez),"symbol"], " (", entrez, ")" ) ) p <- c(0.25, 0.5, 0.75, 0.95) abline( h=quantile(resnSss, probs=p), col="#BE0000", lty=c(2, 1, 2, 2), lwd=c(2, 2, 2, 1) ) text( rep(0,4), quantile(resnSss, probs=p), p, pos=3, offset=0, col="#BE0000", cex=0.6 ) legend( "topleft", paste0( "Quantiles of the distribution of symmetric semantic similarity\n", "scores for all the genes." ), lty=1, col="#BE0000", cex=0.6 ) par(opar) ## A heatmap showing the best HP match for each neighbor gene hpGeneHeatmap( neighRes, genesOfInterest=entrez, geneLabels=geneLabels, hpLabels=hpLabels, clustByGene=TRUE, clustByHp=TRUE, palFun=colorRampPalette(c("white", "red")), goiCol="blue", main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"] )
data(geneByHp, hp_descendants, package="PCAN") data(hp_ancestors, hpDef, package="PCAN") data(traitDef, geneDef, package="PCAN") data(hpByTrait, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ########################################### ## Compute information content of each HP according to associated genes ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## HP Comparison hpGeneResnik <- compareHPSets( hpSet1=names(ic), hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) hpMatByGene <- lapply( hpByGene, function(x){ hpGeneResnik[x, , drop=FALSE] } ) resnSss <- unlist(lapply( hpMatByGene, hpSetCompSummary, method="bma", direction="symSim" )) candScore <- resnSss[entrez] ########################################### ## The pathway consensus approach ## What about genes belonging to the same pathways as the candidate data(rPath, hsEntrezByRPath, package="PCAN") candPath <- names(hsEntrezByRPath)[which(unlist(lapply( hsEntrezByRPath, function(x) entrez %in% x )))] rPath[which(rPath$Pathway %in% candPath),] rPathRes <- hpGeneListComp( geneList=hsEntrezByRPath[[candPath]], ssMatByGene = hpMatByGene, geneSSScore = resnSss ) hist( resnSss, breaks=100, col="grey", ylim=c(0,5), xlab=expression(Sim[sym]), ylab="Density", main=paste( "Distribution of symmetric semantic similarity scores for all the", length(resnSss), "genes" ), probability=TRUE ) toAdd <- hist( rPathRes$scores, breaks=100, plot=FALSE ) for(i in 1:length(toAdd$density)){ polygon( x=toAdd$breaks[c(i, i+1, i+1, i)], y=c(0, 0, rep(toAdd$density[i], 2)), col="#BE000040", border="#800000FF" ) } legend( "topright", paste0( "Genes belonging to the ", candPath," pathway:\n'", rPath[which(rPath$Pathway %in% candPath),"Pathway_name"], "'\nand with a symmetric semantic similarity score (", sum(!is.na(rPathRes$scores)), "/", length(rPathRes$scores), ")\n", "p-value: ", signif(rPathRes$p.value, 2) ), pch=15, col="#BE000040", bty="n", cex=0.6 ) ## Assessing the symmetric semantic similarity for each gene in the pathway pathSss <- rPathRes$scores[which(!is.na(rPathRes$scores))] names(pathSss) <- geneDef[match(names(pathSss), geneDef$entrez), "symbol"] opar <- par(mar=c(7.1, 4.1, 4.1, 2.1)) barplot( sort(pathSss), las=2, ylab=expression(Sim[sym]), main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"] ) p <- c(0.25, 0.5, 0.75, 0.95) abline( h=quantile(resnSss, probs=p), col="#BE0000", lty=c(2, 1, 2, 2), lwd=c(2, 2, 2, 1) ) text( rep(0,4), quantile(resnSss, probs=p), p, pos=3, offset=0, col="#BE0000", cex=0.6 ) legend( "topleft", paste0( "Quantiles of the distribution of symmetric semantic similarity\n", "scores for all the genes." ), lty=1, col="#BE0000", cex=0.6 ) par(opar) ## A heatmap showing the best HP match for each gene in the pathway geneLabels <- geneDef$symbol[which(!duplicated(geneDef$entrez))] names(geneLabels) <- geneDef$entrez[which(!duplicated(geneDef$entrez))] hpLabels <- hpDef$name names(hpLabels) <- hpDef$id hpGeneHeatmap( rPathRes, genesOfInterest=entrez, geneLabels=geneLabels, hpLabels=hpLabels, clustByGene=TRUE, clustByHp=TRUE, palFun=colorRampPalette(c("white", "red")), goiCol="blue", main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"] ) ########################################### ## What about genes interacting with the candidate (including itself) data(hqStrNw, package="PCAN") neighbors <- unique(c( hqStrNw$gene1[which(hqStrNw$gene2==entrez)], hqStrNw$gene2[which(hqStrNw$gene1==entrez)], entrez )) neighRes <- hpGeneListComp( geneList=neighbors, ssMatByGene = hpMatByGene, geneSSScore = resnSss ) hist( resnSss, breaks=100, col="grey", ylim=c(0,10), xlab=expression(Sim[sym]), ylab="Density", main=paste( "Distribution of symmetric semantic similarity scores for all the", length(resnSss), "genes" ), probability=TRUE ) toAdd <- hist( neighRes$scores, breaks=100, plot=FALSE ) for(i in 1:length(toAdd$density)){ polygon( x=toAdd$breaks[c(i, i+1, i+1, i)], y=c(0, 0, rep(toAdd$density[i], 2)), col="#BE000040", border="#800000FF" ) } legend( "topright", paste0( "Genes interacting with ", geneDef[which(geneDef$entrez==entrez),"symbol"], " (", entrez, ")", "\nand with a symmetric semantic similarity score (", sum(!is.na(neighRes$scores)), "/", length(neighRes$scores), ")\n", "p-value: ", signif(neighRes$p.value, 2) ), pch=15, col="#BE000040", bty="n", cex=0.6 ) ## Assessing the symmetric semantic similarity score for each interacting gene neighSss <- neighRes$scores[which(!is.na(neighRes$scores))] names(neighSss) <- geneDef[match(names(neighSss), geneDef$entrez), "symbol"] opar <- par(mar=c(7.1, 4.1, 4.1, 2.1)) barplot( sort(neighSss), las=2, ylab=expression(Sim[sym]), main=paste0( "Genes interacting with ", geneDef[which(geneDef$entrez==entrez),"symbol"], " (", entrez, ")" ) ) p <- c(0.25, 0.5, 0.75, 0.95) abline( h=quantile(resnSss, probs=p), col="#BE0000", lty=c(2, 1, 2, 2), lwd=c(2, 2, 2, 1) ) text( rep(0,4), quantile(resnSss, probs=p), p, pos=3, offset=0, col="#BE0000", cex=0.6 ) legend( "topleft", paste0( "Quantiles of the distribution of symmetric semantic similarity\n", "scores for all the genes." ), lty=1, col="#BE0000", cex=0.6 ) par(opar) ## A heatmap showing the best HP match for each neighbor gene hpGeneHeatmap( neighRes, genesOfInterest=entrez, geneLabels=geneLabels, hpLabels=hpLabels, clustByGene=TRUE, clustByHp=TRUE, palFun=colorRampPalette(c("white", "red")), goiCol="blue", main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"] )
This function returns the best matches from a semantic similarity matrix.
hpSetCompBestMatch(hpSetComp, direction = c("b", "r", "c"))
hpSetCompBestMatch(hpSetComp, direction = c("b", "r", "c"))
hpSetComp |
a matrix of semantic similarities between couples of HP terms |
direction |
taken into account. "r": best match per row. "c": best match per column. "b" (symetric): best match for the whole matrix |
A data frame with the compared term, the best match and the value of the match.
Patrice Godard
compareHPSets
and hpSetCompSummary
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease data(traitDef, geneDef, hp_ancestors, hpDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease data(hpByTrait, package="PCAN") hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## Comparison of the two sets of HP terms compMat <- compareHPSets( hpSet1=geneHps, hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) ## Get the symmetric semantic similarity score hpSetCompSummary(compMat, method="bma", direction="symSim") bm <- hpSetCompBestMatch(compMat, "b") hpDef[match(c(bm$compared, bm$candidate), hpDef$id),]
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease data(traitDef, geneDef, hp_ancestors, hpDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease data(hpByTrait, package="PCAN") hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## Comparison of the two sets of HP terms compMat <- compareHPSets( hpSet1=geneHps, hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) ## Get the symmetric semantic similarity score hpSetCompSummary(compMat, method="bma", direction="symSim") bm <- hpSetCompBestMatch(compMat, "b") hpDef[match(c(bm$compared, bm$candidate), hpDef$id),]
This function summarize the comparison of 2 sets of HP terms
hpSetCompSummary(hpSetComp, method = c("bma", "bm", "average"), direction = c("symSim", "r", "c"))
hpSetCompSummary(hpSetComp, method = c("bma", "bm", "average"), direction = c("symSim", "r", "c"))
hpSetComp |
a matrix of semantic similarities between couples of HP terms |
method |
"bma" (Best Match Average): the average of the best matches on rows or columns (see direction param). "bm": the maximum value. "average": the average of the whole matrix. |
direction |
taken into account only if method="bma". "r": best match per row. "c": best match per column. "symSim" (symmetric semantic similarity): average of calls with "r" and "c" |
A numeric value corresponding to the semantic similarity of the 2 HP sets
Patrice Godard
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease data(traitDef, geneDef, hp_ancestors, hpDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease data(hpByTrait, package="PCAN") hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## Comparison of the two sets of HP terms compMat <- compareHPSets( hpSet1=geneHps, hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) ## Get the symmetric semantic similarity score hpSetCompSummary(compMat, method="bma", direction="symSim") bm <- hpSetCompBestMatch(compMat, "b") hpDef[match(c(bm$compared, bm$candidate), hpDef$id),]
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease data(traitDef, geneDef, hp_ancestors, hpDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease data(hpByTrait, package="PCAN") hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## Comparison of the two sets of HP terms compMat <- compareHPSets( hpSet1=geneHps, hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) ## Get the symmetric semantic similarity score hpSetCompSummary(compMat, method="bma", direction="symSim") bm <- hpSetCompBestMatch(compMat, "b") hpDef[match(c(bm$compared, bm$candidate), hpDef$id),]
A network of human entrez gene IDs taken from the STRING database.
A data frame of 643683 and 3 columns:
Entrez gene IDs.
Entrez gene IDs.
TRUE if the directionality of the interaction between the 2 genes is known. In this case gene1 is upstream gene 2.
Different ressources were used in June 2 2015:
http://string-db.org/newstring_download/protein.actions.v10/9606.protein.actions.v10.txt.gz was used to get the network of Ensembl protein IDs. Only interaction with a score greater or equal to 500 were kept.
BioMart from http://jan2013.archive.ensembl.org/index.html was used to map Ensembl protein IDs to Ensembl gene IDs. Ensembl gene IDs were mapped to Entrez gene IDs using this ressource in addition to ftp://ftp.ncbi.nih.gov/gene/DATA/gene2ensembl.gz
## Not run: example(hpGeneListComp)
## Not run: example(hpGeneListComp)
The human genes coding for proteins involved in the different Reactome pathways.
A named list of 1345 character vectors.
Two ressources were used in June 2 2015:
http://www.reactome.org/download/current/UniProt2Reactome.txt was used to get list of Uniprot ID associated to each pathway.
ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz was used to map Uniprot ID to Entrez gene IDs
## Not run: example(hpGeneListComp)
## Not run: example(hpGeneListComp)
Pathways taken from the Reactome database.
A data frame with 1345 rows and 2 columns:
Reactome ID.
The name of the pathway.
http://www.reactome.org/download/current/UniProt2Reactome.txt
## Not run: example(hpGeneListComp)
## Not run: example(hpGeneListComp)
Basic information about traits. Only OMIM diseases associated to at least one gene are taken into account.
A data frame with 3675 rows and 3 columns:
Always "OMIM" here.
The trait ID (OMIM IDs here).
The name of the trait.
These data are used to examplify the different functions of the package. More data are available in the MultiHumanPhenoDB package.
ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2015-05.xml.gz
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease data(traitDef, geneDef, hp_ancestors, hpDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease data(hpByTrait, package="PCAN") hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## Comparison of the two sets of HP terms compMat <- compareHPSets( hpSet1=geneHps, hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) ## Get the symmetric semantic similarity score hpSetCompSummary(compMat, method="bma", direction="symSim") bm <- hpSetCompBestMatch(compMat, "b") hpDef[match(c(bm$compared, bm$candidate), hpDef$id),]
## Prerequisite data(geneByHp, hp_descendants, package="PCAN") geneByHp <- unstack(geneByHp, entrez~hp) ic <- computeHpIC(geneByHp, hp_descendants) ########################################### ## Use case: comparing a gene and a disease data(traitDef, geneDef, hp_ancestors, hpDef, package="PCAN") omim <- "612285" traitDef[which(traitDef$id==omim),] entrez <- "57545" geneDef[which(geneDef$entrez==entrez),] ## Get HP terms associated to the disease data(hpByTrait, package="PCAN") hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)] ## Get HP terms associated to the gene hpByGene <- unstack(stack(geneByHp), ind~values) geneHps <- hpByGene[[entrez]] ## Comparison of the two sets of HP terms compMat <- compareHPSets( hpSet1=geneHps, hpSet2=hpOfInterest, IC=ic, ancestors=hp_ancestors, method="Resnik", BPPARAM=SerialParam() ) ## Get the symmetric semantic similarity score hpSetCompSummary(compMat, method="bma", direction="symSim") bm <- hpSetCompBestMatch(compMat, "b") hpDef[match(c(bm$compared, bm$candidate), hpDef$id),]