Complete processing of raw input phylogenetic profiles
Description
Create processed and filtered data for plotting or analysing
phylogenetic profiles from a raw input file (from raw input to the final
filtered dataframe).
Usage
fromInputToProfile(rawInput, rankName, refTaxon = NULL,
taxaTree = NULL, sortedTaxonList = NULL, var1AggregateBy = "max",
var2AggregateBy = "max", percentCutoff = c(0, 1),
coorthologCutoffMax = 9999, var1Cutoff = c(0, 1), var2Cutoff = c(0, 1),
var1Relation = "protein", var2Relation = "protein", groupByCat = FALSE,
catDt = NULL, taxDB = NULL)
fromInputToProfile(rawInput, rankName, refTaxon = NULL,
taxaTree = NULL, sortedTaxonList = NULL, var1AggregateBy = "max",
var2AggregateBy = "max", percentCutoff = c(0, 1),
coorthologCutoffMax = 9999, var1Cutoff = c(0, 1), var2Cutoff = c(0, 1),
var1Relation = "protein", var2Relation = "protein", groupByCat = FALSE,
catDt = NULL, taxDB = NULL)
Arguments
rawInput |
input file (in long, wide, multi-fasta or orthoxml format)
|
rankName |
taxonomy rank (e.g. "species","phylum",...)
|
refTaxon |
selected reference taxon name (used for sorting and will be
protected from filtering). Default = NULL.
|
taxaTree |
input taxonomy tree for taxa in input profiles (optional).
Default = NULL.
|
sortedTaxonList |
list of sorted taxa (optional). Default = NULL.
|
var1AggregateBy |
aggregate method for var1 (min, max, mean or median).
Default = "max".
|
var2AggregateBy |
aggregate method for var2 (min, max, mean or median).
Default = "max".
|
percentCutoff |
min and max cutoffs for percentage of species present
in a supertaxon. Default = c(0, 1).
|
coorthologCutoffMax |
maximum number of co-orthologs allowed. Default =
9999.
|
var1Cutoff |
min and max cutoffs for var1. Default = c(0, 1).
|
var2Cutoff |
min and max cutoffs for var2. Default = c(0, 1).
|
var1Relation |
relation of var1 ("protein" for protein-protein or
"species" for protein-species). Default = "protein".
|
var2Relation |
relation of var2 ("protein" for protein-protein or
"species" for protein-species). Default = "protein".
|
groupByCat |
group genes by their categories (TRUE or FALSE). Default =
FALSE.
|
catDt |
dataframe containing gene categories. Default = NULL.
|
taxDB |
path to the taxonomy DB files. Default = NULL.
|
Value
Dataframe required for generating a phylogenetic profile plot or
clustering analysis. It contains seed gene IDs (or orthologous group IDs),
their ortholog IDs and the corresponding (super)taxa, (super)taxon IDs,
number of co-orthologs in each (super)taxon, values for the two additional
variables var1 and var2, and the
categories of seed genes (or ortholog groups).
Author(s)
Vinh Tran [email protected]
See Also
createLongMatrix, getInputTaxaID, getInputTaxaName, sortInputTaxa,
parseInfoProfile, reduceProfile, filterProfileData
Examples
rawInput <- system.file(
"extdata", "test.main.long", package = "PhyloProfile", mustWork = TRUE
)
rankName <- "class"
refTaxon <- "Mammalia"
taxaTree <- NULL
sortedTaxonList <- NULL
var1AggregateBy <- "max"
var2AggregateBy <- "mean"
percentCutoff <- c(0.0, 1.0)
coorthologCutoffMax <- 10
var1Cutoff <- c(0.75, 1.0)
var2Cutoff <- c(0.5, 1.0)
var1Relation <- "protein"
var2Relation <- "species"
groupByCat <- FALSE
catDt <- NULL
fromInputToProfile(
rawInput,
rankName,
refTaxon,
taxaTree,
sortedTaxonList,
var1AggregateBy,
var2AggregateBy,
percentCutoff,
coorthologCutoffMax,
var1Cutoff,
var2Cutoff,
var1Relation,
var2Relation,
groupByCat,
catDt
)
rawInput <- system.file(
"extdata", "test.main.long", package = "PhyloProfile", mustWork = TRUE
)
rankName <- "class"
refTaxon <- "Mammalia"
taxaTree <- NULL
sortedTaxonList <- NULL
var1AggregateBy <- "max"
var2AggregateBy <- "mean"
percentCutoff <- c(0.0, 1.0)
coorthologCutoffMax <- 10
var1Cutoff <- c(0.75, 1.0)
var2Cutoff <- c(0.5, 1.0)
var1Relation <- "protein"
var2Relation <- "species"
groupByCat <- FALSE
catDt <- NULL
fromInputToProfile(
rawInput,
rankName,
refTaxon,
taxaTree,
sortedTaxonList,
var1AggregateBy,
var2AggregateBy,
percentCutoff,
coorthologCutoffMax,
var1Cutoff,
var2Cutoff,
var1Relation,
var2Relation,
groupByCat,
catDt
)