Title: | Run workflows implemented in Terra/AnVIL workspace |
---|---|
Description: | The AnVIL is a cloud computing resource developed in part by the National Human Genome Research Institute. The main cloud-based genomics platform supported by the AnVIL project is Terra. The AnVILWorkflow package allows remote access to Terra-implemented workflows, enabling end-users to utilize Terra/AnVIL-provided resources - such as data, workflows, and flexible/scalable computing resources - through conventional R functions. |
Authors: | Sehyun Oh [aut, cre] , Marcel Ramos [ctb] , Kai Gravel-Pucillo [aut] |
Maintainer: | Sehyun Oh <[email protected]> |
License: | Artistic-2.0 |
Version: | 1.7.1 |
Built: | 2024-12-29 03:19:36 UTC |
Source: | https://github.com/bioc/AnVILWorkflow |
Apply to the returned value from the .avSampleData
function
Apply to the returned value from the .avSampleData
function
.getAgeMinMax(sampleData)
.getAgeMinMax(sampleData)
sampleData |
A data frame. |
Search keywords in a given metadata table
.search_keyword(keyword, metadata)
.search_keyword(keyword, metadata)
keyword |
A character(1). Regular expression is accepted. For example,
you can search multiple keywords separated by the vertical bar ("|"). |
metadata |
A data frame. Metadata table of workspace, workflow, or AnVIL data. |
A data frame. A subset of input metadata table with the rows containing the keyword.
Summary table into a single string where the column name and the value
is separated by sep
and columns are separated by delim
Summary table into a single string where the column name and the value
is separated by sep
and columns are separated by delim
.tableToString(tb, sep = ":", delim = ";")
.tableToString(tb, sep = ":", delim = ";")
tb |
A table. Output from the |
sep |
A delimiter to separate the column name and value |
delim |
A delimiter to separate columns |
Search AnVIL workspaces using keywords
AnVILBrowse( keyword, searchFrom = "all", returnFrom = NULL, metaTables = "default", minAge = 0, maxAge = 130, minCount = 0, workspaceTable = NULL, workflowTable = NULL, dataTable = NULL )
AnVILBrowse( keyword, searchFrom = "all", returnFrom = NULL, metaTables = "default", minAge = 0, maxAge = 130, minCount = 0, workspaceTable = NULL, workflowTable = NULL, dataTable = NULL )
keyword |
A character(1). Regular expression is accepted. For example,
you can search multiple keywords separated by the vertical bar ("|"). |
searchFrom |
Under the default ( |
returnFrom |
Under the default ( |
metaTables |
Under the default ( |
minAge |
A numeric (1). Any data with a maximum participant age lower
than this parameter will be excluded from the output. Under the default
( |
maxAge |
A numeric (1). Any data with a minimum participant age higher
than this parameter will be excluded from the output. Under the default
( |
minCount |
A numeric (1). Any data with the number of subjects fewer
than this parameter will be excluded from the output. Under the default
( |
workspaceTable |
A data frame. This argument is counted only when
|
workflowTable |
A data frame. This argument is counted only when
|
dataTable |
A data frame. This argument is counted only when
|
A data frame of AnVIL resources containing keywords. Depending on
the returnFrom
argument, it can be workspaces, workflows, or data.
Under the default returnFrom = NULL
, it returns the same data type
as specified in searchFrom
or workspace for searchFrom = "all"
.
AnVILBrowse("malaria") AnVILBrowse("resistance") AnVILBrowse("resistance", searchFrom = "workflow")
AnVILBrowse("malaria") AnVILBrowse("resistance") AnVILBrowse("resistance", searchFrom = "workflow")
This function shows the available analyses and the brief descriptions of them.
availableAnalysis(curatedOnly = TRUE, keyword = NULL)
availableAnalysis(curatedOnly = TRUE, keyword = NULL)
curatedOnly |
Default is |
keyword |
Default is |
A data frame. The analysis
column shows the name of the
available analyses, which is the required input (analysis
argument)
for the functions implemented in AnVILWorkflow package.
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { availableAnalysis() }
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { availableAnalysis() }
This function makes your own copy of the existing workspace, selected
through templateName
or analysis
. Your copied/cloned
workspace name will be workspaceName
and any computing cost will
be charged to the billing account linked to your billingProjectName
.
You should provide at least one argument templateName
or
analysis
.
cloneWorkspace( workspaceName, templateName = "", analysis = NULL, bucketLocation = "us-central1", accountEmail = gcloud_account(), billingProjectName = gcloud_project() )
cloneWorkspace( workspaceName, templateName = "", analysis = NULL, bucketLocation = "us-central1", accountEmail = gcloud_account(), billingProjectName = gcloud_project() )
workspaceName |
Name of the workspace you are creating |
templateName |
Character(1). Name of the template workspace you
want to clone. You can provide |
analysis |
Character(1). Name of the analysis whose workspace you want
to clone. The list of available analyses can be found using
|
bucketLocation |
Character(1). Region in which bucket attached to the
workspace should be created. Default is |
accountEmail |
Character(1). Email linked to Terra account |
billingProjectName |
Character(1). Name of the billing project |
Name of the cloned workspace
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { cloneWorkspace(workspaceName = "salmon", templateName = "Bioconductor-Workflow-DESeq2") }
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { cloneWorkspace(workspaceName = "salmon", templateName = "Bioconductor-Workflow-DESeq2") }
Check the current input arguments
currentInput(workspaceName, config, requiredInputOnly = TRUE, analysis = NULL)
currentInput(workspaceName, config, requiredInputOnly = TRUE, analysis = NULL)
workspaceName |
Name of the workspace |
config |
Workflow configuration. Output from the
|
requiredInputOnly |
Under the default ( |
analysis |
If specified, only the minimally required inputs for a given workflow will be returned. |
A data.frame for the inputs defined in a workflow configuration.
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { workspaceName <- "Bioconductor-Workflow-DESeq2" config <- getWorkflowConfig(workspaceName) currentInput(workspaceName = workspaceName, config = config) }
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { workspaceName <- "Bioconductor-Workflow-DESeq2" config <- getWorkflowConfig(workspaceName) currentInput(workspaceName = workspaceName, config = config) }
Find the root entity name
findInputName(workspaceName, rootEntity = "", nameOnly = TRUE)
findInputName(workspaceName, rootEntity = "", nameOnly = TRUE)
workspaceName |
Name of the workspace |
rootEntity |
A character. Type of root entity for Terra's data model.
For example, |
nameOnly |
Under the default ( |
A character vector of input names under the given root entity.
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { findInputName( workspaceName = "Bioconductor-Workflow-DESeq2", rootEntity = "participant_set") }
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { findInputName( workspaceName = "Bioconductor-Workflow-DESeq2", rootEntity = "participant_set") }
Get all the data tables
getAllDataTables(workspaces = NULL)
getAllDataTables(workspaces = NULL)
workspaces |
A character vector. Under the default ( |
A data frame of all the data tables
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { allDataTables <- getAllDataTables() }
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { allDataTables <- getAllDataTables() }
Collect workflows from all workspaces a user has access to
getAllWorkflows(workspaces = NULL)
getAllWorkflows(workspaces = NULL)
workspaces |
Under the default ( |
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { allWorkflows <- getAllWorkflows() }
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { allWorkflows <- getAllWorkflows() }
Different from avworkspaces
getAllWorkspaces()
getAllWorkspaces()
library(AnVILBase) if (gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name())) { allWorkspaces <- getAllWorkspaces() }
library(AnVILBase) if (gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name())) { allWorkspaces <- getAllWorkspaces() }
This function prints out the Dashboard contents of the target workspace.
You can provide either workspaceName
or analysis
. If both
values are provided, this function will use workspaceName
argument
over analysis
argument.
getDashboard(workspaceName = "", analysis = NULL)
getDashboard(workspaceName = "", analysis = NULL)
workspaceName |
The name of the workspace you want to get the overview provided through the Dashboard. |
analysis |
The name of the analysis you want to check the Dashboard of.
The list of available analyses can be found with |
The last modified date as a message, followed by the Dashboard contents from the target workspace.
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { getDashboard(analysis = "salmon") getDashboard(workspaceName = "Bioconductor-Workflow-DESeq2") }
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { getDashboard(analysis = "salmon") getDashboard(workspaceName = "Bioconductor-Workflow-DESeq2") }
This function usually takes a long time to run due to the large volume of AnVIL data.
getData(allWorkspaces)
getData(allWorkspaces)
allWorkspaces |
A data frame of all the workspaces you have access
to. An output from the |
Download output files from Terra
getOutput( workspaceName, submissionId = NULL, keyword = NULL, dest_dir = ".", dry = TRUE )
getOutput( workspaceName, submissionId = NULL, keyword = NULL, dest_dir = ".", dry = TRUE )
workspaceName |
Name of the workspace |
submissionId |
Submission Id. If it's not provided, the most recent submission id with the 'succeeded' status will be used. |
keyword |
A character string containing a regular expression to be
matched in the output file name. Under the default |
dest_dir |
Path to the directory where downloaded files are saved |
dry |
To download the output data, set |
If "dry=TRUE"
, this function will return a data frame with
two columns named 'filename' and 'name'.
filename
: Name of the actual output files.
name
: Name of the output defined in your workflow script.
This is how the configuration refers to the outputs.
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { getOutput(workspaceName = "Bioconductor-Workflow-DESeq2") }
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { getOutput(workspaceName = "Bioconductor-Workflow-DESeq2") }
Check the workflow configuration
getWorkflowConfig(workspaceName, workflowName = NULL)
getWorkflowConfig(workspaceName, workflowName = NULL)
workspaceName |
Name of the workspace |
workflowName |
Name of the workflow to run. If a single workflow is
available under the specified workspace, this function will check the input
of that workflow under the default ( |
A data.frame for the inputs defined in a workflow configuration.
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { config <- getWorkflowConfig(workspaceName = "Bioconductor-Workflow-DESeq2") config }
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { config <- getWorkflowConfig(workspaceName = "Bioconductor-Workflow-DESeq2") config }
Creates a metadata table of workflows from all workspaces provided
getWorkflows(allWorkspaces)
getWorkflows(allWorkspaces)
allWorkspaces |
A data frame of all the workspaces you have access
to. An output from the |
Metadata of all the GCP-based workspaces are collected.
getWorkspaces()
getWorkspaces()
Check the status of submitted jobs
monitorWorkflow(workspaceName)
monitorWorkflow(workspaceName)
workspaceName |
Character(1). Name of the workspace |
A tibble summarizing submitted workflow jobs. Contains information such as submission Id, submission date, and submission status.
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { monitorWorkflow(workspaceName = "Bioconductor-Workflow-DESeq2") }
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { monitorWorkflow(workspaceName = "Bioconductor-Workflow-DESeq2") }
Launch Terra workflow
runWorkflow( workspaceName, config, workflowName = NULL, useCallCache = TRUE, inputName = NULL )
runWorkflow( workspaceName, config, workflowName = NULL, useCallCache = TRUE, inputName = NULL )
workspaceName |
Name of the workspace that contains the workflow(s) you want to launch. |
config |
Workflow configuration. Output from the
|
workflowName |
Name of the workflow to run. If this input is not provided but there is only a single workflow available, the function will automatically use the only workflow. |
useCallCache |
A logical. Under the default condition ( |
inputName |
Name of your input entity. If the workflow is using Terra's
data model, this is required. The available entities can be found using the
|
This function will print out whether the call for workflow launching was successful or not.
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { if ("salmon" %in% avworkspaces()$name) runWorkflow(workspaceName = "salmon") }
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { if ("salmon" %in% avworkspaces()$name) runWorkflow(workspaceName = "salmon") }
Setup Google Cloud Account and Project
setCloudEnv( accountEmail = gcloud_account(), billingProjectName = gcloud_project(), message = TRUE )
setCloudEnv( accountEmail = gcloud_account(), billingProjectName = gcloud_project(), message = TRUE )
accountEmail |
Character(1). Email linked to your Terra account. |
billingProjectName |
Character(1). Name of the billing project, which is the gcloud account. |
message |
Under the default ( |
Terra/AnVIL working environment - Google Cloud billing account and the billing project name - will be printed out.
if (gcloud_exists()) { setCloudEnv() }
if (gcloud_exists()) { setCloudEnv() }
Abort submitted job
stopWorkflow(workspaceName, submissionId = NULL, dry = TRUE)
stopWorkflow(workspaceName, submissionId = NULL, dry = TRUE)
workspaceName |
Name of the workspace |
submissionId |
A character. Submission ID you want to abort. You can
find the submission id using |
dry |
Logical(1) when |
This function will print out whether the call for workflow abortion was successful or not. In case it was unsuccessful, the diagnosis will be suggested as a part of the message.
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { if ("salmon" %in% avworkspaces()$name) stopWorkflow(workspaceName = "salmon") }
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { if ("salmon" %in% avworkspaces()$name) stopWorkflow(workspaceName = "salmon") }
Update the input
updateInput( workspaceName, inputs, config, workflowName = NULL, dry = TRUE, verbose = TRUE )
updateInput( workspaceName, inputs, config, workflowName = NULL, dry = TRUE, verbose = TRUE )
workspaceName |
Name of the workspace |
inputs |
A tibble containing new input values. Provide the modified
version of the current input table, which is the output from
|
config |
Workflow configuration. Output from the
|
workflowName |
Name of the workflow to run. If a single workflow is
available under the specified workspace, this function will check the input
of that workflow under the default ( |
dry |
Logical(1). When |
verbose |
Logical(1). When |
With verbose=TRUE
, a list of updated inputs will be
printed. A successful execution of the function will update the input
configuration of the target workflow in Terra/AnVIL.
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { if ("salmon" %in% avworkspaces()$name) { config <- getWorkflowConfig(workspaceName = "salmon") inputs <- currentInput("salmon", config) ## Modify the contents of 'inputs' table for your analysis updateInput("salmon", inputs, config) }}
library(AnVILBase) if ( gcloud_exists() && identical(avplatform_namespace(), "AnVILGCP") && nzchar(avworkspace_name()) ) { if ("salmon" %in% avworkspaces()$name) { config <- getWorkflowConfig(workspaceName = "salmon") inputs <- currentInput("salmon", config) ## Modify the contents of 'inputs' table for your analysis updateInput("salmon", inputs, config) }}