---
title: "Normalization Methods"
author:
- name: Pol Castellano-Escuder, Ph.D.
  affiliation: Duke University
  email: polcaes@gmail.com
date: "`r BiocStyle::doc_date()`"
output:
  BiocStyle::html_document
vignette: >
  %\VignetteIndexEntry{Normalization Methods}
  %\VignetteEngine{knitr::rmarkdown}
  %\usepackage[utf8]{inputenc}
  %\VignetteEncoding{UTF-8}
bibliography: ["POMA.bib"]
biblio-style: apalike
link-citations: true
---

**Compiled date**: `r Sys.Date()`

**Last edited**: 2023-12-14

**License**: `r packageDescription("POMA")[["License"]]`

```{r, include = FALSE}
# Default chunk options applied to every chunk in this vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  fig.align = "center",
  comment = ">"
)
```

# Installation

Run the following code to install the Bioconductor version of the package.

```{r, eval = FALSE}
# install.packages("BiocManager")
BiocManager::install("POMA")
```

# Load Packages

```{r, warning = FALSE, message = FALSE, comment = NA}
library(POMA)
library(ggtext)
library(patchwork)
```

# Load Data and Imputation

Let's create a cleaned `SummarizedExperiment` object from the sample data `st000336` to explore the normalization effects.

```{r, warning = FALSE, comment = NA}
example_data <- st000336 %>%
  PomaImpute() # KNN imputation

example_data
```

# Normalization

Here we will evaluate the normalization methods that POMA offers on the same `SummarizedExperiment` object to compare them [@normalization].

```{r, warning = FALSE}
# Apply each normalization method to the same imputed object so that the
# results below are directly comparable.
none <- PomaNorm(example_data, method = "none")
auto_scaling <- PomaNorm(example_data, method = "auto_scaling")
level_scaling <- PomaNorm(example_data, method = "level_scaling")
log_scaling <- PomaNorm(example_data, method = "log_scaling")
log_transformation <- PomaNorm(example_data, method = "log")
vast_scaling <- PomaNorm(example_data, method = "vast_scaling")
log_pareto <- PomaNorm(example_data, method = "log_pareto")
```

## Normalization effect on data dimensions

When we check the dimensions of the data after normalization, we can see that all methods have the same effect on the data dimensions.
`PomaNorm` **only** modifies the data dimension when the dataset contains **only-zero features** or **zero-variance features**.

```{r, warning = FALSE}
# Dimensions of the assay matrix returned by each normalization method.
dim(SummarizedExperiment::assay(none))
dim(SummarizedExperiment::assay(auto_scaling))
dim(SummarizedExperiment::assay(level_scaling))
dim(SummarizedExperiment::assay(log_scaling))
dim(SummarizedExperiment::assay(log_transformation))
dim(SummarizedExperiment::assay(vast_scaling))
dim(SummarizedExperiment::assay(log_pareto))
```

## Normalization effect on samples

Here we can evaluate the normalization effects on samples [@normalization].

```{r, message = FALSE, warning = FALSE}
# Per-sample boxplot for one normalized object, titled `title`, with the
# legend position set to "none" and the x-axis text blanked. Used for the six
# panels that are combined into a single grid below.
compact_boxplot <- function(data, title) {
  PomaBoxplots(
    data,
    x = "samples",
    theme_params = list(legend_title = FALSE, legend_position = "none")
  ) +
    ggplot2::ggtitle(title) +
    ggplot2::theme(axis.text.x = ggplot2::element_blank())
}

# The non-normalized data is drawn on its own with the default theme.
none_boxplot <- PomaBoxplots(none, x = "samples") +
  ggplot2::ggtitle("Not Normalized")

auto_scaling_boxplot <- compact_boxplot(auto_scaling, "Auto Scaling")
level_scaling_boxplot <- compact_boxplot(level_scaling, "Level Scaling")
log_scaling_boxplot <- compact_boxplot(log_scaling, "Log Scaling")
log_transformation_boxplot <- compact_boxplot(
  log_transformation,
  "Log Transformation"
)
vast_scaling_boxplot <- compact_boxplot(vast_scaling, "Vast Scaling")
log_pareto_boxplot <- compact_boxplot(log_pareto, "Log Pareto")

none_boxplot

# patchwork layout: `+` places plots side by side, `/` stacks the two rows.
(auto_scaling_boxplot + level_scaling_boxplot + log_scaling_boxplot) /
  (log_transformation_boxplot + vast_scaling_boxplot + log_pareto_boxplot)
```

## Normalization effect on features

Here we can evaluate the normalization effects on features.

```{r, message = FALSE, warning = FALSE}
# Per-feature density plot for one normalized object, titled `title`, with the
# legend position set to "none" and both axis titles blanked. Used for the six
# panels that are combined into a single grid below.
compact_density <- function(data, title) {
  PomaDensity(
    data,
    x = "features",
    theme_params = list(legend_title = FALSE, legend_position = "none")
  ) +
    ggplot2::ggtitle(title) +
    ggplot2::theme(
      axis.title.x = ggplot2::element_blank(),
      axis.title.y = ggplot2::element_blank()
    )
}

# The non-normalized data is drawn on its own and keeps its axis titles.
none_density <- PomaDensity(
  none,
  x = "features",
  theme_params = list(legend_title = FALSE, legend_position = "none")
) +
  ggplot2::ggtitle("Not Normalized")

auto_scaling_density <- compact_density(auto_scaling, "Auto Scaling")
level_scaling_density <- compact_density(level_scaling, "Level Scaling")
log_scaling_density <- compact_density(log_scaling, "Log Scaling")
log_transformation_density <- compact_density(
  log_transformation,
  "Log Transformation"
)
vast_scaling_density <- compact_density(vast_scaling, "Vast Scaling")
log_pareto_density <- compact_density(log_pareto, "Log Pareto")

none_density

# patchwork layout: `+` places plots side by side, `/` stacks the two rows.
(auto_scaling_density + level_scaling_density + log_scaling_density) /
  (log_transformation_density + vast_scaling_density + log_pareto_density)
```

# Session Information

```{r}
sessionInfo()
```

# References