This document explains the functionalities available in the a4Classif package.
This package contains functions for the classification of Affymetrix microarray
data stored in an ExpressionSet. The package is integrated
within the Automated Affymetrix Array Analysis suite of packages.
## Loading required package: a4Core
## Loading required package: a4Preproc
##
## a4Classif version 1.53.0
## Loading required package: Biobase
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
## as.data.frame, basename, cbind, colnames, dirname, do.call,
## duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
## lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, rank, rbind, rownames, sapply, setdiff, table, tapply,
## union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
To demonstrate the functionalities of the package, the ALL dataset is used.
The genes are annotated using the addGeneInfo utility function of the
a4Preproc package.
## Loading required package: hgu95av2.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## I, expand.grid, unname
## Loading required package: org.Hs.eg.db
##
##
# Fit the lasso classifier on the ALL data, using the "BTtype" variable as the
# grouping, then plot the fitted object with labels enabled.
resultLasso <- lassoClass(groups = "BTtype", object = ALL)
plot(
  resultLasso,
  main = "Lasso coefficients in relation to degree of penalization.",
  label = TRUE
)
## The lasso selected 16 genes. The top 15 genes are:
##
## Gene Coefficient
## 38319_at CD3D 0.95966733
## 35016_at CD74 -0.60928095
## 38147_at SH2D1A 0.49240967
## 35792_at MGLL 0.46856925
## 37563_at SRGAP3 0.26648240
## 38917_at YME1L1 0.25100075
## 40278_at GGA2 -0.25017550
## 41164_at IGHM -0.12387272
## 41409_at THEMIS2 -0.10581122
## 38242_at BLNK -0.10309606
## 35523_at HPGDS 0.10169706
## 38949_at PRKCQ 0.07832802
## 33316_at TOX 0.06963509
## 33839_at ITPR2 0.05801832
## 40570_at FOXO1 -0.04858863
# Fit the PAM classifier on the ALL data, using the "BTtype" variable as the
# grouping, then plot the fitted object under a descriptive title.
resultPam <- pamClass(groups = "BTtype", object = ALL)
plot(
  resultPam,
  main = "Pam misclassification error versus number of genes."
)
## Pam selected 1 genes. The top 15 genes are:
##
## GeneSymbol B.score T.score av.rank.in.CV prop.selected.in.CV
## 38319_at CD3D -0.1693 0.4875 1 1
## predicted
## true B T
## B 95 0
## T 1 32
# Restrict the analysis to 100 randomly chosen rows of ALL to keep the
# computation time short. The seed is fixed so that the same random subset is
# drawn on every run, which makes the reported results reproducible.
set.seed(123)
ALLSubset <- ALL[sample.int(n = nrow(ALL), size = 100L, replace = FALSE), ]

# Fit the random forest classifier on the subset, grouping by "BTtype", and
# plot the fitted object.
resultRf <- rfClass(object = ALLSubset, groups = "BTtype")
plot(resultRf)
## Random forest selected 17 genes. The top 15 genes are:
##
## GeneSymbol
## 31682_s_at VCAN
## 31731_at CBX4
## 32046_at PRKCD
## 32332_at IDH2
## 32833_at CLK1
## 33770_at CHUK
## 34671_at POLR3C
## 34707_at CHD3
## 35209_at <NA>
## 36143_at CASP3
## 36366_at B4GALT6
## 37988_at CD79B
## 38650_at IGFBP5
## 40271_at NUP93
## 40505_at UBE2L6
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=C LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] hgu95av2.db_3.13.0 org.Hs.eg.db_3.19.1 AnnotationDbi_1.67.0 IRanges_2.39.2 S4Vectors_0.43.2 ALL_1.47.0 Biobase_2.65.1 BiocGenerics_0.51.0 a4Classif_1.53.0 a4Preproc_1.53.0 a4Core_1.53.0 rmarkdown_2.28
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.9 varSelRF_0.7-8 shape_1.4.6.1 RSQLite_2.3.7 lattice_0.22-6 digest_0.6.37 evaluate_0.24.0 grid_4.4.1 iterators_1.0.14 fastmap_1.2.0 blob_1.2.4 foreach_1.5.2 jsonlite_1.8.8 glmnet_4.1-8 Matrix_1.7-0 GenomeInfoDb_1.41.1 DBI_1.2.3 survival_3.7-0 httr_1.4.7 UCSC.utils_1.1.0
## [21] Biostrings_2.73.1 codetools_0.2-20 jquerylib_0.1.4 cli_3.6.3 crayon_1.5.3 rlang_1.1.4 XVector_0.45.0 pamr_1.57 bit64_4.0.5 splines_4.4.1 cachem_1.1.0 yaml_2.3.10 tools_4.4.1 parallel_4.4.1 memoise_2.0.1 GenomeInfoDbData_1.2.12 ROCR_1.0-11 png_0.1-8 buildtools_1.0.0 vctrs_0.6.5
## [41] R6_2.5.1 lifecycle_1.0.4 zlibbioc_1.51.1 KEGGREST_1.45.1 randomForest_4.7-1.1 bit_4.0.5 cluster_2.1.6 pkgconfig_2.0.3 bslib_0.8.0 Rcpp_1.0.13 highr_0.11 xfun_0.47 sys_3.4.2 knitr_1.48 htmltools_0.5.8.1 maketools_1.3.0 compiler_4.4.1