SangerAlignment
Description
An S4 class containing a list of SangerContig instances and the contig alignment results, which together correspond to the final alignment in Sanger sequencing.
Slots
objectResults
-
This is the object that stores all information of the creation result.
inputSource
-
The input source of the raw file. It must be "ABIF" or "FASTA". The default value is "ABIF".
processMethod
-
The method to create a contig from reads. The value is "REGEX" or "CSV". The default value is "REGEX".
ABIF_Directory
-
If inputSource is "ABIF", then this value is the path of a parent directory storing all reads in ABIF format you want to analyse. If inputSource is "FASTA", then this value has to be NULL by default.
FASTA_File
-
If inputSource is "FASTA", then this value has to be the path to a valid FASTA file; if inputSource is "ABIF", then this value has to be NULL by default.
REGEX_SuffixForward
-
The suffix of the filenames for forward reads in regular expression, i.e. reads that do not need to be reverse-complemented. For forward reads, it should be "_F.ab1".
REGEX_SuffixReverse
-
The suffix of the filenames for reverse reads in regular expression, i.e. reads that need to be reverse-complemented. For reverse reads, it should be "_R.ab1".
CSV_NamesConversion
-
The file path to the CSV file that provides read names, directions, and their contig groups. If processMethod is "CSV", then this value has to be the path to a valid CSV file; if processMethod is "REGEX", then this value has to be NULL by default.
geneticCode
-
Named character vector in the same format as GENETIC_CODE (the default), which represents the standard genetic code. This is the code with which the function will attempt to translate your DNA sequences. You can get an appropriate vector with the getGeneticCode() function. The default is the standard code.
refAminoAcidSeq
-
An amino acid reference sequence supplied as a string or an AAString object. If your sequences are protein-coding DNA sequences, and you want to have frameshifts automatically detected and corrected, supply a reference amino acid sequence via this argument. If this argument is supplied, the sequences are then kept in frame for the alignment step. Fwd sequences are assumed to come from the sense (i.e. coding, or "+") strand. The default value is "".
contigList
-
A list storing all SangerContig S4 instances.
contigsConsensus
-
The consensus read of all SangerContig S4 instances as a DNAString object.
contigsAlignment
-
The alignment of all SangerContig S4 instances with the called consensus sequence as a DNAStringSet object. Users can use BrowseSeqs() to view the alignment.
contigsTree
-
A phylo instance returned by the bionj function in the ape package. It can be used to draw the tree.
Author(s)
Kuan-Hao Chao
Examples
## Simple example
## Reads live under ABIF_Directory; forward and reverse reads are told
## apart by the two filename-suffix regular expressions.
rawDataDir <- system.file("extdata", package = "sangeranalyseR")
parentDir <- file.path(rawDataDir, "Allolobophora_chlorotica", "ACHLO")
my_aligned_contigs <- new(
  "SangerAlignment",
  ABIF_Directory = parentDir,
  REGEX_SuffixForward = "_[0-9]*_F.ab1$",
  REGEX_SuffixReverse = "_[0-9]*_R.ab1$"
)
## Simple example (CSV)
## processMethod = "CSV": a names-conversion CSV file is supplied instead
## of filename-suffix regular expressions.
rawDataDir <- system.file("extdata", package = "sangeranalyseR")
parentDir <- file.path(rawDataDir, "Allolobophora_chlorotica", "ACHLO")
CSV_NamesConversion <- file.path(
  rawDataDir, "ab1", "SangerAlignment", "names_conversion.csv"
)
sangerAlignment <- new(
  "SangerAlignment",
  processMethod = "CSV",
  ABIF_Directory = parentDir,
  CSV_NamesConversion = CSV_NamesConversion
)
## Input From ABIF file format (Regex)
## Every argument is spelled out explicitly; `parentDir` is the ABIF
## directory defined in the preceding example.
REGEX_SuffixForward <- "_[0-9]*_F.ab1$"
REGEX_SuffixReverse <- "_[0-9]*_R.ab1$"
sangerAlignment <- new(
  "SangerAlignment",
  printLevel = "SangerAlignment",
  inputSource = "ABIF",
  processMethod = "REGEX",
  FASTA_File = NULL,
  CSV_NamesConversion = NULL,
  ABIF_Directory = parentDir,
  REGEX_SuffixForward = REGEX_SuffixForward,
  REGEX_SuffixReverse = REGEX_SuffixReverse,
  TrimmingMethod = "M1",
  M1TrimmingCutoff = 0.0001,
  M2CutoffQualityScore = NULL,
  M2SlidingWindowSize = NULL,
  baseNumPerRow = 100,
  heightPerRow = 200,
  signalRatioCutoff = 0.33,
  showTrimmed = TRUE,
  refAminoAcidSeq = "SRQWLFSTNHKDIGTLYFIFGAWAGMVGTSLSILIRAELGHPGALIGDDQIYNVIVTAHAFIMIFFMVMPIMIGGFGNWLVPLMLGAPDMAFPRMNNMSFWLLPPALSLLLVSSMVENGAGTGWTVYPPLSAGIAHGGASVDLAIFSLHLAGISSILGAVNFITTVINMRSTGISLDRMPLFVWSVVITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPEVYILILPGFGMISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDVDTRAYFTSATMIIAVPTGIKIFSWLATLHGTQLSYSPAILWALGFVFLFTVGGLTGVVLANSSVDIILHDTYYVVAHFHYVLSMGAVFAIMAGFIHWYPLFTGLTLNNKWLKSHFIIMFIGVNLTFFPQHFLGLAGMPRRYSDYPDAYTTWNIVSTIGSTISLLGILFFFFIIWESLVSQRQVIYPIQLNSSIEWYQNTPPAEHSYSELPLLTN",
  minReadsNum = 2,
  minReadLength = 20,
  minFractionCall = 0.5,
  maxFractionLost = 0.5,
  geneticCode = GENETIC_CODE,
  acceptStopCodons = TRUE,
  readingFrame = 1,
  processorsNum = 2
)
## Input From ABIF file format (Csv three column)
## ABIF input with processMethod = "CSV", using the
## "names_conversion_all.csv" file.
rawDataDir <- system.file("extdata", package = "sangeranalyseR")
parentDir <- file.path(rawDataDir, "Allolobophora_chlorotica", "ACHLO")
CSV_NamesConversion <- file.path(
  rawDataDir, "ab1", "SangerAlignment", "names_conversion_all.csv"
)
sangerAlignment <- new(
  "SangerAlignment",
  inputSource = "ABIF",
  processMethod = "CSV",
  ABIF_Directory = parentDir,
  CSV_NamesConversion = CSV_NamesConversion,
  refAminoAcidSeq = "SRQWLFSTNHKDIGTLYFIFGAWAGMVGTSLSILIRAELGHPGALIGDDQIYNVIVTAHAFIMIFFMVMPIMIGGFGNWLVPLMLGAPDMAFPRMNNMSFWLLPPALSLLLVSSMVENGAGTGWTVYPPLSAGIAHGGASVDLAIFSLHLAGISSILGAVNFITTVINMRSTGISLDRMPLFVWSVVITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPEVYILILPGFGMISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDVDTRAYFTSATMIIAVPTGIKIFSWLATLHGTQLSYSPAILWALGFVFLFTVGGLTGVVLANSSVDIILHDTYYVVAHFHYVLSMGAVFAIMAGFIHWYPLFTGLTLNNKWLKSHFIIMFIGVNLTFFPQHFLGLAGMPRRYSDYPDAYTTWNIVSTIGSTISLLGILFFFFIIWESLVSQRQVIYPIQLNSSIEWYQNTPPAEHSYSELPLLTN",
  TrimmingMethod = "M1",
  M1TrimmingCutoff = 0.0001,
  M2CutoffQualityScore = NULL,
  M2SlidingWindowSize = NULL,
  baseNumPerRow = 100,
  heightPerRow = 200,
  signalRatioCutoff = 0.33,
  showTrimmed = TRUE,
  processorsNum = 2
)
## Input From FASTA file format (No Csv - Regex)
## FASTA input: a single FASTA file is passed via FASTA_File, together
## with the two suffix regular expressions.
rawDataDir <- system.file("extdata", package = "sangeranalyseR")
fastaFN <- file.path(
  rawDataDir, "fasta", "SangerAlignment", "Sanger_all_reads.fa"
)
REGEX_SuffixForwardFa <- "_[0-9]*_F$"
REGEX_SuffixReverseFa <- "_[0-9]*_R$"
sangerAlignmentFa <- new(
  "SangerAlignment",
  inputSource = "FASTA",
  processMethod = "REGEX",
  FASTA_File = fastaFN,
  REGEX_SuffixForward = REGEX_SuffixForwardFa,
  REGEX_SuffixReverse = REGEX_SuffixReverseFa,
  refAminoAcidSeq = "SRQWLFSTNHKDIGTLYFIFGAWAGMVGTSLSILIRAELGHPGALIGDDQIYNVIVTAHAFIMIFFMVMPIMIGGFGNWLVPLMLGAPDMAFPRMNNMSFWLLPPALSLLLVSSMVENGAGTGWTVYPPLSAGIAHGGASVDLAIFSLHLAGISSILGAVNFITTVINMRSTGISLDRMPLFVWSVVITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPEVYILILPGFGMISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDVDTRAYFTSATMIIAVPTGIKIFSWLATLHGTQLSYSPAILWALGFVFLFTVGGLTGVVLANSSVDIILHDTYYVVAHFHYVLSMGAVFAIMAGFIHWYPLFTGLTLNNKWLKSHFIIMFIGVNLTFFPQHFLGLAGMPRRYSDYPDAYTTWNIVSTIGSTISLLGILFFFFIIWESLVSQRQVIYPIQLNSSIEWYQNTPPAEHSYSELPLLTN",
  processorsNum = 2
)
## Input From FASTA file format (Csv three column method)
## FASTA input with processMethod = "CSV": both the FASTA file and the
## names-conversion CSV file are supplied.
rawDataDir <- system.file("extdata", package = "sangeranalyseR")
fastaFN <- file.path(
  rawDataDir, "fasta", "SangerAlignment", "Sanger_all_reads.fa"
)
CSV_NamesConversion <- file.path(
  rawDataDir, "fasta", "SangerAlignment", "names_conversion.csv"
)
sangerAlignmentFa <- new(
  "SangerAlignment",
  inputSource = "FASTA",
  processMethod = "CSV",
  FASTA_File = fastaFN,
  CSV_NamesConversion = CSV_NamesConversion,
  refAminoAcidSeq = "SRQWLFSTNHKDIGTLYFIFGAWAGMVGTSLSILIRAELGHPGALIGDDQIYNVIVTAHAFIMIFFMVMPIMIGGFGNWLVPLMLGAPDMAFPRMNNMSFWLLPPALSLLLVSSMVENGAGTGWTVYPPLSAGIAHGGASVDLAIFSLHLAGISSILGAVNFITTVINMRSTGISLDRMPLFVWSVVITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPEVYILILPGFGMISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDVDTRAYFTSATMIIAVPTGIKIFSWLATLHGTQLSYSPAILWALGFVFLFTVGGLTGVVLANSSVDIILHDTYYVVAHFHYVLSMGAVFAIMAGFIHWYPLFTGLTLNNKWLKSHFIIMFIGVNLTFFPQHFLGLAGMPRRYSDYPDAYTTWNIVSTIGSTISLLGILFFFFIIWESLVSQRQVIYPIQLNSSIEWYQNTPPAEHSYSELPLLTN",
  processorsNum = 2
)
## Simple example (ABIF directory + suffix regular expressions)
rawDataDir <- system.file("extdata", package = "sangeranalyseR")
parentDir <- file.path(rawDataDir, "Allolobophora_chlorotica", "ACHLO")
my_aligned_contigs <- new(
  "SangerAlignment",
  ABIF_Directory = parentDir,
  REGEX_SuffixForward = "_[0-9]*_F.ab1$",
  REGEX_SuffixReverse = "_[0-9]*_R.ab1$"
)
## Simple example (ABIF directory + names-conversion CSV file)
rawDataDir <- system.file("extdata", package = "sangeranalyseR")
parentDir <- file.path(rawDataDir, "Allolobophora_chlorotica", "ACHLO")
CSV_NamesConversion <- file.path(
  rawDataDir, "ab1", "SangerAlignment", "names_conversion.csv"
)
sangerAlignment <- new(
  "SangerAlignment",
  processMethod = "CSV",
  ABIF_Directory = parentDir,
  CSV_NamesConversion = CSV_NamesConversion
)
## Input from ABIF file format (REGEX), all arguments given explicitly.
## `parentDir` is the ABIF directory defined in the preceding example.
REGEX_SuffixForward <- "_[0-9]*_F.ab1$"
REGEX_SuffixReverse <- "_[0-9]*_R.ab1$"
sangerAlignment <- new(
  "SangerAlignment",
  printLevel = "SangerAlignment",
  inputSource = "ABIF",
  processMethod = "REGEX",
  FASTA_File = NULL,
  CSV_NamesConversion = NULL,
  ABIF_Directory = parentDir,
  REGEX_SuffixForward = REGEX_SuffixForward,
  REGEX_SuffixReverse = REGEX_SuffixReverse,
  TrimmingMethod = "M1",
  M1TrimmingCutoff = 0.0001,
  M2CutoffQualityScore = NULL,
  M2SlidingWindowSize = NULL,
  baseNumPerRow = 100,
  heightPerRow = 200,
  signalRatioCutoff = 0.33,
  showTrimmed = TRUE,
  refAminoAcidSeq = "SRQWLFSTNHKDIGTLYFIFGAWAGMVGTSLSILIRAELGHPGALIGDDQIYNVIVTAHAFIMIFFMVMPIMIGGFGNWLVPLMLGAPDMAFPRMNNMSFWLLPPALSLLLVSSMVENGAGTGWTVYPPLSAGIAHGGASVDLAIFSLHLAGISSILGAVNFITTVINMRSTGISLDRMPLFVWSVVITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPEVYILILPGFGMISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDVDTRAYFTSATMIIAVPTGIKIFSWLATLHGTQLSYSPAILWALGFVFLFTVGGLTGVVLANSSVDIILHDTYYVVAHFHYVLSMGAVFAIMAGFIHWYPLFTGLTLNNKWLKSHFIIMFIGVNLTFFPQHFLGLAGMPRRYSDYPDAYTTWNIVSTIGSTISLLGILFFFFIIWESLVSQRQVIYPIQLNSSIEWYQNTPPAEHSYSELPLLTN",
  minReadsNum = 2,
  minReadLength = 20,
  minFractionCall = 0.5,
  maxFractionLost = 0.5,
  geneticCode = GENETIC_CODE,
  acceptStopCodons = TRUE,
  readingFrame = 1,
  processorsNum = 2
)
## Input from ABIF file format (CSV), using "names_conversion_all.csv".
rawDataDir <- system.file("extdata", package = "sangeranalyseR")
parentDir <- file.path(rawDataDir, "Allolobophora_chlorotica", "ACHLO")
CSV_NamesConversion <- file.path(
  rawDataDir, "ab1", "SangerAlignment", "names_conversion_all.csv"
)
sangerAlignment <- new(
  "SangerAlignment",
  inputSource = "ABIF",
  processMethod = "CSV",
  ABIF_Directory = parentDir,
  CSV_NamesConversion = CSV_NamesConversion,
  refAminoAcidSeq = "SRQWLFSTNHKDIGTLYFIFGAWAGMVGTSLSILIRAELGHPGALIGDDQIYNVIVTAHAFIMIFFMVMPIMIGGFGNWLVPLMLGAPDMAFPRMNNMSFWLLPPALSLLLVSSMVENGAGTGWTVYPPLSAGIAHGGASVDLAIFSLHLAGISSILGAVNFITTVINMRSTGISLDRMPLFVWSVVITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPEVYILILPGFGMISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDVDTRAYFTSATMIIAVPTGIKIFSWLATLHGTQLSYSPAILWALGFVFLFTVGGLTGVVLANSSVDIILHDTYYVVAHFHYVLSMGAVFAIMAGFIHWYPLFTGLTLNNKWLKSHFIIMFIGVNLTFFPQHFLGLAGMPRRYSDYPDAYTTWNIVSTIGSTISLLGILFFFFIIWESLVSQRQVIYPIQLNSSIEWYQNTPPAEHSYSELPLLTN",
  TrimmingMethod = "M1",
  M1TrimmingCutoff = 0.0001,
  M2CutoffQualityScore = NULL,
  M2SlidingWindowSize = NULL,
  baseNumPerRow = 100,
  heightPerRow = 200,
  signalRatioCutoff = 0.33,
  showTrimmed = TRUE,
  processorsNum = 2
)
## Input from FASTA file format (REGEX): one FASTA file plus the two
## suffix regular expressions.
rawDataDir <- system.file("extdata", package = "sangeranalyseR")
fastaFN <- file.path(
  rawDataDir, "fasta", "SangerAlignment", "Sanger_all_reads.fa"
)
REGEX_SuffixForwardFa <- "_[0-9]*_F$"
REGEX_SuffixReverseFa <- "_[0-9]*_R$"
sangerAlignmentFa <- new(
  "SangerAlignment",
  inputSource = "FASTA",
  processMethod = "REGEX",
  FASTA_File = fastaFN,
  REGEX_SuffixForward = REGEX_SuffixForwardFa,
  REGEX_SuffixReverse = REGEX_SuffixReverseFa,
  refAminoAcidSeq = "SRQWLFSTNHKDIGTLYFIFGAWAGMVGTSLSILIRAELGHPGALIGDDQIYNVIVTAHAFIMIFFMVMPIMIGGFGNWLVPLMLGAPDMAFPRMNNMSFWLLPPALSLLLVSSMVENGAGTGWTVYPPLSAGIAHGGASVDLAIFSLHLAGISSILGAVNFITTVINMRSTGISLDRMPLFVWSVVITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPEVYILILPGFGMISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDVDTRAYFTSATMIIAVPTGIKIFSWLATLHGTQLSYSPAILWALGFVFLFTVGGLTGVVLANSSVDIILHDTYYVVAHFHYVLSMGAVFAIMAGFIHWYPLFTGLTLNNKWLKSHFIIMFIGVNLTFFPQHFLGLAGMPRRYSDYPDAYTTWNIVSTIGSTISLLGILFFFFIIWESLVSQRQVIYPIQLNSSIEWYQNTPPAEHSYSELPLLTN",
  processorsNum = 2
)
## Input from FASTA file format (CSV): one FASTA file plus a
## names-conversion CSV file.
rawDataDir <- system.file("extdata", package = "sangeranalyseR")
fastaFN <- file.path(
  rawDataDir, "fasta", "SangerAlignment", "Sanger_all_reads.fa"
)
CSV_NamesConversion <- file.path(
  rawDataDir, "fasta", "SangerAlignment", "names_conversion.csv"
)
sangerAlignmentFa <- new(
  "SangerAlignment",
  inputSource = "FASTA",
  processMethod = "CSV",
  FASTA_File = fastaFN,
  CSV_NamesConversion = CSV_NamesConversion,
  refAminoAcidSeq = "SRQWLFSTNHKDIGTLYFIFGAWAGMVGTSLSILIRAELGHPGALIGDDQIYNVIVTAHAFIMIFFMVMPIMIGGFGNWLVPLMLGAPDMAFPRMNNMSFWLLPPALSLLLVSSMVENGAGTGWTVYPPLSAGIAHGGASVDLAIFSLHLAGISSILGAVNFITTVINMRSTGISLDRMPLFVWSVVITALLLLLSLPVLAGAITMLLTDRNLNTSFFDPAGGGDPILYQHLFWFFGHPEVYILILPGFGMISHIISQESGKKETFGSLGMIYAMLAIGLLGFIVWAHHMFTVGMDVDTRAYFTSATMIIAVPTGIKIFSWLATLHGTQLSYSPAILWALGFVFLFTVGGLTGVVLANSSVDIILHDTYYVVAHFHYVLSMGAVFAIMAGFIHWYPLFTGLTLNNKWLKSHFIIMFIGVNLTFFPQHFLGLAGMPRRYSDYPDAYTTWNIVSTIGSTISLLGILFFFFIIWESLVSQRQVIYPIQLNSSIEWYQNTPPAEHSYSELPLLTN",
  processorsNum = 2
)