quality.control.vignette.Rmd
library(gridExtra)
library(knitr)
library(ggplot2)
#install.packages("fastqcr")
library(fastqcr)
#must run this if fastqc is not already installed locally
#fastqc_install()
###THIS IS THE ONLY SECTION YOU NEED TO EDIT###
###set the input and output directory locations below:
#full path to the directory holding one .fastq.gz file per sample
fq.dir <- "/home/d669d153/work/dia.din/fq"
#full path to the directory where the fastqc output should be written
qc.dir <- "~/Downloads/qc"
#run fastqc from within R on every .fastq.gz file in fq.dir
#this only has to be done once; leave it commented out while adjusting the downstream visualizations
#fastqc(fq.dir = fq.dir, # FASTQ files directory
# qc.dir = qc.dir, # Results directory
# threads = 4 # Number of threads
# )
#list the contents of the output directory to confirm that fastqc produced its reports
list.files(qc.dir)
[1] “908108_H_diadema_Gatokae_fastqc.html”
[2] “908108_H_diadema_Gatokae_fastqc.zip”
[3] “908150_H_dinops_Guadalcanal_fastqc.html”
[4] “908150_H_dinops_Guadalcanal_fastqc.zip”
[5] “908151_H_diadema_Guadalcanal_fastqc.html”
[6] “908151_H_diadema_Guadalcanal_fastqc.zip”
[7] “908152_H_diadema_Guadalcanal_fastqc.html”
[8] “908152_H_diadema_Guadalcanal_fastqc.zip”
[9] “908153a_H_dinops_Guadalcanal_fastqc.html”
[10] “908153a_H_dinops_Guadalcanal_fastqc.zip”
[11] “908154_H_dinops_Guadalcanal_fastqc.html”
[12] “908154_H_dinops_Guadalcanal_fastqc.zip”
[13] “908155_H_dinops_Guadalcanal_fastqc.html”
[14] “908155_H_dinops_Guadalcanal_fastqc.zip”
[15] “908156_H_diadema_Guadalcanal_fastqc.html”
[16] “908156_H_diadema_Guadalcanal_fastqc.zip”
[17] “908208_H_diadema_Guadalcanal_fastqc.html”
[18] “908208_H_diadema_Guadalcanal_fastqc.zip”
[19] “JM19686_H_diadema_Choiseul_fastqc.html”
[20] “JM19686_H_diadema_Choiseul_fastqc.zip”
[21] “KVO150_H_diadema_Isabel_fastqc.html”
[22] “KVO150_H_diadema_Isabel_fastqc.zip”
[23] “KVO168_H_diadema_Isabel_fastqc.html”
[24] “KVO168_H_diadema_Isabel_fastqc.zip”
[25] “KVO169_H_diadema_Isabel_fastqc.html”
[26] “KVO169_H_diadema_Isabel_fastqc.zip”
[27] “KVO170_H_diadema_Isabel_fastqc.html”
[28] “KVO170_H_diadema_Isabel_fastqc.zip”
[29] “KVO171_H_diadema_Isabel_fastqc.html”
[30] “KVO171_H_diadema_Isabel_fastqc.zip”
[31] “KVO172_H_diadema_Isabel_fastqc.html”
[32] “KVO172_H_diadema_Isabel_fastqc.zip”
[33] “KVO242_H_dinops_Isabel_fastqc.html”
[34] “KVO242_H_dinops_Isabel_fastqc.zip”
[35] “KVO243_H_dinops_Isabel_fastqc.html”
[36] “KVO243_H_dinops_Isabel_fastqc.zip”
[37] “KVO244_H_dinops_Isabel_fastqc.html”
[38] “KVO244_H_dinops_Isabel_fastqc.zip”
[39] “KVO245_H_dinops_Isabel_fastqc.html”
[40] “KVO245_H_dinops_Isabel_fastqc.zip”
[41] “KVO246_H_dinops_Isabel_fastqc.html”
[42] “KVO246_H_dinops_Isabel_fastqc.zip”
[43] “KVO248_H_dinops_Isabel_fastqc.html”
[44] “KVO248_H_dinops_Isabel_fastqc.zip”
[45] “KVO249_Hipposiderous_sp_Isabel_fastqc.html”
[46] “KVO249_Hipposiderous_sp_Isabel_fastqc.zip”
[47] “KVO250_Hipposiderous_sp_Isabel_fastqc.html”
[48] “KVO250_Hipposiderous_sp_Isabel_fastqc.zip”
[49] “KVO251_Hipposiderous_sp_Rendova_fastqc.html”
[50] “KVO251_Hipposiderous_sp_Rendova_fastqc.zip”
[51] “THL1048_H_dinops_Guadalcanal_fastqc.html”
[52] “THL1048_H_dinops_Guadalcanal_fastqc.zip”
[53] “THL1120_H_dinops_Gatokae_fastqc.html”
[54] “THL1120_H_dinops_Gatokae_fastqc.zip”
[55] “THL1121_H_dinops_Gatokae_fastqc.html”
[56] “THL1121_H_dinops_Gatokae_fastqc.zip”
[57] “THL1122_H_dinops_Gatokae_fastqc.html”
[58] “THL1122_H_dinops_Gatokae_fastqc.zip”
[59] “THL1154_H_demissus_Makira_fastqc.html”
[60] “THL1154_H_demissus_Makira_fastqc.zip”
[61] “THL1156_H_demissus_Makira_fastqc.html”
[62] “THL1156_H_demissus_Makira_fastqc.zip”
[63] “THL1167_H_diadema_Guadalcanal_fastqc.html”
[64] “THL1167_H_diadema_Guadalcanal_fastqc.zip”
[65] “THL1172_H_dinops_Guadalcanal_fastqc.html”
[66] “THL1172_H_dinops_Guadalcanal_fastqc.zip”
[67] “THL1173_H_dinops_Guadalcanal_fastqc.html”
[68] “THL1173_H_dinops_Guadalcanal_fastqc.zip”
[69] “THL1221_H_diadema_Gatokae_fastqc.html”
[70] “THL1221_H_diadema_Gatokae_fastqc.zip”
[71] “THL1223_H_dinops_Guadalcanal_fastqc.html”
[72] “THL1223_H_dinops_Guadalcanal_fastqc.zip”
[73] “THL17193_H_diadema_Ngella_fastqc.html”
[74] “THL17193_H_diadema_Ngella_fastqc.zip”
[75] “THL17194_H_diadema_Ngella_fastqc.html”
[76] “THL17194_H_diadema_Ngella_fastqc.zip”
[77] “THL17195_H_diadema_Ngella_fastqc.html”
[78] “THL17195_H_diadema_Ngella_fastqc.zip”
[79] “THL17197_H_diadema_Ngella_fastqc.html”
[80] “THL17197_H_diadema_Ngella_fastqc.zip”
[81] “THL17198_H_diadema_Ngella_fastqc.html”
[82] “THL17198_H_diadema_Ngella_fastqc.zip”
[83] “THL17199_H_diadema_Ngella_fastqc.html”
[84] “THL17199_H_diadema_Ngella_fastqc.zip”
[85] “WD1705_H_diadema_E_New_Britain_fastqc.html”
[86] “WD1705_H_diadema_E_New_Britain_fastqc.zip”
[87] “WD2047_H_diadema_Simbu_Prov_fastqc.html”
[88] “WD2047_H_diadema_Simbu_Prov_fastqc.zip”
[89] “WD2074_H_diadema_Gulf_Prov_fastqc.html”
[90] “WD2074_H_diadema_Gulf_Prov_fastqc.zip”
#create a character vector where each value is the full path to a .zip created by fastqc() for a given sample
#note: 'pattern' is a regular expression (not a shell glob), so match the literal ".zip" extension at the end of the name
#samps <- list.files("/home/d669d153/work/dia.din/qc", full.names = TRUE, pattern = "\\.zip$")
samps <- list.files(qc.dir, full.names = TRUE, pattern = "\\.zip$")
#warn (rather than silently producing no output) when no fastqc archives are found
if (length(samps) == 0) {
  warning("No fastqc .zip files found in ", qc.dir, call. = FALSE)
}
#show qc test results for each sample
for (i in samps) {
  #read info for the given sample from the .zip file generated in the previous step
  samp.info <- qc_read(i)
  #collect the qc modules for this sample; a fresh list is built on every iteration,
  #and the name avoids masking base::plot()
  qc.modules <- list(
    qc_plot(samp.info, "Basic statistics"),
    qc_plot(samp.info, "Per sequence quality scores"),
    qc_plot(samp.info, "Sequence duplication levels")
  )
  #visualize the basic statistics table, labelled with the archive's file name
  print(paste0("QC results for sample ", basename(i)))
  cat("\n")
  print(kable(qc.modules[[1]]))
  cat("\n")
  #visualize the quality score and duplication level plots side by side
  grid.arrange(qc.modules[[2]], qc.modules[[3]], ncol = 2)
}
[1] “QC results for sample 908108_H_diadema_Gatokae_fastqc.zip”
Measure | Value |
---|---|
Filename | 908108_H_diadema_Gatokae.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 454706 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample 908150_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908150_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 120063 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 38 |
[1] “QC results for sample 908151_H_diadema_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908151_H_diadema_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 3174216 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 38 |
[1] “QC results for sample 908152_H_diadema_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908152_H_diadema_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1388568 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample 908153a_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908153a_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 7300968 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample 908154_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908154_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 519876 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample 908155_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908155_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 989397 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample 908156_H_diadema_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908156_H_diadema_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 4264843 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample 908208_H_diadema_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908208_H_diadema_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 769417 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample JM19686_H_diadema_Choiseul_fastqc.zip”
Measure | Value |
---|---|
Filename | JM19686_H_diadema_Choiseul.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1029 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 34 |
[1] “QC results for sample KVO150_H_diadema_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO150_H_diadema_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 34555 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO168_H_diadema_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO168_H_diadema_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 262565 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO169_H_diadema_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO169_H_diadema_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 499713 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO170_H_diadema_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO170_H_diadema_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 825167 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO171_H_diadema_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO171_H_diadema_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 271596 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO172_H_diadema_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO172_H_diadema_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 98438 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO242_H_dinops_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO242_H_dinops_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1350278 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample KVO243_H_dinops_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO243_H_dinops_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1797464 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample KVO244_H_dinops_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO244_H_dinops_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 3002231 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO245_H_dinops_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO245_H_dinops_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 4187253 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO246_H_dinops_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO246_H_dinops_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 2153670 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO248_H_dinops_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO248_H_dinops_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1799491 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO249_Hipposiderous_sp_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO249_Hipposiderous_sp_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 2164473 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO250_Hipposiderous_sp_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO250_Hipposiderous_sp_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 215420 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO251_Hipposiderous_sp_Rendova_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO251_Hipposiderous_sp_Rendova.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1739541 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample THL1048_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1048_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 4848 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 35 |
[1] “QC results for sample THL1120_H_dinops_Gatokae_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1120_H_dinops_Gatokae.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 3212 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample THL1121_H_dinops_Gatokae_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1121_H_dinops_Gatokae.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 6874 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 35 |
[1] “QC results for sample THL1122_H_dinops_Gatokae_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1122_H_dinops_Gatokae.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 288 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample THL1154_H_demissus_Makira_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1154_H_demissus_Makira.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 5201 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample THL1156_H_demissus_Makira_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1156_H_demissus_Makira.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 819353 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample THL1167_H_diadema_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1167_H_diadema_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1379 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample THL1172_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1172_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 173703 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 38 |
[1] “QC results for sample THL1173_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1173_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 979903 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample THL1221_H_diadema_Gatokae_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1221_H_diadema_Gatokae.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 43 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 40 |
[1] “QC results for sample THL1223_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1223_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 6655 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample THL17193_H_diadema_Ngella_fastqc.zip”
Measure | Value |
---|---|
Filename | THL17193_H_diadema_Ngella.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 3033643 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 38 |
[1] “QC results for sample THL17194_H_diadema_Ngella_fastqc.zip”
Measure | Value |
---|---|
Filename | THL17194_H_diadema_Ngella.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1210618 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample THL17195_H_diadema_Ngella_fastqc.zip”
Measure | Value |
---|---|
Filename | THL17195_H_diadema_Ngella.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1825457 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample THL17197_H_diadema_Ngella_fastqc.zip”
Measure | Value |
---|---|
Filename | THL17197_H_diadema_Ngella.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1546118 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample THL17198_H_diadema_Ngella_fastqc.zip”
Measure | Value |
---|---|
Filename | THL17198_H_diadema_Ngella.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 6607272 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample THL17199_H_diadema_Ngella_fastqc.zip”
Measure | Value |
---|---|
Filename | THL17199_H_diadema_Ngella.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1508601 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample WD1705_H_diadema_E_New_Britain_fastqc.zip”
Measure | Value |
---|---|
Filename | WD1705_H_diadema_E_New_Britain.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1323647 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample WD2047_H_diadema_Simbu_Prov_fastqc.zip”
Measure | Value |
---|---|
Filename | WD2047_H_diadema_Simbu_Prov.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 696126 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample WD2074_H_diadema_Gulf_Prov_fastqc.zip”
Measure | Value |
---|---|
Filename | WD2074_H_diadema_Gulf_Prov.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 945453 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
#aggregate the reports by pointing this function to the folder holding the output of fastqc()
#qc <- qc_aggregate("/home/d669d153/work/dia.din/qc", progressbar = FALSE)
#use FALSE rather than F: F is an ordinary variable that can be reassigned
qc <- qc_aggregate(qc.dir, progressbar = FALSE)
#stats per sample, rendered as a table
knitr::kable(qc_stats(qc))
sample | pct.dup | pct.gc | tot.seq | seq.length |
---|---|---|---|---|
908108_H_diadema_Gatokae.fq.gz | 84.98 | 36 | 454706 | 145 |
908150_H_dinops_Guadalcanal.fq.gz | 76.29 | 38 | 120063 | 145 |
908151_H_diadema_Guadalcanal.fq.gz | 94.54 | 38 | 3174216 | 145 |
908152_H_diadema_Guadalcanal.fq.gz | 89.86 | 37 | 1388568 | 145 |
908153a_H_dinops_Guadalcanal.fq.gz | 94.82 | 37 | 7300968 | 145 |
908154_H_dinops_Guadalcanal.fq.gz | 84.86 | 37 | 519876 | 145 |
908155_H_dinops_Guadalcanal.fq.gz | 88.96 | 37 | 989397 | 145 |
908156_H_diadema_Guadalcanal.fq.gz | 94.89 | 37 | 4264843 | 145 |
908208_H_diadema_Guadalcanal.fq.gz | 89.66 | 37 | 769417 | 145 |
JM19686_H_diadema_Choiseul.fq.gz | 18.85 | 34 | 1029 | 145 |
KVO150_H_diadema_Isabel.fq.gz | 58.87 | 36 | 34555 | 145 |
KVO168_H_diadema_Isabel.fq.gz | 80.63 | 36 | 262565 | 145 |
KVO169_H_diadema_Isabel.fq.gz | 86.28 | 36 | 499713 | 145 |
KVO170_H_diadema_Isabel.fq.gz | 89.21 | 36 | 825167 | 145 |
KVO171_H_diadema_Isabel.fq.gz | 81.79 | 36 | 271596 | 145 |
KVO172_H_diadema_Isabel.fq.gz | 70.24 | 36 | 98438 | 145 |
KVO242_H_dinops_Isabel.fq.gz | 91.91 | 37 | 1350278 | 145 |
KVO243_H_dinops_Isabel.fq.gz | 92.18 | 37 | 1797464 | 145 |
KVO244_H_dinops_Isabel.fq.gz | 93.16 | 36 | 3002231 | 145 |
KVO245_H_dinops_Isabel.fq.gz | 94.18 | 36 | 4187253 | 145 |
KVO246_H_dinops_Isabel.fq.gz | 91.61 | 36 | 2153670 | 145 |
KVO248_H_dinops_Isabel.fq.gz | 91.09 | 36 | 1799491 | 145 |
KVO249_Hipposiderous_sp_Isabel.fq.gz | 91.67 | 36 | 2164473 | 145 |
KVO250_Hipposiderous_sp_Isabel.fq.gz | 79.87 | 36 | 215420 | 145 |
KVO251_Hipposiderous_sp_Rendova.fq.gz | 91.80 | 36 | 1739541 | 145 |
THL1048_H_dinops_Guadalcanal.fq.gz | 35.68 | 35 | 4848 | 145 |
THL1120_H_dinops_Gatokae.fq.gz | 10.65 | 36 | 3212 | 145 |
THL1121_H_dinops_Gatokae.fq.gz | 9.18 | 35 | 6874 | 145 |
THL1122_H_dinops_Gatokae.fq.gz | 43.75 | 36 | 288 | 145 |
THL1154_H_demissus_Makira.fq.gz | 15.44 | 36 | 5201 | 145 |
THL1156_H_demissus_Makira.fq.gz | 88.94 | 36 | 819353 | 145 |
THL1167_H_diadema_Guadalcanal.fq.gz | 38.43 | 37 | 1379 | 145 |
THL1172_H_dinops_Guadalcanal.fq.gz | 77.77 | 38 | 173703 | 145 |
THL1173_H_dinops_Guadalcanal.fq.gz | 88.50 | 37 | 979903 | 145 |
THL1221_H_diadema_Gatokae.fq.gz | 39.53 | 40 | 43 | 145 |
THL1223_H_dinops_Guadalcanal.fq.gz | 14.46 | 36 | 6655 | 145 |
THL17193_H_diadema_Ngella.fq.gz | 94.40 | 38 | 3033643 | 145 |
THL17194_H_diadema_Ngella.fq.gz | 90.91 | 37 | 1210618 | 145 |
THL17195_H_diadema_Ngella.fq.gz | 92.44 | 37 | 1825457 | 145 |
THL17197_H_diadema_Ngella.fq.gz | 91.87 | 37 | 1546118 | 145 |
THL17198_H_diadema_Ngella.fq.gz | 95.67 | 37 | 6607272 | 145 |
THL17199_H_diadema_Ngella.fq.gz | 91.37 | 37 | 1508601 | 145 |
WD1705_H_diadema_E_New_Britain.fq.gz | 91.09 | 37 | 1323647 | 145 |
WD2047_H_diadema_Simbu_Prov.fq.gz | 86.17 | 36 | 696126 | 145 |
WD2074_H_diadema_Gulf_Prov.fq.gz | 88.42 | 36 | 945453 | 145 |
#save stats info as an object
stats.info <- qc_stats(qc)
#make tot.seq numeric
stats.info$tot.seq <- as.numeric(stats.info$tot.seq)
#compute the median read count and the 10%-of-median cutoff once,
#so the plots, the message and the table below all use the same values
med.reads <- median(stats.info$tot.seq)
low.cutoff <- med.reads * .1
#make histogram of number of sequence reads for each sample
#solid red line = median sample value
#dashed red line = 10% of median sample value
ggplot(stats.info, aes(x = tot.seq)) +
  geom_histogram(color = "black", fill = "white", bins = 20) +
  geom_vline(xintercept = med.reads, color = "red") +
  geom_vline(xintercept = low.cutoff, color = "red", linetype = "dashed") +
  theme_classic() +
  xlab("Number of sequencing reads")
#same histogram with finer bins (200 instead of 20)
ggplot(stats.info, aes(x = tot.seq)) +
  geom_histogram(color = "black", fill = "white", bins = 200) +
  geom_vline(xintercept = med.reads, color = "red") +
  geom_vline(xintercept = low.cutoff, color = "red", linetype = "dashed") +
  theme_classic() +
  xlab("Number of sequencing reads")
#show me the samples that have less than 10% of the number of reads as the median sample from this experiment (these should be dropped immediately)
print(paste("Median sample contains", med.reads, "reads. The following samples contain less than", low.cutoff, "reads (10% of the median), and should likely be dropped"))
#list the samples announced by the message above; which() drops any NA comparisons
knitr::kable(stats.info[which(stats.info$tot.seq < low.cutoff), ])
[1] “Median sample contains 825167 reads. The following samples contain less than 82516.7 reads (10% of the median), and should likely be dropped”
sample | pct.dup | pct.gc | tot.seq | seq.length |
---|---|---|---|---|
JM19686_H_diadema_Choiseul.fq.gz | 18.85 | 34 | 1029 | 145 |
KVO150_H_diadema_Isabel.fq.gz | 58.87 | 36 | 34555 | 145 |
THL1048_H_dinops_Guadalcanal.fq.gz | 35.68 | 35 | 4848 | 145 |
THL1120_H_dinops_Gatokae.fq.gz | 10.65 | 36 | 3212 | 145 |
THL1121_H_dinops_Gatokae.fq.gz | 9.18 | 35 | 6874 | 145 |
THL1122_H_dinops_Gatokae.fq.gz | 43.75 | 36 | 288 | 145 |
THL1154_H_demissus_Makira.fq.gz | 15.44 | 36 | 5201 | 145 |
THL1167_H_diadema_Guadalcanal.fq.gz | 38.43 | 37 | 1379 | 145 |
THL1221_H_diadema_Gatokae.fq.gz | 39.53 | 40 | 43 | 145 |
THL1223_H_dinops_Guadalcanal.fq.gz | 14.46 | 36 | 6655 | 145 |