library(gridExtra)
library(knitr)
library(ggplot2)
library(fastqcr)
#must run this if fastqc is not already installed locally
#fastqc_install()
###ONLY THIS CHUNK REQUIRES MODIFICATION###
###set your directory locations below:
#full path to the input directory, which must contain one .fastq.gz file per sample
fq.dir <- "/home/d669d153/work/hipposideros/fastq"
#full path to the output directory where you want the fastqc results to be written
qc.dir <- "/home/d669d153/work/hipposideros/qc"
#run fastqc, from within r, on every .fastq.gz file found in fq.dir
#this step only has to be run once; when you are only tweaking the downstream visualizations, you can comment it out
fastqc(
  fq.dir = fq.dir, # directory holding the FASTQ files
  qc.dir = qc.dir, # directory that receives the results
  threads = 4      # number of threads
)
#list the contents of the output directory to confirm that fastqc worked
list.files(path = qc.dir)
[1] “908108_H_diadema_Gatokae_fastqc.html”
[2] “908108_H_diadema_Gatokae_fastqc.zip”
[3] “908150_H_dinops_Guadalcanal_fastqc.html”
[4] “908150_H_dinops_Guadalcanal_fastqc.zip”
[5] “908151_H_diadema_Guadalcanal_fastqc.html”
[6] “908151_H_diadema_Guadalcanal_fastqc.zip”
[7] “908152_H_diadema_Guadalcanal_fastqc.html”
[8] “908152_H_diadema_Guadalcanal_fastqc.zip”
[9] “908153a_H_dinops_Guadalcanal_fastqc.html”
[10] “908153a_H_dinops_Guadalcanal_fastqc.zip”
[11] “908154_H_dinops_Guadalcanal_fastqc.html”
[12] “908154_H_dinops_Guadalcanal_fastqc.zip”
[13] “908155_H_dinops_Guadalcanal_fastqc.html”
[14] “908155_H_dinops_Guadalcanal_fastqc.zip”
[15] “908156_H_diadema_Guadalcanal_fastqc.html”
[16] “908156_H_diadema_Guadalcanal_fastqc.zip”
[17] “908208_H_diadema_Guadalcanal_fastqc.html”
[18] “908208_H_diadema_Guadalcanal_fastqc.zip”
[19] “JM19686_H_diadema_Choiseul_fastqc.html”
[20] “JM19686_H_diadema_Choiseul_fastqc.zip”
[21] “KO-P3_S3_R1_001_fastqc.html”
[22] “KO-P3_S3_R1_001_fastqc.zip”
[23] “KVO150_H_diadema_Isabel_fastqc.html”
[24] “KVO150_H_diadema_Isabel_fastqc.zip”
[25] “KVO168_H_diadema_Isabel_fastqc.html”
[26] “KVO168_H_diadema_Isabel_fastqc.zip”
[27] “KVO169_H_diadema_Isabel_fastqc.html”
[28] “KVO169_H_diadema_Isabel_fastqc.zip”
[29] “KVO170_H_diadema_Isabel_fastqc.html”
[30] “KVO170_H_diadema_Isabel_fastqc.zip”
[31] “KVO171_H_diadema_Isabel_fastqc.html”
[32] “KVO171_H_diadema_Isabel_fastqc.zip”
[33] “KVO172_H_diadema_Isabel_fastqc.html”
[34] “KVO172_H_diadema_Isabel_fastqc.zip”
[35] “KVO242_H_dinops_Rendova_fastqc.html”
[36] “KVO242_H_dinops_Rendova_fastqc.zip”
[37] “KVO243_H_dinops_Rendova_fastqc.html”
[38] “KVO243_H_dinops_Rendova_fastqc.zip”
[39] “KVO245_H_dinops_Rendova_fastqc.html”
[40] “KVO245_H_dinops_Rendova_fastqc.zip”
[41] “KVO246_H_dinops_Rendova_fastqc.html”
[42] “KVO246_H_dinops_Rendova_fastqc.zip”
[43] “KVO248_H_diadema_Rendova_fastqc.html”
[44] “KVO248_H_diadema_Rendova_fastqc.zip”
[45] “THL1048_H_dinops_Guadalcanal_fastqc.html”
[46] “THL1048_H_dinops_Guadalcanal_fastqc.zip”
[47] “THL1120_H_dinops_Gatokae_fastqc.html”
[48] “THL1120_H_dinops_Gatokae_fastqc.zip”
[49] “THL1121_H_dinops_Gatokae_fastqc.html”
[50] “THL1121_H_dinops_Gatokae_fastqc.zip”
[51] “THL1122_H_dinops_Gatokae_fastqc.html”
[52] “THL1122_H_dinops_Gatokae_fastqc.zip”
[53] “THL1154_H_demissus_Makira_fastqc.html”
[54] “THL1154_H_demissus_Makira_fastqc.zip”
[55] “THL1156_H_demissus_Makira_fastqc.html”
[56] “THL1156_H_demissus_Makira_fastqc.zip”
[57] “THL1167_H_diadema_Guadalcanal_fastqc.html”
[58] “THL1167_H_diadema_Guadalcanal_fastqc.zip”
[59] “THL1172_H_dinops_Guadalcanal_fastqc.html”
[60] “THL1172_H_dinops_Guadalcanal_fastqc.zip”
[61] “THL1173_H_dinops_Guadalcanal_fastqc.html”
[62] “THL1173_H_dinops_Guadalcanal_fastqc.zip”
[63] “THL1221_H_diadema_Gatokae_fastqc.html”
[64] “THL1221_H_diadema_Gatokae_fastqc.zip”
[65] “THL1223_H_dinops_Guadalcanal_fastqc.html”
[66] “THL1223_H_dinops_Guadalcanal_fastqc.zip”
[67] “THL17193_H_diadema_Ngella_fastqc.html”
[68] “THL17193_H_diadema_Ngella_fastqc.zip”
[69] “THL17194_H_diadema_Ngella_fastqc.html”
[70] “THL17194_H_diadema_Ngella_fastqc.zip”
[71] “THL17195_H_diadema_Ngella_fastqc.html”
[72] “THL17195_H_diadema_Ngella_fastqc.zip”
[73] “THL17197_H_diadema_Ngella_fastqc.html”
[74] “THL17197_H_diadema_Ngella_fastqc.zip”
[75] “THL17198_H_diadema_Ngella_fastqc.html”
[76] “THL17198_H_diadema_Ngella_fastqc.zip”
[77] “THL17199_H_diadema_Ngella_fastqc.html”
[78] “THL17199_H_diadema_Ngella_fastqc.zip”
[79] “WD1705_H_diadema_E_New_Britain_fastqc.html”
[80] “WD1705_H_diadema_E_New_Britain_fastqc.zip”
[81] “WD2047_H_diadema_Simbu_Prov_fastqc.html”
[82] “WD2047_H_diadema_Simbu_Prov_fastqc.zip”
[83] “WD2074_H_diadema_Gulf_Prov_fastqc.html”
[84] “WD2074_H_diadema_Gulf_Prov_fastqc.zip”
#create a character vector where each value is the full path to the .zip created by fastqc() for a given sample
#note: `pattern` is a regular expression, not a shell glob, so match a literal ".zip" suffix rather than "*.zip"
samps <- list.files(qc.dir, full.names = TRUE, pattern = "\\.zip$")
#show the qc test results for each sample, one sample per iteration
for (i in samps) {
  #read info for the given sample from the .zip file generated in the previous step
  samp.info <- qc_read(i)
  #pull the three qc modules of interest for the given sample
  #(held in descriptively named objects rather than a list called `plot`, which reused the name of base plot())
  basic.stats <- qc_plot(samp.info, "Basic statistics")
  seq.quality <- qc_plot(samp.info, "Per sequence quality scores")
  seq.duplication <- qc_plot(samp.info, "Sequence duplication levels")
  #print a header naming the sample (file name only, without the directory part), then its basic statistics table
  print(paste0("QC results for sample ", basename(i)))
  cat("\n")
  print(kable(basic.stats))
  cat("\n")
  #draw the two plots side by side
  grid.arrange(seq.quality, seq.duplication, ncol = 2)
  #remove the per-sample objects so nothing carries over to the next sample
  rm(basic.stats, seq.quality, seq.duplication)
}
[1] “QC results for sample 908108_H_diadema_Gatokae_fastqc.zip”
Measure | Value |
---|---|
Filename | 908108_H_diadema_Gatokae.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 456195 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample 908150_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908150_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 123287 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 38 |
[1] “QC results for sample 908151_H_diadema_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908151_H_diadema_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 3189301 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 38 |
[1] “QC results for sample 908152_H_diadema_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908152_H_diadema_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1388568 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample 908153a_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908153a_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 7332578 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample 908154_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908154_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 521388 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample 908155_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908155_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 989397 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample 908156_H_diadema_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908156_H_diadema_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 4283313 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample 908208_H_diadema_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | 908208_H_diadema_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 769417 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample JM19686_H_diadema_Choiseul_fastqc.zip”
Measure | Value |
---|---|
Filename | JM19686_H_diadema_Choiseul.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 5088 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 35 |
[1] “QC results for sample KO-P3_S3_R1_001_fastqc.zip”
Measure | Value |
---|---|
Filename | KO-P3_S3_R1_001.fastq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 122147979 |
Sequences flagged as poor quality | 0 |
Sequence length | 151 |
%GC | 37 |
[1] “QC results for sample KVO150_H_diadema_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO150_H_diadema_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 34578 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO168_H_diadema_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO168_H_diadema_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 266025 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO169_H_diadema_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO169_H_diadema_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 499713 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO170_H_diadema_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO170_H_diadema_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 825167 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO171_H_diadema_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO171_H_diadema_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 271596 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO172_H_diadema_Isabel_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO172_H_diadema_Isabel.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 98438 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO242_H_dinops_Rendova_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO242_H_dinops_Rendova.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1355358 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample KVO243_H_dinops_Rendova_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO243_H_dinops_Rendova.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1797464 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample KVO245_H_dinops_Rendova_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO245_H_dinops_Rendova.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 4204013 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO246_H_dinops_Rendova_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO246_H_dinops_Rendova.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 2153670 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample KVO248_H_diadema_Rendova_fastqc.zip”
Measure | Value |
---|---|
Filename | KVO248_H_diadema_Rendova.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1799491 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample THL1048_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1048_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 4848 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 35 |
[1] “QC results for sample THL1120_H_dinops_Gatokae_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1120_H_dinops_Gatokae.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 3212 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample THL1121_H_dinops_Gatokae_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1121_H_dinops_Gatokae.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 6874 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 35 |
[1] “QC results for sample THL1122_H_dinops_Gatokae_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1122_H_dinops_Gatokae.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 288 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample THL1154_H_demissus_Makira_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1154_H_demissus_Makira.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 6361 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample THL1156_H_demissus_Makira_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1156_H_demissus_Makira.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 819353 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample THL1167_H_diadema_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1167_H_diadema_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1409 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample THL1172_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1172_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 173703 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 38 |
[1] “QC results for sample THL1173_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1173_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 979903 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample THL1221_H_diadema_Gatokae_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1221_H_diadema_Gatokae.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 2435 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample THL1223_H_dinops_Guadalcanal_fastqc.zip”
Measure | Value |
---|---|
Filename | THL1223_H_dinops_Guadalcanal.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 6655 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample THL17193_H_diadema_Ngella_fastqc.zip”
Measure | Value |
---|---|
Filename | THL17193_H_diadema_Ngella.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 3046762 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 38 |
[1] “QC results for sample THL17194_H_diadema_Ngella_fastqc.zip”
Measure | Value |
---|---|
Filename | THL17194_H_diadema_Ngella.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1218936 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample THL17195_H_diadema_Ngella_fastqc.zip”
Measure | Value |
---|---|
Filename | THL17195_H_diadema_Ngella.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1825457 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample THL17197_H_diadema_Ngella_fastqc.zip”
Measure | Value |
---|---|
Filename | THL17197_H_diadema_Ngella.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1551531 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample THL17198_H_diadema_Ngella_fastqc.zip”
Measure | Value |
---|---|
Filename | THL17198_H_diadema_Ngella.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 6622220 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample THL17199_H_diadema_Ngella_fastqc.zip”
Measure | Value |
---|---|
Filename | THL17199_H_diadema_Ngella.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1508601 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample WD1705_H_diadema_E_New_Britain_fastqc.zip”
Measure | Value |
---|---|
Filename | WD1705_H_diadema_E_New_Britain.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 1336956 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 37 |
[1] “QC results for sample WD2047_H_diadema_Simbu_Prov_fastqc.zip”
Measure | Value |
---|---|
Filename | WD2047_H_diadema_Simbu_Prov.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 704726 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
[1] “QC results for sample WD2074_H_diadema_Gulf_Prov_fastqc.zip”
Measure | Value |
---|---|
Filename | WD2074_H_diadema_Gulf_Prov.fq.gz |
File type | Conventional base calls |
Encoding | Sanger / Illumina 1.9 |
Total Sequences | 951142 |
Sequences flagged as poor quality | 0 |
Sequence length | 145 |
%GC | 36 |
#aggregate the reports by pointing this function to the folder holding output of fastqc()
#(progressbar is switched off with FALSE rather than F, because F is an ordinary variable that can be reassigned)
qc <- qc_aggregate(qc.dir, progressbar = FALSE)
#print a table of summary stats per sample
knitr::kable(qc_stats(qc))
sample | pct.dup | pct.gc | tot.seq | seq.length |
---|---|---|---|---|
908108_H_diadema_Gatokae.fq.gz | 84.89 | 36 | 456195 | 145 |
908150_H_dinops_Guadalcanal.fq.gz | 75.37 | 38 | 123287 | 145 |
908151_H_diadema_Guadalcanal.fq.gz | 94.29 | 38 | 3189301 | 145 |
908152_H_diadema_Guadalcanal.fq.gz | 89.86 | 37 | 1388568 | 145 |
908153a_H_dinops_Guadalcanal.fq.gz | 94.54 | 37 | 7332578 | 145 |
908154_H_dinops_Guadalcanal.fq.gz | 84.85 | 37 | 521388 | 145 |
908155_H_dinops_Guadalcanal.fq.gz | 88.96 | 37 | 989397 | 145 |
908156_H_diadema_Guadalcanal.fq.gz | 94.72 | 37 | 4283313 | 145 |
908208_H_diadema_Guadalcanal.fq.gz | 89.66 | 37 | 769417 | 145 |
JM19686_H_diadema_Choiseul.fq.gz | 13.09 | 35 | 5088 | 145 |
KO-P3_S3_R1_001 | 91.78 | 37 | 122147979 | 151 |
KVO150_H_diadema_Isabel.fq.gz | 58.84 | 36 | 34578 | 145 |
KVO168_H_diadema_Isabel.fq.gz | 79.97 | 36 | 266025 | 145 |
KVO169_H_diadema_Isabel.fq.gz | 86.28 | 36 | 499713 | 145 |
KVO170_H_diadema_Isabel.fq.gz | 89.21 | 36 | 825167 | 145 |
KVO171_H_diadema_Isabel.fq.gz | 81.79 | 36 | 271596 | 145 |
KVO172_H_diadema_Isabel.fq.gz | 70.24 | 36 | 98438 | 145 |
KVO242_H_dinops_Rendova.fq.gz | 91.85 | 37 | 1355358 | 145 |
KVO243_H_dinops_Rendova.fq.gz | 92.18 | 37 | 1797464 | 145 |
KVO245_H_dinops_Rendova.fq.gz | 94.10 | 36 | 4204013 | 145 |
KVO246_H_dinops_Rendova.fq.gz | 91.61 | 36 | 2153670 | 145 |
KVO248_H_diadema_Rendova.fq.gz | 91.09 | 36 | 1799491 | 145 |
THL1048_H_dinops_Guadalcanal.fq.gz | 35.68 | 35 | 4848 | 145 |
THL1120_H_dinops_Gatokae.fq.gz | 10.65 | 36 | 3212 | 145 |
THL1121_H_dinops_Gatokae.fq.gz | 9.18 | 35 | 6874 | 145 |
THL1122_H_dinops_Gatokae.fq.gz | 43.75 | 36 | 288 | 145 |
THL1154_H_demissus_Makira.fq.gz | 16.62 | 36 | 6361 | 145 |
THL1156_H_demissus_Makira.fq.gz | 88.94 | 36 | 819353 | 145 |
THL1167_H_diadema_Guadalcanal.fq.gz | 37.83 | 37 | 1409 | 145 |
THL1172_H_dinops_Guadalcanal.fq.gz | 77.77 | 38 | 173703 | 145 |
THL1173_H_dinops_Guadalcanal.fq.gz | 88.50 | 37 | 979903 | 145 |
THL1221_H_diadema_Gatokae.fq.gz | 39.92 | 37 | 2435 | 145 |
THL1223_H_dinops_Guadalcanal.fq.gz | 14.46 | 36 | 6655 | 145 |
THL17193_H_diadema_Ngella.fq.gz | 94.22 | 38 | 3046762 | 145 |
THL17194_H_diadema_Ngella.fq.gz | 90.47 | 37 | 1218936 | 145 |
THL17195_H_diadema_Ngella.fq.gz | 92.44 | 37 | 1825457 | 145 |
THL17197_H_diadema_Ngella.fq.gz | 91.79 | 37 | 1551531 | 145 |
THL17198_H_diadema_Ngella.fq.gz | 95.61 | 37 | 6622220 | 145 |
THL17199_H_diadema_Ngella.fq.gz | 91.37 | 37 | 1508601 | 145 |
WD1705_H_diadema_E_New_Britain.fq.gz | 90.71 | 37 | 1336956 | 145 |
WD2047_H_diadema_Simbu_Prov.fq.gz | 85.78 | 36 | 704726 | 145 |
WD2074_H_diadema_Gulf_Prov.fq.gz | 88.11 | 36 | 951142 | 145 |
#save stats info as an object
stats.info <- qc_stats(qc)
#make tot.seq numeric
stats.info$tot.seq <- as.numeric(stats.info$tot.seq)

#helper: histogram of the number of sequencing reads per sample
#  stats = table with a numeric tot.seq column (one row per sample)
#  bins  = number of histogram bins
#returns the ggplot object
#solid red line = median sample value
#dashed red line = 10% of median sample value
plot_read_depth <- function(stats, bins) {
  #compute the median once and pass it as a constant, instead of evaluating it inside aes() as a per-row aesthetic
  med.reads <- median(stats$tot.seq)
  ggplot(stats, aes(x = tot.seq)) +
    geom_histogram(color = "black", fill = "white", bins = bins) +
    geom_vline(xintercept = med.reads, color = "red") +
    #explicit "dashed" replaces the numeric code 14, which is outside the documented 0-6 line type codes
    geom_vline(xintercept = med.reads * .1, color = "red", linetype = "dashed") +
    theme_classic() +
    xlab("Number of sequencing reads")
}

#make histogram of number of sequence reads for each sample
plot_read_depth(stats.info, bins = 20)
#same histogram with finer bins
plot_read_depth(stats.info, bins = 200)
#report the median read count and the cutoff at 10% of that median
#samples with fewer reads than the cutoff should be dropped immediately
local({
  med.reads <- median(stats.info$tot.seq)
  low.cutoff <- med.reads * .1
  print(paste("Median sample contains", med.reads, "reads. The following samples contain less than", low.cutoff, "reads (10% of the median), and should likely be dropped"))
})
[1] “Median sample contains 822260 reads. The following samples contain less than 82226 reads (10% of the median), and should likely be dropped”
#table of the samples whose read count is below 10% of the median read count
knitr::kable(local({
  low.cutoff <- median(stats.info$tot.seq) * .1
  is.low <- stats.info$tot.seq < low.cutoff
  stats.info[is.low, ]
}))
sample | pct.dup | pct.gc | tot.seq | seq.length |
---|---|---|---|---|
JM19686_H_diadema_Choiseul.fq.gz | 13.09 | 35 | 5088 | 145 |
KVO150_H_diadema_Isabel.fq.gz | 58.84 | 36 | 34578 | 145 |
THL1048_H_dinops_Guadalcanal.fq.gz | 35.68 | 35 | 4848 | 145 |
THL1120_H_dinops_Gatokae.fq.gz | 10.65 | 36 | 3212 | 145 |
THL1121_H_dinops_Gatokae.fq.gz | 9.18 | 35 | 6874 | 145 |
THL1122_H_dinops_Gatokae.fq.gz | 43.75 | 36 | 288 | 145 |
THL1154_H_demissus_Makira.fq.gz | 16.62 | 36 | 6361 | 145 |
THL1167_H_diadema_Guadalcanal.fq.gz | 37.83 | 37 | 1409 | 145 |
THL1221_H_diadema_Gatokae.fq.gz | 39.92 | 37 | 2435 | 145 |
THL1223_H_dinops_Guadalcanal.fq.gz | 14.46 | 36 | 6655 | 145 |