library(gridExtra)
library(knitr)
library(ggplot2)
#install.packages("fastqcr")
library(fastqcr)
#must run this if fastqc is not already installed locally
#fastqc_install()
###ONLY THIS CHUNK REQUIRES MODIFICATION###
###assign your directory locations here:

# Input: full path to the directory holding one .fastq.gz file per sample
fq.dir <- "/home/d669d153/work/dia.din/fq"

# Output: full path to the directory where the fastqc() results are written
qc.dir <- "~/Downloads/qc"

# Run FastQC on every .fastq.gz file in fq.dir from within R.
# This only needs to happen once; keep it commented out while tweaking the
# downstream visualizations.
# fastqc(
#   fq.dir = fq.dir, # FASTQ files directory
#   qc.dir = qc.dir, # results directory
#   threads = 4      # number of threads
# )

# Sanity check: list the output directory to confirm that fastqc worked
list.files(path = qc.dir)

[1] “908108_H_diadema_Gatokae_fastqc.html”
[2] “908108_H_diadema_Gatokae_fastqc.zip”
[3] “908150_H_dinops_Guadalcanal_fastqc.html”
[4] “908150_H_dinops_Guadalcanal_fastqc.zip”
[5] “908151_H_diadema_Guadalcanal_fastqc.html”
[6] “908151_H_diadema_Guadalcanal_fastqc.zip”
[7] “908152_H_diadema_Guadalcanal_fastqc.html”
[8] “908152_H_diadema_Guadalcanal_fastqc.zip”
[9] “908153a_H_dinops_Guadalcanal_fastqc.html”
[10] “908153a_H_dinops_Guadalcanal_fastqc.zip”
[11] “908154_H_dinops_Guadalcanal_fastqc.html”
[12] “908154_H_dinops_Guadalcanal_fastqc.zip”
[13] “908155_H_dinops_Guadalcanal_fastqc.html”
[14] “908155_H_dinops_Guadalcanal_fastqc.zip”
[15] “908156_H_diadema_Guadalcanal_fastqc.html”
[16] “908156_H_diadema_Guadalcanal_fastqc.zip”
[17] “908208_H_diadema_Guadalcanal_fastqc.html”
[18] “908208_H_diadema_Guadalcanal_fastqc.zip”
[19] “JM19686_H_diadema_Choiseul_fastqc.html”
[20] “JM19686_H_diadema_Choiseul_fastqc.zip”
[21] “KVO150_H_diadema_Isabel_fastqc.html”
[22] “KVO150_H_diadema_Isabel_fastqc.zip”
[23] “KVO168_H_diadema_Isabel_fastqc.html”
[24] “KVO168_H_diadema_Isabel_fastqc.zip”
[25] “KVO169_H_diadema_Isabel_fastqc.html”
[26] “KVO169_H_diadema_Isabel_fastqc.zip”
[27] “KVO170_H_diadema_Isabel_fastqc.html”
[28] “KVO170_H_diadema_Isabel_fastqc.zip”
[29] “KVO171_H_diadema_Isabel_fastqc.html”
[30] “KVO171_H_diadema_Isabel_fastqc.zip”
[31] “KVO172_H_diadema_Isabel_fastqc.html”
[32] “KVO172_H_diadema_Isabel_fastqc.zip”
[33] “KVO242_H_dinops_Isabel_fastqc.html”
[34] “KVO242_H_dinops_Isabel_fastqc.zip”
[35] “KVO243_H_dinops_Isabel_fastqc.html”
[36] “KVO243_H_dinops_Isabel_fastqc.zip”
[37] “KVO244_H_dinops_Isabel_fastqc.html”
[38] “KVO244_H_dinops_Isabel_fastqc.zip”
[39] “KVO245_H_dinops_Isabel_fastqc.html”
[40] “KVO245_H_dinops_Isabel_fastqc.zip”
[41] “KVO246_H_dinops_Isabel_fastqc.html”
[42] “KVO246_H_dinops_Isabel_fastqc.zip”
[43] “KVO248_H_dinops_Isabel_fastqc.html”
[44] “KVO248_H_dinops_Isabel_fastqc.zip”
[45] “KVO249_Hipposiderous_sp_Isabel_fastqc.html”
[46] “KVO249_Hipposiderous_sp_Isabel_fastqc.zip”
[47] “KVO250_Hipposiderous_sp_Isabel_fastqc.html”
[48] “KVO250_Hipposiderous_sp_Isabel_fastqc.zip”
[49] “KVO251_Hipposiderous_sp_Rendova_fastqc.html”
[50] “KVO251_Hipposiderous_sp_Rendova_fastqc.zip”
[51] “THL1048_H_dinops_Guadalcanal_fastqc.html”
[52] “THL1048_H_dinops_Guadalcanal_fastqc.zip”
[53] “THL1120_H_dinops_Gatokae_fastqc.html”
[54] “THL1120_H_dinops_Gatokae_fastqc.zip”
[55] “THL1121_H_dinops_Gatokae_fastqc.html”
[56] “THL1121_H_dinops_Gatokae_fastqc.zip”
[57] “THL1122_H_dinops_Gatokae_fastqc.html”
[58] “THL1122_H_dinops_Gatokae_fastqc.zip”
[59] “THL1154_H_demissus_Makira_fastqc.html”
[60] “THL1154_H_demissus_Makira_fastqc.zip”
[61] “THL1156_H_demissus_Makira_fastqc.html”
[62] “THL1156_H_demissus_Makira_fastqc.zip”
[63] “THL1167_H_diadema_Guadalcanal_fastqc.html”
[64] “THL1167_H_diadema_Guadalcanal_fastqc.zip”
[65] “THL1172_H_dinops_Guadalcanal_fastqc.html”
[66] “THL1172_H_dinops_Guadalcanal_fastqc.zip”
[67] “THL1173_H_dinops_Guadalcanal_fastqc.html”
[68] “THL1173_H_dinops_Guadalcanal_fastqc.zip”
[69] “THL1221_H_diadema_Gatokae_fastqc.html”
[70] “THL1221_H_diadema_Gatokae_fastqc.zip”
[71] “THL1223_H_dinops_Guadalcanal_fastqc.html”
[72] “THL1223_H_dinops_Guadalcanal_fastqc.zip”
[73] “THL17193_H_diadema_Ngella_fastqc.html”
[74] “THL17193_H_diadema_Ngella_fastqc.zip”
[75] “THL17194_H_diadema_Ngella_fastqc.html”
[76] “THL17194_H_diadema_Ngella_fastqc.zip”
[77] “THL17195_H_diadema_Ngella_fastqc.html”
[78] “THL17195_H_diadema_Ngella_fastqc.zip”
[79] “THL17197_H_diadema_Ngella_fastqc.html”
[80] “THL17197_H_diadema_Ngella_fastqc.zip”
[81] “THL17198_H_diadema_Ngella_fastqc.html”
[82] “THL17198_H_diadema_Ngella_fastqc.zip”
[83] “THL17199_H_diadema_Ngella_fastqc.html”
[84] “THL17199_H_diadema_Ngella_fastqc.zip”
[85] “WD1705_H_diadema_E_New_Britain_fastqc.html”
[86] “WD1705_H_diadema_E_New_Britain_fastqc.zip”
[87] “WD2047_H_diadema_Simbu_Prov_fastqc.html”
[88] “WD2047_H_diadema_Simbu_Prov_fastqc.zip”
[89] “WD2074_H_diadema_Gulf_Prov_fastqc.html”
[90] “WD2074_H_diadema_Gulf_Prov_fastqc.zip”

# Build a character vector in which each value is the full path to one .zip
# archive created by fastqc() in qc.dir.
# `pattern` is a regular expression, not a shell glob: "\\.zip$" matches a
# literal ".zip" at the end of the file name, whereas the glob-style "*.zip"
# would also accept any name that merely contains a character followed by "zip".
# Alternative with a hard-coded results directory:
#samps <- list.files("/home/d669d153/work/dia.din/qc", full.names = TRUE, pattern = "\\.zip$")
samps <- list.files(qc.dir, full.names = TRUE, pattern = "\\.zip$")

# Report the QC test results for each sample: a heading, the basic statistics
# table, and the two diagnostic plots side by side
for (zip.path in samps) {
  # parse the .zip archive that fastqc() generated for this sample
  report <- qc_read(zip.path)

  # build the three QC views for this sample
  basic.stats <- qc_plot(report, "Basic statistics")
  quality.plot <- qc_plot(report, "Per sequence quality scores")
  duplication.plot <- qc_plot(report, "Sequence duplication levels")

  # heading: the archive's file name with the directory part stripped
  sample.label <- gsub(".*/", "", zip.path)
  print(paste0("QC results for sample ", sample.label))
  cat("\n")

  # basic statistics rendered as a table
  print(kable(basic.stats))
  cat("\n")

  # quality-score and duplication-level plots in a two-column layout
  grid.arrange(quality.plot, duplication.plot, ncol = 2)
}

[1] “QC results for sample 908108_H_diadema_Gatokae_fastqc.zip”

Measure Value
Filename 908108_H_diadema_Gatokae.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 454706
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample 908150_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename 908150_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 120063
Sequences flagged as poor quality 0
Sequence length 145
%GC 38

[1] “QC results for sample 908151_H_diadema_Guadalcanal_fastqc.zip”

Measure Value
Filename 908151_H_diadema_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3174216
Sequences flagged as poor quality 0
Sequence length 145
%GC 38

[1] “QC results for sample 908152_H_diadema_Guadalcanal_fastqc.zip”

Measure Value
Filename 908152_H_diadema_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1388568
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample 908153a_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename 908153a_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 7300968
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample 908154_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename 908154_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 519876
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample 908155_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename 908155_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 989397
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample 908156_H_diadema_Guadalcanal_fastqc.zip”

Measure Value
Filename 908156_H_diadema_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4264843
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample 908208_H_diadema_Guadalcanal_fastqc.zip”

Measure Value
Filename 908208_H_diadema_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 769417
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample JM19686_H_diadema_Choiseul_fastqc.zip”

Measure Value
Filename JM19686_H_diadema_Choiseul.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1029
Sequences flagged as poor quality 0
Sequence length 145
%GC 34

[1] “QC results for sample KVO150_H_diadema_Isabel_fastqc.zip”

Measure Value
Filename KVO150_H_diadema_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 34555
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO168_H_diadema_Isabel_fastqc.zip”

Measure Value
Filename KVO168_H_diadema_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 262565
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO169_H_diadema_Isabel_fastqc.zip”

Measure Value
Filename KVO169_H_diadema_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 499713
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO170_H_diadema_Isabel_fastqc.zip”

Measure Value
Filename KVO170_H_diadema_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 825167
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO171_H_diadema_Isabel_fastqc.zip”

Measure Value
Filename KVO171_H_diadema_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 271596
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO172_H_diadema_Isabel_fastqc.zip”

Measure Value
Filename KVO172_H_diadema_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 98438
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO242_H_dinops_Isabel_fastqc.zip”

Measure Value
Filename KVO242_H_dinops_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1350278
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample KVO243_H_dinops_Isabel_fastqc.zip”

Measure Value
Filename KVO243_H_dinops_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1797464
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample KVO244_H_dinops_Isabel_fastqc.zip”

Measure Value
Filename KVO244_H_dinops_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3002231
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO245_H_dinops_Isabel_fastqc.zip”

Measure Value
Filename KVO245_H_dinops_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4187253
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO246_H_dinops_Isabel_fastqc.zip”

Measure Value
Filename KVO246_H_dinops_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2153670
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO248_H_dinops_Isabel_fastqc.zip”

Measure Value
Filename KVO248_H_dinops_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1799491
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO249_Hipposiderous_sp_Isabel_fastqc.zip”

Measure Value
Filename KVO249_Hipposiderous_sp_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2164473
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO250_Hipposiderous_sp_Isabel_fastqc.zip”

Measure Value
Filename KVO250_Hipposiderous_sp_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 215420
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO251_Hipposiderous_sp_Rendova_fastqc.zip”

Measure Value
Filename KVO251_Hipposiderous_sp_Rendova.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1739541
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample THL1048_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename THL1048_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4848
Sequences flagged as poor quality 0
Sequence length 145
%GC 35

[1] “QC results for sample THL1120_H_dinops_Gatokae_fastqc.zip”

Measure Value
Filename THL1120_H_dinops_Gatokae.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3212
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample THL1121_H_dinops_Gatokae_fastqc.zip”

Measure Value
Filename THL1121_H_dinops_Gatokae.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 6874
Sequences flagged as poor quality 0
Sequence length 145
%GC 35

[1] “QC results for sample THL1122_H_dinops_Gatokae_fastqc.zip”

Measure Value
Filename THL1122_H_dinops_Gatokae.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 288
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample THL1154_H_demissus_Makira_fastqc.zip”

Measure Value
Filename THL1154_H_demissus_Makira.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 5201
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample THL1156_H_demissus_Makira_fastqc.zip”

Measure Value
Filename THL1156_H_demissus_Makira.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 819353
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample THL1167_H_diadema_Guadalcanal_fastqc.zip”

Measure Value
Filename THL1167_H_diadema_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1379
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample THL1172_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename THL1172_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 173703
Sequences flagged as poor quality 0
Sequence length 145
%GC 38

[1] “QC results for sample THL1173_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename THL1173_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 979903
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample THL1221_H_diadema_Gatokae_fastqc.zip”

Measure Value
Filename THL1221_H_diadema_Gatokae.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 43
Sequences flagged as poor quality 0
Sequence length 145
%GC 40

[1] “QC results for sample THL1223_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename THL1223_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 6655
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample THL17193_H_diadema_Ngella_fastqc.zip”

Measure Value
Filename THL17193_H_diadema_Ngella.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3033643
Sequences flagged as poor quality 0
Sequence length 145
%GC 38

[1] “QC results for sample THL17194_H_diadema_Ngella_fastqc.zip”

Measure Value
Filename THL17194_H_diadema_Ngella.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1210618
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample THL17195_H_diadema_Ngella_fastqc.zip”

Measure Value
Filename THL17195_H_diadema_Ngella.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1825457
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample THL17197_H_diadema_Ngella_fastqc.zip”

Measure Value
Filename THL17197_H_diadema_Ngella.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1546118
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample THL17198_H_diadema_Ngella_fastqc.zip”

Measure Value
Filename THL17198_H_diadema_Ngella.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 6607272
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample THL17199_H_diadema_Ngella_fastqc.zip”

Measure Value
Filename THL17199_H_diadema_Ngella.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1508601
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample WD1705_H_diadema_E_New_Britain_fastqc.zip”

Measure Value
Filename WD1705_H_diadema_E_New_Britain.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1323647
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample WD2047_H_diadema_Simbu_Prov_fastqc.zip”

Measure Value
Filename WD2047_H_diadema_Simbu_Prov.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 696126
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample WD2074_H_diadema_Gulf_Prov_fastqc.zip”

Measure Value
Filename WD2074_H_diadema_Gulf_Prov.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 945453
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

# Aggregate the reports by pointing qc_aggregate() at the folder that holds
# the output of fastqc().
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, while `FALSE` is a reserved constant.
# Alternative with a hard-coded results directory:
#qc <- qc_aggregate("/home/d669d153/work/dia.din/qc", progressbar = FALSE)
qc <- qc_aggregate(qc.dir, progressbar = FALSE)

# Summary statistics per sample, rendered as a table
knitr::kable(qc_stats(qc))
sample pct.dup pct.gc tot.seq seq.length
908108_H_diadema_Gatokae.fq.gz 84.98 36 454706 145
908150_H_dinops_Guadalcanal.fq.gz 76.29 38 120063 145
908151_H_diadema_Guadalcanal.fq.gz 94.54 38 3174216 145
908152_H_diadema_Guadalcanal.fq.gz 89.86 37 1388568 145
908153a_H_dinops_Guadalcanal.fq.gz 94.82 37 7300968 145
908154_H_dinops_Guadalcanal.fq.gz 84.86 37 519876 145
908155_H_dinops_Guadalcanal.fq.gz 88.96 37 989397 145
908156_H_diadema_Guadalcanal.fq.gz 94.89 37 4264843 145
908208_H_diadema_Guadalcanal.fq.gz 89.66 37 769417 145
JM19686_H_diadema_Choiseul.fq.gz 18.85 34 1029 145
KVO150_H_diadema_Isabel.fq.gz 58.87 36 34555 145
KVO168_H_diadema_Isabel.fq.gz 80.63 36 262565 145
KVO169_H_diadema_Isabel.fq.gz 86.28 36 499713 145
KVO170_H_diadema_Isabel.fq.gz 89.21 36 825167 145
KVO171_H_diadema_Isabel.fq.gz 81.79 36 271596 145
KVO172_H_diadema_Isabel.fq.gz 70.24 36 98438 145
KVO242_H_dinops_Isabel.fq.gz 91.91 37 1350278 145
KVO243_H_dinops_Isabel.fq.gz 92.18 37 1797464 145
KVO244_H_dinops_Isabel.fq.gz 93.16 36 3002231 145
KVO245_H_dinops_Isabel.fq.gz 94.18 36 4187253 145
KVO246_H_dinops_Isabel.fq.gz 91.61 36 2153670 145
KVO248_H_dinops_Isabel.fq.gz 91.09 36 1799491 145
KVO249_Hipposiderous_sp_Isabel.fq.gz 91.67 36 2164473 145
KVO250_Hipposiderous_sp_Isabel.fq.gz 79.87 36 215420 145
KVO251_Hipposiderous_sp_Rendova.fq.gz 91.80 36 1739541 145
THL1048_H_dinops_Guadalcanal.fq.gz 35.68 35 4848 145
THL1120_H_dinops_Gatokae.fq.gz 10.65 36 3212 145
THL1121_H_dinops_Gatokae.fq.gz 9.18 35 6874 145
THL1122_H_dinops_Gatokae.fq.gz 43.75 36 288 145
THL1154_H_demissus_Makira.fq.gz 15.44 36 5201 145
THL1156_H_demissus_Makira.fq.gz 88.94 36 819353 145
THL1167_H_diadema_Guadalcanal.fq.gz 38.43 37 1379 145
THL1172_H_dinops_Guadalcanal.fq.gz 77.77 38 173703 145
THL1173_H_dinops_Guadalcanal.fq.gz 88.50 37 979903 145
THL1221_H_diadema_Gatokae.fq.gz 39.53 40 43 145
THL1223_H_dinops_Guadalcanal.fq.gz 14.46 36 6655 145
THL17193_H_diadema_Ngella.fq.gz 94.40 38 3033643 145
THL17194_H_diadema_Ngella.fq.gz 90.91 37 1210618 145
THL17195_H_diadema_Ngella.fq.gz 92.44 37 1825457 145
THL17197_H_diadema_Ngella.fq.gz 91.87 37 1546118 145
THL17198_H_diadema_Ngella.fq.gz 95.67 37 6607272 145
THL17199_H_diadema_Ngella.fq.gz 91.37 37 1508601 145
WD1705_H_diadema_E_New_Britain.fq.gz 91.09 37 1323647 145
WD2047_H_diadema_Simbu_Prov.fq.gz 86.17 36 696126 145
WD2074_H_diadema_Gulf_Prov.fq.gz 88.42 36 945453 145

solid red line = median sample value

dashed red line = 10% of median sample value

# Save the per-sample stats as an object for the plots and filters below
stats.info <- qc_stats(qc)
# Convert tot.seq to numeric so it can be binned and compared as a number
stats.info$tot.seq <- as.numeric(stats.info$tot.seq)

# Median read count across samples, computed once so that each reference line
# is drawn a single time instead of once per row of stats.info
med.reads <- median(stats.info$tot.seq)

# Histogram of the number of sequence reads for each sample (20 bins)
# solid red line = median sample value
# dashed red line = 10% of median sample value
ggplot(stats.info, aes(x = tot.seq)) +
  geom_histogram(color = "black", fill = "white", bins = 20) +
  geom_vline(xintercept = med.reads, color = "red") +
  # named line type instead of the out-of-range numeric code 14
  geom_vline(xintercept = med.reads * 0.1, color = "red", linetype = "dashed") +
  theme_classic() +
  xlab("Number of sequencing reads")

# Same histogram of reads per sample at a finer resolution (200 bins)
# solid red line = median sample value
# dashed red line = 10% of median sample value

# Median read count across samples, computed once so that each reference line
# is drawn a single time instead of once per row of stats.info
med.reads <- median(stats.info$tot.seq)

ggplot(stats.info, aes(x = tot.seq)) +
  geom_histogram(color = "black", fill = "white", bins = 200) +
  geom_vline(xintercept = med.reads, color = "red") +
  # named line type instead of the out-of-range numeric code 14
  geom_vline(xintercept = med.reads * 0.1, color = "red", linetype = "dashed") +
  theme_classic() +
  xlab("Number of sequencing reads")

# Report the median read count and the cutoff (10% of the median); samples with
# fewer reads than the cutoff should be dropped immediately
median.reads <- median(stats.info$tot.seq)
read.cutoff <- median.reads * .1
print(paste(
  "Median sample contains", median.reads,
  "reads. The following samples contain less than", read.cutoff,
  "reads (10% of the median), and should likely be dropped"
))

[1] “Median sample contains 825167 reads. The following samples contain less than 82516.7 reads (10% of the median), and should likely be dropped”

# Table of the samples whose read count is below 10% of the median read count
low.read.cutoff <- median(stats.info$tot.seq) * .1
is.low.read <- stats.info$tot.seq < low.read.cutoff
knitr::kable(stats.info[is.low.read, ])
sample pct.dup pct.gc tot.seq seq.length
JM19686_H_diadema_Choiseul.fq.gz 18.85 34 1029 145
KVO150_H_diadema_Isabel.fq.gz 58.87 36 34555 145
THL1048_H_dinops_Guadalcanal.fq.gz 35.68 35 4848 145
THL1120_H_dinops_Gatokae.fq.gz 10.65 36 3212 145
THL1121_H_dinops_Gatokae.fq.gz 9.18 35 6874 145
THL1122_H_dinops_Gatokae.fq.gz 43.75 36 288 145
THL1154_H_demissus_Makira.fq.gz 15.44 36 5201 145
THL1167_H_diadema_Guadalcanal.fq.gz 38.43 37 1379 145
THL1221_H_diadema_Gatokae.fq.gz 39.53 40 43 145
THL1223_H_dinops_Guadalcanal.fq.gz 14.46 36 6655 145