library(gridExtra)
library(knitr)
library(ggplot2)
library(fastqcr)
#must run this if fastqc is not already installed locally
#fastqc_install()
###ONLY THIS CHUNK REQUIRES MODIFICATION###
###assign your directory locations here:

# Full path to the directory that holds one .fastq.gz file per sample.
fq.dir <- "/home/d669d153/work/hipposideros/fastq"

# Full path to the output directory where the FastQC results should be written.
qc.dir <- "/home/d669d153/work/hipposideros/qc"

# Run FastQC from within R on the FASTQ files in fq.dir, using 4 threads.
# This only has to happen once: when you are merely tweaking the downstream
# visualizations, this call can be commented out.
fastqc(fq.dir = fq.dir, qc.dir = qc.dir, threads = 4)

# Show the contents of the output directory to confirm that FastQC worked.
list.files(qc.dir)

[1] “908108_H_diadema_Gatokae_fastqc.html”
[2] “908108_H_diadema_Gatokae_fastqc.zip”
[3] “908150_H_dinops_Guadalcanal_fastqc.html”
[4] “908150_H_dinops_Guadalcanal_fastqc.zip”
[5] “908151_H_diadema_Guadalcanal_fastqc.html”
[6] “908151_H_diadema_Guadalcanal_fastqc.zip”
[7] “908152_H_diadema_Guadalcanal_fastqc.html”
[8] “908152_H_diadema_Guadalcanal_fastqc.zip”
[9] “908153a_H_dinops_Guadalcanal_fastqc.html”
[10] “908153a_H_dinops_Guadalcanal_fastqc.zip”
[11] “908154_H_dinops_Guadalcanal_fastqc.html”
[12] “908154_H_dinops_Guadalcanal_fastqc.zip”
[13] “908155_H_dinops_Guadalcanal_fastqc.html”
[14] “908155_H_dinops_Guadalcanal_fastqc.zip”
[15] “908156_H_diadema_Guadalcanal_fastqc.html”
[16] “908156_H_diadema_Guadalcanal_fastqc.zip”
[17] “908208_H_diadema_Guadalcanal_fastqc.html”
[18] “908208_H_diadema_Guadalcanal_fastqc.zip”
[19] “JM19686_H_diadema_Choiseul_fastqc.html”
[20] “JM19686_H_diadema_Choiseul_fastqc.zip”
[21] “KO-P3_S3_R1_001_fastqc.html”
[22] “KO-P3_S3_R1_001_fastqc.zip”
[23] “KVO150_H_diadema_Isabel_fastqc.html”
[24] “KVO150_H_diadema_Isabel_fastqc.zip”
[25] “KVO168_H_diadema_Isabel_fastqc.html”
[26] “KVO168_H_diadema_Isabel_fastqc.zip”
[27] “KVO169_H_diadema_Isabel_fastqc.html”
[28] “KVO169_H_diadema_Isabel_fastqc.zip”
[29] “KVO170_H_diadema_Isabel_fastqc.html”
[30] “KVO170_H_diadema_Isabel_fastqc.zip”
[31] “KVO171_H_diadema_Isabel_fastqc.html”
[32] “KVO171_H_diadema_Isabel_fastqc.zip”
[33] “KVO172_H_diadema_Isabel_fastqc.html”
[34] “KVO172_H_diadema_Isabel_fastqc.zip”
[35] “KVO242_H_dinops_Rendova_fastqc.html”
[36] “KVO242_H_dinops_Rendova_fastqc.zip”
[37] “KVO243_H_dinops_Rendova_fastqc.html”
[38] “KVO243_H_dinops_Rendova_fastqc.zip”
[39] “KVO245_H_dinops_Rendova_fastqc.html”
[40] “KVO245_H_dinops_Rendova_fastqc.zip”
[41] “KVO246_H_dinops_Rendova_fastqc.html”
[42] “KVO246_H_dinops_Rendova_fastqc.zip”
[43] “KVO248_H_diadema_Rendova_fastqc.html”
[44] “KVO248_H_diadema_Rendova_fastqc.zip”
[45] “THL1048_H_dinops_Guadalcanal_fastqc.html”
[46] “THL1048_H_dinops_Guadalcanal_fastqc.zip”
[47] “THL1120_H_dinops_Gatokae_fastqc.html”
[48] “THL1120_H_dinops_Gatokae_fastqc.zip”
[49] “THL1121_H_dinops_Gatokae_fastqc.html”
[50] “THL1121_H_dinops_Gatokae_fastqc.zip”
[51] “THL1122_H_dinops_Gatokae_fastqc.html”
[52] “THL1122_H_dinops_Gatokae_fastqc.zip”
[53] “THL1154_H_demissus_Makira_fastqc.html”
[54] “THL1154_H_demissus_Makira_fastqc.zip”
[55] “THL1156_H_demissus_Makira_fastqc.html”
[56] “THL1156_H_demissus_Makira_fastqc.zip”
[57] “THL1167_H_diadema_Guadalcanal_fastqc.html”
[58] “THL1167_H_diadema_Guadalcanal_fastqc.zip”
[59] “THL1172_H_dinops_Guadalcanal_fastqc.html”
[60] “THL1172_H_dinops_Guadalcanal_fastqc.zip”
[61] “THL1173_H_dinops_Guadalcanal_fastqc.html”
[62] “THL1173_H_dinops_Guadalcanal_fastqc.zip”
[63] “THL1221_H_diadema_Gatokae_fastqc.html”
[64] “THL1221_H_diadema_Gatokae_fastqc.zip”
[65] “THL1223_H_dinops_Guadalcanal_fastqc.html”
[66] “THL1223_H_dinops_Guadalcanal_fastqc.zip”
[67] “THL17193_H_diadema_Ngella_fastqc.html”
[68] “THL17193_H_diadema_Ngella_fastqc.zip”
[69] “THL17194_H_diadema_Ngella_fastqc.html”
[70] “THL17194_H_diadema_Ngella_fastqc.zip”
[71] “THL17195_H_diadema_Ngella_fastqc.html”
[72] “THL17195_H_diadema_Ngella_fastqc.zip”
[73] “THL17197_H_diadema_Ngella_fastqc.html”
[74] “THL17197_H_diadema_Ngella_fastqc.zip”
[75] “THL17198_H_diadema_Ngella_fastqc.html”
[76] “THL17198_H_diadema_Ngella_fastqc.zip”
[77] “THL17199_H_diadema_Ngella_fastqc.html”
[78] “THL17199_H_diadema_Ngella_fastqc.zip”
[79] “WD1705_H_diadema_E_New_Britain_fastqc.html”
[80] “WD1705_H_diadema_E_New_Britain_fastqc.zip”
[81] “WD2047_H_diadema_Simbu_Prov_fastqc.html”
[82] “WD2047_H_diadema_Simbu_Prov_fastqc.zip”
[83] “WD2074_H_diadema_Gulf_Prov_fastqc.html”
[84] “WD2074_H_diadema_Gulf_Prov_fastqc.zip”

# Character vector where each value is the full path to a .zip archive created
# by fastqc() for a given sample.
# list.files() interprets `pattern` as a regular expression, not a shell glob,
# so match the literal ".zip" extension anchored at the end of the file name.
samps <- list.files(qc.dir, full.names = TRUE, pattern = "\\.zip$")

# Print the QC summary table and draw the QC plots for each sample in turn.
for (i in samps) {
  # read the FastQC results for this sample from its .zip archive
  samp.info <- qc_read(i)

  # QC modules for this sample: [[1]] is printed as a table below, [[2]] and
  # [[3]] are drawn as plots. The list is rebuilt on every iteration, so no
  # explicit cleanup is needed, and it no longer reuses the name of base plot().
  qc.plots <- list(
    qc_plot(samp.info, "Basic statistics"),
    qc_plot(samp.info, "Per sequence quality scores"),
    qc_plot(samp.info, "Sequence duplication levels")
  )

  # header naming the sample by its archive file name (directory stripped)
  print(paste0("QC results for sample ", basename(i)))

  cat("\n")

  # table of basic statistics
  print(kable(qc.plots[[1]]))

  cat("\n")

  # quality-score and duplication-level plots, side by side
  grid.arrange(qc.plots[[2]], qc.plots[[3]], ncol = 2)
}

[1] “QC results for sample 908108_H_diadema_Gatokae_fastqc.zip”

Measure Value
Filename 908108_H_diadema_Gatokae.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 456195
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample 908150_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename 908150_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 123287
Sequences flagged as poor quality 0
Sequence length 145
%GC 38

[1] “QC results for sample 908151_H_diadema_Guadalcanal_fastqc.zip”

Measure Value
Filename 908151_H_diadema_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3189301
Sequences flagged as poor quality 0
Sequence length 145
%GC 38

[1] “QC results for sample 908152_H_diadema_Guadalcanal_fastqc.zip”

Measure Value
Filename 908152_H_diadema_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1388568
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample 908153a_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename 908153a_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 7332578
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample 908154_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename 908154_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 521388
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample 908155_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename 908155_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 989397
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample 908156_H_diadema_Guadalcanal_fastqc.zip”

Measure Value
Filename 908156_H_diadema_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4283313
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample 908208_H_diadema_Guadalcanal_fastqc.zip”

Measure Value
Filename 908208_H_diadema_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 769417
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample JM19686_H_diadema_Choiseul_fastqc.zip”

Measure Value
Filename JM19686_H_diadema_Choiseul.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 5088
Sequences flagged as poor quality 0
Sequence length 145
%GC 35

[1] “QC results for sample KO-P3_S3_R1_001_fastqc.zip”

Measure Value
Filename KO-P3_S3_R1_001.fastq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 122147979
Sequences flagged as poor quality 0
Sequence length 151
%GC 37

[1] “QC results for sample KVO150_H_diadema_Isabel_fastqc.zip”

Measure Value
Filename KVO150_H_diadema_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 34578
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO168_H_diadema_Isabel_fastqc.zip”

Measure Value
Filename KVO168_H_diadema_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 266025
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO169_H_diadema_Isabel_fastqc.zip”

Measure Value
Filename KVO169_H_diadema_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 499713
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO170_H_diadema_Isabel_fastqc.zip”

Measure Value
Filename KVO170_H_diadema_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 825167
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO171_H_diadema_Isabel_fastqc.zip”

Measure Value
Filename KVO171_H_diadema_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 271596
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO172_H_diadema_Isabel_fastqc.zip”

Measure Value
Filename KVO172_H_diadema_Isabel.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 98438
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO242_H_dinops_Rendova_fastqc.zip”

Measure Value
Filename KVO242_H_dinops_Rendova.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1355358
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample KVO243_H_dinops_Rendova_fastqc.zip”

Measure Value
Filename KVO243_H_dinops_Rendova.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1797464
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample KVO245_H_dinops_Rendova_fastqc.zip”

Measure Value
Filename KVO245_H_dinops_Rendova.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4204013
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO246_H_dinops_Rendova_fastqc.zip”

Measure Value
Filename KVO246_H_dinops_Rendova.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2153670
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample KVO248_H_diadema_Rendova_fastqc.zip”

Measure Value
Filename KVO248_H_diadema_Rendova.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1799491
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample THL1048_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename THL1048_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 4848
Sequences flagged as poor quality 0
Sequence length 145
%GC 35

[1] “QC results for sample THL1120_H_dinops_Gatokae_fastqc.zip”

Measure Value
Filename THL1120_H_dinops_Gatokae.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3212
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample THL1121_H_dinops_Gatokae_fastqc.zip”

Measure Value
Filename THL1121_H_dinops_Gatokae.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 6874
Sequences flagged as poor quality 0
Sequence length 145
%GC 35

[1] “QC results for sample THL1122_H_dinops_Gatokae_fastqc.zip”

Measure Value
Filename THL1122_H_dinops_Gatokae.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 288
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample THL1154_H_demissus_Makira_fastqc.zip”

Measure Value
Filename THL1154_H_demissus_Makira.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 6361
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample THL1156_H_demissus_Makira_fastqc.zip”

Measure Value
Filename THL1156_H_demissus_Makira.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 819353
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample THL1167_H_diadema_Guadalcanal_fastqc.zip”

Measure Value
Filename THL1167_H_diadema_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1409
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample THL1172_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename THL1172_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 173703
Sequences flagged as poor quality 0
Sequence length 145
%GC 38

[1] “QC results for sample THL1173_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename THL1173_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 979903
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample THL1221_H_diadema_Gatokae_fastqc.zip”

Measure Value
Filename THL1221_H_diadema_Gatokae.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 2435
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample THL1223_H_dinops_Guadalcanal_fastqc.zip”

Measure Value
Filename THL1223_H_dinops_Guadalcanal.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 6655
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample THL17193_H_diadema_Ngella_fastqc.zip”

Measure Value
Filename THL17193_H_diadema_Ngella.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 3046762
Sequences flagged as poor quality 0
Sequence length 145
%GC 38

[1] “QC results for sample THL17194_H_diadema_Ngella_fastqc.zip”

Measure Value
Filename THL17194_H_diadema_Ngella.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1218936
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample THL17195_H_diadema_Ngella_fastqc.zip”

Measure Value
Filename THL17195_H_diadema_Ngella.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1825457
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample THL17197_H_diadema_Ngella_fastqc.zip”

Measure Value
Filename THL17197_H_diadema_Ngella.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1551531
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample THL17198_H_diadema_Ngella_fastqc.zip”

Measure Value
Filename THL17198_H_diadema_Ngella.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 6622220
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample THL17199_H_diadema_Ngella_fastqc.zip”

Measure Value
Filename THL17199_H_diadema_Ngella.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1508601
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample WD1705_H_diadema_E_New_Britain_fastqc.zip”

Measure Value
Filename WD1705_H_diadema_E_New_Britain.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 1336956
Sequences flagged as poor quality 0
Sequence length 145
%GC 37

[1] “QC results for sample WD2047_H_diadema_Simbu_Prov_fastqc.zip”

Measure Value
Filename WD2047_H_diadema_Simbu_Prov.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 704726
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

[1] “QC results for sample WD2074_H_diadema_Gulf_Prov_fastqc.zip”

Measure Value
Filename WD2074_H_diadema_Gulf_Prov.fq.gz
File type Conventional base calls
Encoding Sanger / Illumina 1.9
Total Sequences 951142
Sequences flagged as poor quality 0
Sequence length 145
%GC 36

# Aggregate the FastQC reports by pointing qc_aggregate() at the folder that
# holds the output of fastqc(); the progress bar is switched off.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
qc <- qc_aggregate(qc.dir, progressbar = FALSE)

# Per-sample summary statistics (pct.dup, pct.gc, tot.seq, seq.length),
# rendered as a table.
knitr::kable(qc_stats(qc))
sample pct.dup pct.gc tot.seq seq.length
908108_H_diadema_Gatokae.fq.gz 84.89 36 456195 145
908150_H_dinops_Guadalcanal.fq.gz 75.37 38 123287 145
908151_H_diadema_Guadalcanal.fq.gz 94.29 38 3189301 145
908152_H_diadema_Guadalcanal.fq.gz 89.86 37 1388568 145
908153a_H_dinops_Guadalcanal.fq.gz 94.54 37 7332578 145
908154_H_dinops_Guadalcanal.fq.gz 84.85 37 521388 145
908155_H_dinops_Guadalcanal.fq.gz 88.96 37 989397 145
908156_H_diadema_Guadalcanal.fq.gz 94.72 37 4283313 145
908208_H_diadema_Guadalcanal.fq.gz 89.66 37 769417 145
JM19686_H_diadema_Choiseul.fq.gz 13.09 35 5088 145
KO-P3_S3_R1_001 91.78 37 122147979 151
KVO150_H_diadema_Isabel.fq.gz 58.84 36 34578 145
KVO168_H_diadema_Isabel.fq.gz 79.97 36 266025 145
KVO169_H_diadema_Isabel.fq.gz 86.28 36 499713 145
KVO170_H_diadema_Isabel.fq.gz 89.21 36 825167 145
KVO171_H_diadema_Isabel.fq.gz 81.79 36 271596 145
KVO172_H_diadema_Isabel.fq.gz 70.24 36 98438 145
KVO242_H_dinops_Rendova.fq.gz 91.85 37 1355358 145
KVO243_H_dinops_Rendova.fq.gz 92.18 37 1797464 145
KVO245_H_dinops_Rendova.fq.gz 94.10 36 4204013 145
KVO246_H_dinops_Rendova.fq.gz 91.61 36 2153670 145
KVO248_H_diadema_Rendova.fq.gz 91.09 36 1799491 145
THL1048_H_dinops_Guadalcanal.fq.gz 35.68 35 4848 145
THL1120_H_dinops_Gatokae.fq.gz 10.65 36 3212 145
THL1121_H_dinops_Gatokae.fq.gz 9.18 35 6874 145
THL1122_H_dinops_Gatokae.fq.gz 43.75 36 288 145
THL1154_H_demissus_Makira.fq.gz 16.62 36 6361 145
THL1156_H_demissus_Makira.fq.gz 88.94 36 819353 145
THL1167_H_diadema_Guadalcanal.fq.gz 37.83 37 1409 145
THL1172_H_dinops_Guadalcanal.fq.gz 77.77 38 173703 145
THL1173_H_dinops_Guadalcanal.fq.gz 88.50 37 979903 145
THL1221_H_diadema_Gatokae.fq.gz 39.92 37 2435 145
THL1223_H_dinops_Guadalcanal.fq.gz 14.46 36 6655 145
THL17193_H_diadema_Ngella.fq.gz 94.22 38 3046762 145
THL17194_H_diadema_Ngella.fq.gz 90.47 37 1218936 145
THL17195_H_diadema_Ngella.fq.gz 92.44 37 1825457 145
THL17197_H_diadema_Ngella.fq.gz 91.79 37 1551531 145
THL17198_H_diadema_Ngella.fq.gz 95.61 37 6622220 145
THL17199_H_diadema_Ngella.fq.gz 91.37 37 1508601 145
WD1705_H_diadema_E_New_Britain.fq.gz 90.71 37 1336956 145
WD2047_H_diadema_Simbu_Prov.fq.gz 85.78 36 704726 145
WD2074_H_diadema_Gulf_Prov.fq.gz 88.11 36 951142 145

solid red line = median number of reads per sample

dashed red line = 10% of the median number of reads per sample

# Save the per-sample stats as an object for plotting and filtering.
stats.info <- qc_stats(qc)
# make tot.seq numeric so it can be binned and summarised
stats.info$tot.seq <- as.numeric(stats.info$tot.seq)

# Median number of reads across samples, and the 10%-of-median cutoff used to
# flag low-coverage samples. Computed once and reused below.
med.reads <- median(stats.info$tot.seq)
low.reads <- med.reads * .1

# Histogram of the number of sequencing reads per sample.
#   stats: data frame with a numeric tot.seq column
#   bins:  number of histogram bins
# solid red line  = median sample value
# dashed red line = 10% of median sample value
plot_read_hist <- function(stats, bins) {
  stats.median <- median(stats$tot.seq)
  ggplot(stats, aes(x = tot.seq)) +
    geom_histogram(color = "black", fill = "white", bins = bins) +
    # reference lines are passed as constants rather than mapped through aes()
    geom_vline(xintercept = stats.median, color = "red") +
    geom_vline(xintercept = stats.median * .1, color = "red",
               linetype = "dashed") +
    theme_classic() +
    xlab("Number of sequencing reads")
}

# coarse view of the read-count distribution
plot_read_hist(stats.info, bins = 20)

# finer view (more bins) of the same distribution
plot_read_hist(stats.info, bins = 200)

# Report the median and the cutoff; samples with fewer reads than 10% of the
# median sample from this experiment should be dropped immediately.
print(paste("Median sample contains", med.reads, "reads. The following samples contain less than", low.reads, "reads (10% of the median), and should likely be dropped"))

[1] “Median sample contains 822260 reads. The following samples contain less than 82226 reads (10% of the median), and should likely be dropped”

# Table of the samples whose read count falls below 10% of the median sample.
knitr::kable(
  with(stats.info, stats.info[tot.seq < median(tot.seq) * .1, ])
)
sample pct.dup pct.gc tot.seq seq.length
JM19686_H_diadema_Choiseul.fq.gz 13.09 35 5088 145
KVO150_H_diadema_Isabel.fq.gz 58.84 36 34578 145
THL1048_H_dinops_Guadalcanal.fq.gz 35.68 35 4848 145
THL1120_H_dinops_Gatokae.fq.gz 10.65 36 3212 145
THL1121_H_dinops_Gatokae.fq.gz 9.18 35 6874 145
THL1122_H_dinops_Gatokae.fq.gz 43.75 36 288 145
THL1154_H_demissus_Makira.fq.gz 16.62 36 6361 145
THL1167_H_diadema_Guadalcanal.fq.gz 37.83 37 1409 145
THL1221_H_diadema_Gatokae.fq.gz 39.92 37 2435 145
THL1223_H_dinops_Guadalcanal.fq.gz 14.46 36 6655 145