This vignette compares the run time of the hard_filter() function from SNPfiltR with that of the same filtering approach implemented in VCFtools. I repeated this procedure with both compressed (.gz) and uncompressed vcf input and found little difference between the two, so the comparisons shown here use gzipped vcf files as input. All of the input vcf files necessary to fully reproduce this performance comparison can be found here.

We are going to benchmark the performance of the hard_filter() function from the SNPfiltR package against the same filtering approach implemented in VCFtools. I have already generated vcf files with 500K, 400K, 300K, 200K, 100K, 50K, 20K, and 10K SNPs (each with 100 individuals) in order to compare performance across file sizes. For each vcf file, we will use the R package microbenchmark to benchmark the time it takes to:

1) Read in the vcf using vcfR::read.vcfR() and use SNPfiltR::hard_filter() to filter the file to a minimum depth of 5 per genotype and a minimum quality of 30 per genotype.
2) Use SNPfiltR::hard_filter() to filter the file to a minimum depth of 5 per genotype and a minimum quality of 30 per genotype, with the vcfR object already read in.

Using microbenchmark, I will execute three replicates of each approach and record the mean of the three replicates. Then I will execute the same filter (minimum depth = 5, minimum genotype quality [gq] = 30) three times using VCFtools, and again record the mean of the three replicates.

After doing this for each vcf file, we should get a sense of the performance of the SNPfiltR package relative to the state-of-the-art program VCFtools, and of how each program scales with input file size.

library(SNPfiltR)
## This is SNPfiltR v.1.0.1
## 
## Detailed usage information is available at: devonderaad.github.io/SNPfiltR/ 
## 
## If you use SNPfiltR in your published work, please cite the following papers: 
## 
## DeRaad, D.A. (2022), SNPfiltR: an R package for interactive and reproducible SNP filtering. Molecular Ecology Resources, 00, 1-15. http://doi.org/10.1111/1755-0998.13618 
## 
## Knaus, Brian J., and Niklaus J. Grunwald. 2017. VCFR: a package to manipulate and visualize variant call format data in R. Molecular Ecology Resources, 17.1:44-53. http://doi.org/10.1111/1755-0998.12549
## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.14.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

Benchmark 10K

# 10K SNPs
# Scenario 1: time the full workflow. read.vcfR() sits inside the timed
# expression, so reading the gzipped vcf is included in every replicate.
y <- microbenchmark(
  hard_filter(
    vcfR = read.vcfR("~/Desktop/benchmarking.vcfs/benchmark.10K.vcf.gz"),
    depth = 5,
    gq = 30
  ),
  # three replicates, with timings reported in seconds
  times = 3,
  unit = "s"
)
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 10000
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 10000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 10000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant: 10000
## All variants processed
## 32.58% of genotypes fall below a read depth of 5 and were converted to NA
## 1.36% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 10000
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 10000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 10000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant: 10000
## All variants processed
## 32.58% of genotypes fall below a read depth of 5 and were converted to NA
## 1.36% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 10000
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 10000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 10000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant: 10000
## All variants processed
## 32.58% of genotypes fall below a read depth of 5 and were converted to NA
## 1.36% of genotypes fall below a genotype quality of 30 and were converted to NA
# load the 10K benchmarking vcf once, ahead of time, so that it is available
# as an in-memory vcfR object
x <- read.vcfR(file = "~/Desktop/benchmarking.vcfs/benchmark.10K.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 10000
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 10000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 10000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant: 10000
## All variants processed
# Scenario 2: time the hard filtering step alone on the 10K file, using the
# vcfR object `x` that is already in memory (no file reading is timed)
z <- microbenchmark(
  hard_filter(
    vcfR = x,
    depth = 5,
    gq = 30
  ),
  # three replicates, with timings reported in seconds
  times = 3,
  unit = "s"
)
## 32.58% of genotypes fall below a read depth of 5 and were converted to NA
## 1.36% of genotypes fall below a genotype quality of 30 and were converted to NA
## 32.58% of genotypes fall below a read depth of 5 and were converted to NA
## 1.36% of genotypes fall below a genotype quality of 30 and were converted to NA
## 32.58% of genotypes fall below a read depth of 5 and were converted to NA
## 1.36% of genotypes fall below a genotype quality of 30 and were converted to NA
# replace each raw microbenchmark result with its summary data frame
y <- summary(y)
z <- summary(z)

# start the combined results data frame with the 10K summaries
sum.df <- rbind(y, z)

Benchmark 20K

# 20K SNPs
# drop the objects left in the workspace by the previous file size, so that
# memory they hold does not slow the functions down and distort the timings
rm(list = c("x", "y", "z"))

# Scenario 1: time the full workflow. read.vcfR() sits inside the timed
# expression, so reading the gzipped vcf is included in every replicate.
y <- microbenchmark(
  hard_filter(
    vcfR = read.vcfR("~/Desktop/benchmarking.vcfs/benchmark.20K.vcf.gz"),
    depth = 5,
    gq = 30
  ),
  # three replicates, with timings reported in seconds
  times = 3,
  unit = "s"
)
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 20000
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 20000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 20000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant: 20000
## All variants processed
## 30.09% of genotypes fall below a read depth of 5 and were converted to NA
## 1.43% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 20000
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 20000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 20000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant: 20000
## All variants processed
## 30.09% of genotypes fall below a read depth of 5 and were converted to NA
## 1.43% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 20000
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 20000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 20000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant: 20000
## All variants processed
## 30.09% of genotypes fall below a read depth of 5 and were converted to NA
## 1.43% of genotypes fall below a genotype quality of 30 and were converted to NA
# load the 20K benchmarking vcf once, ahead of time, so that it is available
# as an in-memory vcfR object
x <- read.vcfR(file = "~/Desktop/benchmarking.vcfs/benchmark.20K.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 20000
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 20000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 20000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant: 20000
## All variants processed
# Scenario 2: time the hard filtering step alone on the 20K file, using the
# vcfR object `x` that is already in memory (no file reading is timed)
z <- microbenchmark(
  hard_filter(
    vcfR = x,
    depth = 5,
    gq = 30
  ),
  # three replicates, with timings reported in seconds
  times = 3,
  unit = "s"
)
## 30.09% of genotypes fall below a read depth of 5 and were converted to NA
## 1.43% of genotypes fall below a genotype quality of 30 and were converted to NA
## 30.09% of genotypes fall below a read depth of 5 and were converted to NA
## 1.43% of genotypes fall below a genotype quality of 30 and were converted to NA
## 30.09% of genotypes fall below a read depth of 5 and were converted to NA
## 1.43% of genotypes fall below a genotype quality of 30 and were converted to NA
# replace each raw microbenchmark result with its summary data frame
y <- summary(y)
z <- summary(z)

# append the 20K summaries to the combined results data frame
sum.df <- rbind(sum.df, y, z)

Benchmark 50K

# 50K SNPs
# drop the objects left in the workspace by the previous file size, so that
# memory they hold does not slow the functions down and distort the timings
rm(list = c("x", "y", "z"))

# Scenario 1: time the full workflow. read.vcfR() sits inside the timed
# expression, so reading the gzipped vcf is included in every replicate.
y <- microbenchmark(
  hard_filter(
    vcfR = read.vcfR("~/Desktop/benchmarking.vcfs/benchmark.50K.vcf.gz"),
    depth = 5,
    gq = 30
  ),
  # three replicates, with timings reported in seconds
  times = 3,
  unit = "s"
)
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 50000
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 50000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 50000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant: 50000
## All variants processed
## 31.18% of genotypes fall below a read depth of 5 and were converted to NA
## 1.37% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 50000
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 50000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 50000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant: 50000
## All variants processed
## 31.18% of genotypes fall below a read depth of 5 and were converted to NA
## 1.37% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 50000
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 50000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 50000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant: 50000
## All variants processed
## 31.18% of genotypes fall below a read depth of 5 and were converted to NA
## 1.37% of genotypes fall below a genotype quality of 30 and were converted to NA
# load the 50K benchmarking vcf once, ahead of time, so that it is available
# as an in-memory vcfR object
x <- read.vcfR(file = "~/Desktop/benchmarking.vcfs/benchmark.50K.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 50000
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 50000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 50000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant: 50000
## All variants processed
# Scenario 2: time the hard filtering step alone on the 50K file, using the
# vcfR object `x` that is already in memory (no file reading is timed)
z <- microbenchmark(
  hard_filter(
    vcfR = x,
    depth = 5,
    gq = 30
  ),
  # three replicates, with timings reported in seconds
  times = 3,
  unit = "s"
)
## 31.18% of genotypes fall below a read depth of 5 and were converted to NA
## 1.37% of genotypes fall below a genotype quality of 30 and were converted to NA
## 31.18% of genotypes fall below a read depth of 5 and were converted to NA
## 1.37% of genotypes fall below a genotype quality of 30 and were converted to NA
## 31.18% of genotypes fall below a read depth of 5 and were converted to NA
## 1.37% of genotypes fall below a genotype quality of 30 and were converted to NA
# replace each raw microbenchmark result with its summary data frame
y <- summary(y)
z <- summary(z)

# append the 50K summaries to the combined results data frame
sum.df <- rbind(sum.df, y, z)

Benchmark 100K

# 100K SNPs
# drop the objects left in the workspace by the previous file size, so that
# memory they hold does not slow the functions down and distort the timings
rm(list = c("x", "y", "z"))

# Scenario 1: time the full workflow. read.vcfR() sits inside the timed
# expression, so reading the gzipped vcf is included in every replicate.
y <- microbenchmark(
  hard_filter(
    vcfR = read.vcfR("~/Desktop/benchmarking.vcfs/benchmark.100K.vcf.gz"),
    depth = 5,
    gq = 30
  ),
  # three replicates, with timings reported in seconds
  times = 3,
  unit = "s"
)
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 1e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 100000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 100000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant: 100000
## All variants processed
## 30.89% of genotypes fall below a read depth of 5 and were converted to NA
## 1.36% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 1e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 100000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 100000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant: 100000
## All variants processed
## 30.89% of genotypes fall below a read depth of 5 and were converted to NA
## 1.36% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 1e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 100000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 100000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant: 100000
## All variants processed
## 30.89% of genotypes fall below a read depth of 5 and were converted to NA
## 1.36% of genotypes fall below a genotype quality of 30 and were converted to NA
# load the 100K benchmarking vcf once, ahead of time, so that it is available
# as an in-memory vcfR object
x <- read.vcfR(file = "~/Desktop/benchmarking.vcfs/benchmark.100K.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 1e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 100000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 100000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant: 100000
## All variants processed
#now benchmark the filtering step alone, without having to read in the file itself
#(x is expected to hold the 100K vcfR object read in just above)
z<-microbenchmark(
  #benchmark 100K with the vcf already read in, so file reading is not part of the timed expression
  hard_filter(vcfR = x, depth = 5, gq = 30),
  #set number of reps (three) and report timings in seconds
  times = 3, unit = "s"
)
## 30.89% of genotypes fall below a read depth of 5 and were converted to NA
## 1.36% of genotypes fall below a genotype quality of 30 and were converted to NA
## 30.89% of genotypes fall below a read depth of 5 and were converted to NA
## 1.36% of genotypes fall below a genotype quality of 30 and were converted to NA
## 30.89% of genotypes fall below a read depth of 5 and were converted to NA
## 1.36% of genotypes fall below a genotype quality of 30 and were converted to NA
#convert each microbenchmark result to a dataframe of summary timings
#(z = filter only; y is presumably the read + filter benchmark for 100K, defined above this section)
y<-summary(y)
z<-summary(z)

#add these results as new rows of the full results dataframe
#(sum.df is created earlier in the vignette, outside this section)
sum.df<-rbind(sum.df,y,z)

Benchmark 200K

#200K
#remove the previous round's objects (x, y, z) from the workspace; their summaries are already stored in sum.df
#the intent is to avoid internal memory issues slowing down the functions and affecting benchmarking accuracy
#NOTE(review): rm() only removes the objects; consider whether an explicit gc() is wanted so memory is released before timing starts
rm(x,y,z)

#now benchmark with the file read included in the timed expression
y<-microbenchmark(
  #benchmark 200K while having to read in the vcf file (read.vcfR() is part of the expression, so it runs in every replicate)
  hard_filter(vcfR = read.vcfR("~/Desktop/benchmarking.vcfs/benchmark.200K.vcf.gz"),depth = 5,gq = 30),
  #set number of reps (three) and report timings in seconds
  times = 3, unit = "s"
)
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 2e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 200000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 200000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant: 200000
## All variants processed
## 29.5% of genotypes fall below a read depth of 5 and were converted to NA
## 1.28% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 2e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 200000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 200000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant: 200000
## All variants processed
## 29.5% of genotypes fall below a read depth of 5 and were converted to NA
## 1.28% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 2e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 200000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 200000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant: 200000
## All variants processed
## 29.5% of genotypes fall below a read depth of 5 and were converted to NA
## 1.28% of genotypes fall below a genotype quality of 30 and were converted to NA
#read in 200K benchmarking vcf once, outside any timed expression,
#so the next benchmark can time hard_filter() on its own
x<-read.vcfR("~/Desktop/benchmarking.vcfs/benchmark.200K.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 2e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 200000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 200000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant: 200000
## All variants processed
#now benchmark the filtering step alone, without having to read in the file itself
z<-microbenchmark(
  #benchmark 200K with the vcf already read in as x, so file reading is not part of the timed expression
  hard_filter(vcfR = x, depth = 5, gq = 30),
  #set number of reps (three) and report timings in seconds
  times = 3, unit = "s"
)
## 29.5% of genotypes fall below a read depth of 5 and were converted to NA
## 1.28% of genotypes fall below a genotype quality of 30 and were converted to NA
## 29.5% of genotypes fall below a read depth of 5 and were converted to NA
## 1.28% of genotypes fall below a genotype quality of 30 and were converted to NA
## 29.5% of genotypes fall below a read depth of 5 and were converted to NA
## 1.28% of genotypes fall below a genotype quality of 30 and were converted to NA
#convert each microbenchmark result to a dataframe of summary timings
#(y = read + filter, z = filter only, both for the 200K vcf)
y<-summary(y)
z<-summary(z)

#add these results as new rows of the full results dataframe
#(sum.df is created earlier in the vignette, outside this section)
sum.df<-rbind(sum.df,y,z)

Benchmark 300K

#300K
#remove the previous round's objects (x, y, z) from the workspace; their summaries are already stored in sum.df
#the intent is to avoid internal memory issues slowing down the functions and affecting benchmarking accuracy
#NOTE(review): rm() only removes the objects; consider whether an explicit gc() is wanted so memory is released before timing starts
rm(x,y,z)

#now benchmark with the file read included in the timed expression
y<-microbenchmark(
  #benchmark 300K while having to read in the vcf file (read.vcfR() is part of the expression, so it runs in every replicate)
  hard_filter(vcfR = read.vcfR("~/Desktop/benchmarking.vcfs/benchmark.300K.vcf.gz"),depth = 5,gq = 30),
  #set number of reps (three) and report timings in seconds
  times = 3, unit = "s"
)
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 3e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 300000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 300000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant 201000
Processed variant 202000
Processed variant 203000
Processed variant 204000
Processed variant 205000
Processed variant 206000
Processed variant 207000
Processed variant 208000
Processed variant 209000
Processed variant 210000
Processed variant 211000
Processed variant 212000
Processed variant 213000
Processed variant 214000
Processed variant 215000
Processed variant 216000
Processed variant 217000
Processed variant 218000
Processed variant 219000
Processed variant 220000
Processed variant 221000
Processed variant 222000
Processed variant 223000
Processed variant 224000
Processed variant 225000
Processed variant 226000
Processed variant 227000
Processed variant 228000
Processed variant 229000
Processed variant 230000
Processed variant 231000
Processed variant 232000
Processed variant 233000
Processed variant 234000
Processed variant 235000
Processed variant 236000
Processed variant 237000
Processed variant 238000
Processed variant 239000
Processed variant 240000
Processed variant 241000
Processed variant 242000
Processed variant 243000
Processed variant 244000
Processed variant 245000
Processed variant 246000
Processed variant 247000
Processed variant 248000
Processed variant 249000
Processed variant 250000
Processed variant 251000
Processed variant 252000
Processed variant 253000
Processed variant 254000
Processed variant 255000
Processed variant 256000
Processed variant 257000
Processed variant 258000
Processed variant 259000
Processed variant 260000
Processed variant 261000
Processed variant 262000
Processed variant 263000
Processed variant 264000
Processed variant 265000
Processed variant 266000
Processed variant 267000
Processed variant 268000
Processed variant 269000
Processed variant 270000
Processed variant 271000
Processed variant 272000
Processed variant 273000
Processed variant 274000
Processed variant 275000
Processed variant 276000
Processed variant 277000
Processed variant 278000
Processed variant 279000
Processed variant 280000
Processed variant 281000
Processed variant 282000
Processed variant 283000
Processed variant 284000
Processed variant 285000
Processed variant 286000
Processed variant 287000
Processed variant 288000
Processed variant 289000
Processed variant 290000
Processed variant 291000
Processed variant 292000
Processed variant 293000
Processed variant 294000
Processed variant 295000
Processed variant 296000
Processed variant 297000
Processed variant 298000
Processed variant 299000
Processed variant 300000
Processed variant: 300000
## All variants processed
## 29.3% of genotypes fall below a read depth of 5 and were converted to NA
## 1.29% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 3e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 300000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 300000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant 201000
Processed variant 202000
Processed variant 203000
Processed variant 204000
Processed variant 205000
Processed variant 206000
Processed variant 207000
Processed variant 208000
Processed variant 209000
Processed variant 210000
Processed variant 211000
Processed variant 212000
Processed variant 213000
Processed variant 214000
Processed variant 215000
Processed variant 216000
Processed variant 217000
Processed variant 218000
Processed variant 219000
Processed variant 220000
Processed variant 221000
Processed variant 222000
Processed variant 223000
Processed variant 224000
Processed variant 225000
Processed variant 226000
Processed variant 227000
Processed variant 228000
Processed variant 229000
Processed variant 230000
Processed variant 231000
Processed variant 232000
Processed variant 233000
Processed variant 234000
Processed variant 235000
Processed variant 236000
Processed variant 237000
Processed variant 238000
Processed variant 239000
Processed variant 240000
Processed variant 241000
Processed variant 242000
Processed variant 243000
Processed variant 244000
Processed variant 245000
Processed variant 246000
Processed variant 247000
Processed variant 248000
Processed variant 249000
Processed variant 250000
Processed variant 251000
Processed variant 252000
Processed variant 253000
Processed variant 254000
Processed variant 255000
Processed variant 256000
Processed variant 257000
Processed variant 258000
Processed variant 259000
Processed variant 260000
Processed variant 261000
Processed variant 262000
Processed variant 263000
Processed variant 264000
Processed variant 265000
Processed variant 266000
Processed variant 267000
Processed variant 268000
Processed variant 269000
Processed variant 270000
Processed variant 271000
Processed variant 272000
Processed variant 273000
Processed variant 274000
Processed variant 275000
Processed variant 276000
Processed variant 277000
Processed variant 278000
Processed variant 279000
Processed variant 280000
Processed variant 281000
Processed variant 282000
Processed variant 283000
Processed variant 284000
Processed variant 285000
Processed variant 286000
Processed variant 287000
Processed variant 288000
Processed variant 289000
Processed variant 290000
Processed variant 291000
Processed variant 292000
Processed variant 293000
Processed variant 294000
Processed variant 295000
Processed variant 296000
Processed variant 297000
Processed variant 298000
Processed variant 299000
Processed variant 300000
Processed variant: 300000
## All variants processed
## 29.3% of genotypes fall below a read depth of 5 and were converted to NA
## 1.29% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 3e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 300000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 300000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant 201000
Processed variant 202000
Processed variant 203000
Processed variant 204000
Processed variant 205000
Processed variant 206000
Processed variant 207000
Processed variant 208000
Processed variant 209000
Processed variant 210000
Processed variant 211000
Processed variant 212000
Processed variant 213000
Processed variant 214000
Processed variant 215000
Processed variant 216000
Processed variant 217000
Processed variant 218000
Processed variant 219000
Processed variant 220000
Processed variant 221000
Processed variant 222000
Processed variant 223000
Processed variant 224000
Processed variant 225000
Processed variant 226000
Processed variant 227000
Processed variant 228000
Processed variant 229000
Processed variant 230000
Processed variant 231000
Processed variant 232000
Processed variant 233000
Processed variant 234000
Processed variant 235000
Processed variant 236000
Processed variant 237000
Processed variant 238000
Processed variant 239000
Processed variant 240000
Processed variant 241000
Processed variant 242000
Processed variant 243000
Processed variant 244000
Processed variant 245000
Processed variant 246000
Processed variant 247000
Processed variant 248000
Processed variant 249000
Processed variant 250000
Processed variant 251000
Processed variant 252000
Processed variant 253000
Processed variant 254000
Processed variant 255000
Processed variant 256000
Processed variant 257000
Processed variant 258000
Processed variant 259000
Processed variant 260000
Processed variant 261000
Processed variant 262000
Processed variant 263000
Processed variant 264000
Processed variant 265000
Processed variant 266000
Processed variant 267000
Processed variant 268000
Processed variant 269000
Processed variant 270000
Processed variant 271000
Processed variant 272000
Processed variant 273000
Processed variant 274000
Processed variant 275000
Processed variant 276000
Processed variant 277000
Processed variant 278000
Processed variant 279000
Processed variant 280000
Processed variant 281000
Processed variant 282000
Processed variant 283000
Processed variant 284000
Processed variant 285000
Processed variant 286000
Processed variant 287000
Processed variant 288000
Processed variant 289000
Processed variant 290000
Processed variant 291000
Processed variant 292000
Processed variant 293000
Processed variant 294000
Processed variant 295000
Processed variant 296000
Processed variant 297000
Processed variant 298000
Processed variant 299000
Processed variant 300000
Processed variant: 300000
## All variants processed
## 29.3% of genotypes fall below a read depth of 5 and were converted to NA
## 1.29% of genotypes fall below a genotype quality of 30 and were converted to NA
# load the 300K-SNP benchmarking vcf into memory as 'x'
x <- read.vcfR("~/Desktop/benchmarking.vcfs/benchmark.300K.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 3e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 300000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 300000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant 201000
Processed variant 202000
Processed variant 203000
Processed variant 204000
Processed variant 205000
Processed variant 206000
Processed variant 207000
Processed variant 208000
Processed variant 209000
Processed variant 210000
Processed variant 211000
Processed variant 212000
Processed variant 213000
Processed variant 214000
Processed variant 215000
Processed variant 216000
Processed variant 217000
Processed variant 218000
Processed variant 219000
Processed variant 220000
Processed variant 221000
Processed variant 222000
Processed variant 223000
Processed variant 224000
Processed variant 225000
Processed variant 226000
Processed variant 227000
Processed variant 228000
Processed variant 229000
Processed variant 230000
Processed variant 231000
Processed variant 232000
Processed variant 233000
Processed variant 234000
Processed variant 235000
Processed variant 236000
Processed variant 237000
Processed variant 238000
Processed variant 239000
Processed variant 240000
Processed variant 241000
Processed variant 242000
Processed variant 243000
Processed variant 244000
Processed variant 245000
Processed variant 246000
Processed variant 247000
Processed variant 248000
Processed variant 249000
Processed variant 250000
Processed variant 251000
Processed variant 252000
Processed variant 253000
Processed variant 254000
Processed variant 255000
Processed variant 256000
Processed variant 257000
Processed variant 258000
Processed variant 259000
Processed variant 260000
Processed variant 261000
Processed variant 262000
Processed variant 263000
Processed variant 264000
Processed variant 265000
Processed variant 266000
Processed variant 267000
Processed variant 268000
Processed variant 269000
Processed variant 270000
Processed variant 271000
Processed variant 272000
Processed variant 273000
Processed variant 274000
Processed variant 275000
Processed variant 276000
Processed variant 277000
Processed variant 278000
Processed variant 279000
Processed variant 280000
Processed variant 281000
Processed variant 282000
Processed variant 283000
Processed variant 284000
Processed variant 285000
Processed variant 286000
Processed variant 287000
Processed variant 288000
Processed variant 289000
Processed variant 290000
Processed variant 291000
Processed variant 292000
Processed variant 293000
Processed variant 294000
Processed variant 295000
Processed variant 296000
Processed variant 297000
Processed variant 298000
Processed variant 299000
Processed variant 300000
Processed variant: 300000
## All variants processed
# now benchmark the filtering step alone, without reading in the file itself
z <- microbenchmark(
  # benchmark 300K using the vcfR object 'x' that is already read in
  hard_filter(vcfR = x, depth = 5, gq = 30),
  # report in seconds and run three replicates
  unit = "s", times = 3
)
## 29.3% of genotypes fall below a read depth of 5 and were converted to NA
## 1.29% of genotypes fall below a genotype quality of 30 and were converted to NA
## 29.3% of genotypes fall below a read depth of 5 and were converted to NA
## 1.29% of genotypes fall below a genotype quality of 30 and were converted to NA
## 29.3% of genotypes fall below a read depth of 5 and were converted to NA
## 1.29% of genotypes fall below a genotype quality of 30 and were converted to NA
# replace each microbenchmark result with its summary data frame
y <- summary(y)
z <- summary(z)

# append both summaries to the full results data frame
sum.df <- rbind(sum.df, y, z)

#Benchmark 400K

# 400K
# remove the objects currently held in the R workspace to avoid internal
# memory issues slowing down the functions and affecting benchmarking accuracy
rm(x, y, z)

# now benchmark with the file read-in included in the timing
y <- microbenchmark(
  # benchmark 400K while having to read in the vcf file
  hard_filter(
    vcfR = read.vcfR("~/Desktop/benchmarking.vcfs/benchmark.400K.vcf.gz"),
    depth = 5,
    gq = 30
  ),
  # report in seconds and run three replicates
  unit = "s", times = 3
)
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 4e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 400000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 400000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant 201000
Processed variant 202000
Processed variant 203000
Processed variant 204000
Processed variant 205000
Processed variant 206000
Processed variant 207000
Processed variant 208000
Processed variant 209000
Processed variant 210000
Processed variant 211000
Processed variant 212000
Processed variant 213000
Processed variant 214000
Processed variant 215000
Processed variant 216000
Processed variant 217000
Processed variant 218000
Processed variant 219000
Processed variant 220000
Processed variant 221000
Processed variant 222000
Processed variant 223000
Processed variant 224000
Processed variant 225000
Processed variant 226000
Processed variant 227000
Processed variant 228000
Processed variant 229000
Processed variant 230000
Processed variant 231000
Processed variant 232000
Processed variant 233000
Processed variant 234000
Processed variant 235000
Processed variant 236000
Processed variant 237000
Processed variant 238000
Processed variant 239000
Processed variant 240000
Processed variant 241000
Processed variant 242000
Processed variant 243000
Processed variant 244000
Processed variant 245000
Processed variant 246000
Processed variant 247000
Processed variant 248000
Processed variant 249000
Processed variant 250000
Processed variant 251000
Processed variant 252000
Processed variant 253000
Processed variant 254000
Processed variant 255000
Processed variant 256000
Processed variant 257000
Processed variant 258000
Processed variant 259000
Processed variant 260000
Processed variant 261000
Processed variant 262000
Processed variant 263000
Processed variant 264000
Processed variant 265000
Processed variant 266000
Processed variant 267000
Processed variant 268000
Processed variant 269000
Processed variant 270000
Processed variant 271000
Processed variant 272000
Processed variant 273000
Processed variant 274000
Processed variant 275000
Processed variant 276000
Processed variant 277000
Processed variant 278000
Processed variant 279000
Processed variant 280000
Processed variant 281000
Processed variant 282000
Processed variant 283000
Processed variant 284000
Processed variant 285000
Processed variant 286000
Processed variant 287000
Processed variant 288000
Processed variant 289000
Processed variant 290000
Processed variant 291000
Processed variant 292000
Processed variant 293000
Processed variant 294000
Processed variant 295000
Processed variant 296000
Processed variant 297000
Processed variant 298000
Processed variant 299000
Processed variant 300000
Processed variant 301000
Processed variant 302000
Processed variant 303000
Processed variant 304000
Processed variant 305000
Processed variant 306000
Processed variant 307000
Processed variant 308000
Processed variant 309000
Processed variant 310000
Processed variant 311000
Processed variant 312000
Processed variant 313000
Processed variant 314000
Processed variant 315000
Processed variant 316000
Processed variant 317000
Processed variant 318000
Processed variant 319000
Processed variant 320000
Processed variant 321000
Processed variant 322000
Processed variant 323000
Processed variant 324000
Processed variant 325000
Processed variant 326000
Processed variant 327000
Processed variant 328000
Processed variant 329000
Processed variant 330000
Processed variant 331000
Processed variant 332000
Processed variant 333000
Processed variant 334000
Processed variant 335000
Processed variant 336000
Processed variant 337000
Processed variant 338000
Processed variant 339000
Processed variant 340000
Processed variant 341000
Processed variant 342000
Processed variant 343000
Processed variant 344000
Processed variant 345000
Processed variant 346000
Processed variant 347000
Processed variant 348000
Processed variant 349000
Processed variant 350000
Processed variant 351000
Processed variant 352000
Processed variant 353000
Processed variant 354000
Processed variant 355000
Processed variant 356000
Processed variant 357000
Processed variant 358000
Processed variant 359000
Processed variant 360000
Processed variant 361000
Processed variant 362000
Processed variant 363000
Processed variant 364000
Processed variant 365000
Processed variant 366000
Processed variant 367000
Processed variant 368000
Processed variant 369000
Processed variant 370000
Processed variant 371000
Processed variant 372000
Processed variant 373000
Processed variant 374000
Processed variant 375000
Processed variant 376000
Processed variant 377000
Processed variant 378000
Processed variant 379000
Processed variant 380000
Processed variant 381000
Processed variant 382000
Processed variant 383000
Processed variant 384000
Processed variant 385000
Processed variant 386000
Processed variant 387000
Processed variant 388000
Processed variant 389000
Processed variant 390000
Processed variant 391000
Processed variant 392000
Processed variant 393000
Processed variant 394000
Processed variant 395000
Processed variant 396000
Processed variant 397000
Processed variant 398000
Processed variant 399000
Processed variant 400000
Processed variant: 400000
## All variants processed
## 29.2% of genotypes fall below a read depth of 5 and were converted to NA
## 1.31% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 4e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 400000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 400000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant 201000
Processed variant 202000
Processed variant 203000
Processed variant 204000
Processed variant 205000
Processed variant 206000
Processed variant 207000
Processed variant 208000
Processed variant 209000
Processed variant 210000
Processed variant 211000
Processed variant 212000
Processed variant 213000
Processed variant 214000
Processed variant 215000
Processed variant 216000
Processed variant 217000
Processed variant 218000
Processed variant 219000
Processed variant 220000
Processed variant 221000
Processed variant 222000
Processed variant 223000
Processed variant 224000
Processed variant 225000
Processed variant 226000
Processed variant 227000
Processed variant 228000
Processed variant 229000
Processed variant 230000
Processed variant 231000
Processed variant 232000
Processed variant 233000
Processed variant 234000
Processed variant 235000
Processed variant 236000
Processed variant 237000
Processed variant 238000
Processed variant 239000
Processed variant 240000
Processed variant 241000
Processed variant 242000
Processed variant 243000
Processed variant 244000
Processed variant 245000
Processed variant 246000
Processed variant 247000
Processed variant 248000
Processed variant 249000
Processed variant 250000
Processed variant 251000
Processed variant 252000
Processed variant 253000
Processed variant 254000
Processed variant 255000
Processed variant 256000
Processed variant 257000
Processed variant 258000
Processed variant 259000
Processed variant 260000
Processed variant 261000
Processed variant 262000
Processed variant 263000
Processed variant 264000
Processed variant 265000
Processed variant 266000
Processed variant 267000
Processed variant 268000
Processed variant 269000
Processed variant 270000
Processed variant 271000
Processed variant 272000
Processed variant 273000
Processed variant 274000
Processed variant 275000
Processed variant 276000
Processed variant 277000
Processed variant 278000
Processed variant 279000
Processed variant 280000
Processed variant 281000
Processed variant 282000
Processed variant 283000
Processed variant 284000
Processed variant 285000
Processed variant 286000
Processed variant 287000
Processed variant 288000
Processed variant 289000
Processed variant 290000
Processed variant 291000
Processed variant 292000
Processed variant 293000
Processed variant 294000
Processed variant 295000
Processed variant 296000
Processed variant 297000
Processed variant 298000
Processed variant 299000
Processed variant 300000
Processed variant 301000
Processed variant 302000
Processed variant 303000
Processed variant 304000
Processed variant 305000
Processed variant 306000
Processed variant 307000
Processed variant 308000
Processed variant 309000
Processed variant 310000
Processed variant 311000
Processed variant 312000
Processed variant 313000
Processed variant 314000
Processed variant 315000
Processed variant 316000
Processed variant 317000
Processed variant 318000
Processed variant 319000
Processed variant 320000
Processed variant 321000
Processed variant 322000
Processed variant 323000
Processed variant 324000
Processed variant 325000
Processed variant 326000
Processed variant 327000
Processed variant 328000
Processed variant 329000
Processed variant 330000
Processed variant 331000
Processed variant 332000
Processed variant 333000
Processed variant 334000
Processed variant 335000
Processed variant 336000
Processed variant 337000
Processed variant 338000
Processed variant 339000
Processed variant 340000
Processed variant 341000
Processed variant 342000
Processed variant 343000
Processed variant 344000
Processed variant 345000
Processed variant 346000
Processed variant 347000
Processed variant 348000
Processed variant 349000
Processed variant 350000
Processed variant 351000
Processed variant 352000
Processed variant 353000
Processed variant 354000
Processed variant 355000
Processed variant 356000
Processed variant 357000
Processed variant 358000
Processed variant 359000
Processed variant 360000
Processed variant 361000
Processed variant 362000
Processed variant 363000
Processed variant 364000
Processed variant 365000
Processed variant 366000
Processed variant 367000
Processed variant 368000
Processed variant 369000
Processed variant 370000
Processed variant 371000
Processed variant 372000
Processed variant 373000
Processed variant 374000
Processed variant 375000
Processed variant 376000
Processed variant 377000
Processed variant 378000
Processed variant 379000
Processed variant 380000
Processed variant 381000
Processed variant 382000
Processed variant 383000
Processed variant 384000
Processed variant 385000
Processed variant 386000
Processed variant 387000
Processed variant 388000
Processed variant 389000
Processed variant 390000
Processed variant 391000
Processed variant 392000
Processed variant 393000
Processed variant 394000
Processed variant 395000
Processed variant 396000
Processed variant 397000
Processed variant 398000
Processed variant 399000
Processed variant 400000
Processed variant: 400000
## All variants processed
## 29.2% of genotypes fall below a read depth of 5 and were converted to NA
## 1.31% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 4e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 400000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 400000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant 201000
Processed variant 202000
Processed variant 203000
Processed variant 204000
Processed variant 205000
Processed variant 206000
Processed variant 207000
Processed variant 208000
Processed variant 209000
Processed variant 210000
Processed variant 211000
Processed variant 212000
Processed variant 213000
Processed variant 214000
Processed variant 215000
Processed variant 216000
Processed variant 217000
Processed variant 218000
Processed variant 219000
Processed variant 220000
Processed variant 221000
Processed variant 222000
Processed variant 223000
Processed variant 224000
Processed variant 225000
Processed variant 226000
Processed variant 227000
Processed variant 228000
Processed variant 229000
Processed variant 230000
Processed variant 231000
Processed variant 232000
Processed variant 233000
Processed variant 234000
Processed variant 235000
Processed variant 236000
Processed variant 237000
Processed variant 238000
Processed variant 239000
Processed variant 240000
Processed variant 241000
Processed variant 242000
Processed variant 243000
Processed variant 244000
Processed variant 245000
Processed variant 246000
Processed variant 247000
Processed variant 248000
Processed variant 249000
Processed variant 250000
Processed variant 251000
Processed variant 252000
Processed variant 253000
Processed variant 254000
Processed variant 255000
Processed variant 256000
Processed variant 257000
Processed variant 258000
Processed variant 259000
Processed variant 260000
Processed variant 261000
Processed variant 262000
Processed variant 263000
Processed variant 264000
Processed variant 265000
Processed variant 266000
Processed variant 267000
Processed variant 268000
Processed variant 269000
Processed variant 270000
Processed variant 271000
Processed variant 272000
Processed variant 273000
Processed variant 274000
Processed variant 275000
Processed variant 276000
Processed variant 277000
Processed variant 278000
Processed variant 279000
Processed variant 280000
Processed variant 281000
Processed variant 282000
Processed variant 283000
Processed variant 284000
Processed variant 285000
Processed variant 286000
Processed variant 287000
Processed variant 288000
Processed variant 289000
Processed variant 290000
Processed variant 291000
Processed variant 292000
Processed variant 293000
Processed variant 294000
Processed variant 295000
Processed variant 296000
Processed variant 297000
Processed variant 298000
Processed variant 299000
Processed variant 300000
Processed variant 301000
Processed variant 302000
Processed variant 303000
Processed variant 304000
Processed variant 305000
Processed variant 306000
Processed variant 307000
Processed variant 308000
Processed variant 309000
Processed variant 310000
Processed variant 311000
Processed variant 312000
Processed variant 313000
Processed variant 314000
Processed variant 315000
Processed variant 316000
Processed variant 317000
Processed variant 318000
Processed variant 319000
Processed variant 320000
Processed variant 321000
Processed variant 322000
Processed variant 323000
Processed variant 324000
Processed variant 325000
Processed variant 326000
Processed variant 327000
Processed variant 328000
Processed variant 329000
Processed variant 330000
Processed variant 331000
Processed variant 332000
Processed variant 333000
Processed variant 334000
Processed variant 335000
Processed variant 336000
Processed variant 337000
Processed variant 338000
Processed variant 339000
Processed variant 340000
Processed variant 341000
Processed variant 342000
Processed variant 343000
Processed variant 344000
Processed variant 345000
Processed variant 346000
Processed variant 347000
Processed variant 348000
Processed variant 349000
Processed variant 350000
Processed variant 351000
Processed variant 352000
Processed variant 353000
Processed variant 354000
Processed variant 355000
Processed variant 356000
Processed variant 357000
Processed variant 358000
Processed variant 359000
Processed variant 360000
Processed variant 361000
Processed variant 362000
Processed variant 363000
Processed variant 364000
Processed variant 365000
Processed variant 366000
Processed variant 367000
Processed variant 368000
Processed variant 369000
Processed variant 370000
Processed variant 371000
Processed variant 372000
Processed variant 373000
Processed variant 374000
Processed variant 375000
Processed variant 376000
Processed variant 377000
Processed variant 378000
Processed variant 379000
Processed variant 380000
Processed variant 381000
Processed variant 382000
Processed variant 383000
Processed variant 384000
Processed variant 385000
Processed variant 386000
Processed variant 387000
Processed variant 388000
Processed variant 389000
Processed variant 390000
Processed variant 391000
Processed variant 392000
Processed variant 393000
Processed variant 394000
Processed variant 395000
Processed variant 396000
Processed variant 397000
Processed variant 398000
Processed variant 399000
Processed variant 400000
Processed variant: 400000
## All variants processed
## 29.2% of genotypes fall below a read depth of 5 and were converted to NA
## 1.31% of genotypes fall below a genotype quality of 30 and were converted to NA
# Load the 400K-SNP benchmarking vcf into memory so that the next benchmark
# can time filtering on an object that has already been read in.
x <- read.vcfR(file = "~/Desktop/benchmarking.vcfs/benchmark.400K.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 4e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 400000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 400000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant 201000
Processed variant 202000
Processed variant 203000
Processed variant 204000
Processed variant 205000
Processed variant 206000
Processed variant 207000
Processed variant 208000
Processed variant 209000
Processed variant 210000
Processed variant 211000
Processed variant 212000
Processed variant 213000
Processed variant 214000
Processed variant 215000
Processed variant 216000
Processed variant 217000
Processed variant 218000
Processed variant 219000
Processed variant 220000
Processed variant 221000
Processed variant 222000
Processed variant 223000
Processed variant 224000
Processed variant 225000
Processed variant 226000
Processed variant 227000
Processed variant 228000
Processed variant 229000
Processed variant 230000
Processed variant 231000
Processed variant 232000
Processed variant 233000
Processed variant 234000
Processed variant 235000
Processed variant 236000
Processed variant 237000
Processed variant 238000
Processed variant 239000
Processed variant 240000
Processed variant 241000
Processed variant 242000
Processed variant 243000
Processed variant 244000
Processed variant 245000
Processed variant 246000
Processed variant 247000
Processed variant 248000
Processed variant 249000
Processed variant 250000
Processed variant 251000
Processed variant 252000
Processed variant 253000
Processed variant 254000
Processed variant 255000
Processed variant 256000
Processed variant 257000
Processed variant 258000
Processed variant 259000
Processed variant 260000
Processed variant 261000
Processed variant 262000
Processed variant 263000
Processed variant 264000
Processed variant 265000
Processed variant 266000
Processed variant 267000
Processed variant 268000
Processed variant 269000
Processed variant 270000
Processed variant 271000
Processed variant 272000
Processed variant 273000
Processed variant 274000
Processed variant 275000
Processed variant 276000
Processed variant 277000
Processed variant 278000
Processed variant 279000
Processed variant 280000
Processed variant 281000
Processed variant 282000
Processed variant 283000
Processed variant 284000
Processed variant 285000
Processed variant 286000
Processed variant 287000
Processed variant 288000
Processed variant 289000
Processed variant 290000
Processed variant 291000
Processed variant 292000
Processed variant 293000
Processed variant 294000
Processed variant 295000
Processed variant 296000
Processed variant 297000
Processed variant 298000
Processed variant 299000
Processed variant 300000
Processed variant 301000
Processed variant 302000
Processed variant 303000
Processed variant 304000
Processed variant 305000
Processed variant 306000
Processed variant 307000
Processed variant 308000
Processed variant 309000
Processed variant 310000
Processed variant 311000
Processed variant 312000
Processed variant 313000
Processed variant 314000
Processed variant 315000
Processed variant 316000
Processed variant 317000
Processed variant 318000
Processed variant 319000
Processed variant 320000
Processed variant 321000
Processed variant 322000
Processed variant 323000
Processed variant 324000
Processed variant 325000
Processed variant 326000
Processed variant 327000
Processed variant 328000
Processed variant 329000
Processed variant 330000
Processed variant 331000
Processed variant 332000
Processed variant 333000
Processed variant 334000
Processed variant 335000
Processed variant 336000
Processed variant 337000
Processed variant 338000
Processed variant 339000
Processed variant 340000
Processed variant 341000
Processed variant 342000
Processed variant 343000
Processed variant 344000
Processed variant 345000
Processed variant 346000
Processed variant 347000
Processed variant 348000
Processed variant 349000
Processed variant 350000
Processed variant 351000
Processed variant 352000
Processed variant 353000
Processed variant 354000
Processed variant 355000
Processed variant 356000
Processed variant 357000
Processed variant 358000
Processed variant 359000
Processed variant 360000
Processed variant 361000
Processed variant 362000
Processed variant 363000
Processed variant 364000
Processed variant 365000
Processed variant 366000
Processed variant 367000
Processed variant 368000
Processed variant 369000
Processed variant 370000
Processed variant 371000
Processed variant 372000
Processed variant 373000
Processed variant 374000
Processed variant 375000
Processed variant 376000
Processed variant 377000
Processed variant 378000
Processed variant 379000
Processed variant 380000
Processed variant 381000
Processed variant 382000
Processed variant 383000
Processed variant 384000
Processed variant 385000
Processed variant 386000
Processed variant 387000
Processed variant 388000
Processed variant 389000
Processed variant 390000
Processed variant 391000
Processed variant 392000
Processed variant 393000
Processed variant 394000
Processed variant 395000
Processed variant 396000
Processed variant 397000
Processed variant 398000
Processed variant 399000
Processed variant 400000
Processed variant: 400000
## All variants processed
#now benchmark the filtering step alone, without having to read in the file itself
z<-microbenchmark(
  #benchmark 400K using the vcfR object 'x' directly, so no file reading is timed here
  hard_filter(vcfR = x, depth = 5, gq = 30),
  #set number of reps (3) and units as seconds
  times = 3, unit = "s"
)
## 29.2% of genotypes fall below a read depth of 5 and were converted to NA
## 1.31% of genotypes fall below a genotype quality of 30 and were converted to NA
## 29.2% of genotypes fall below a read depth of 5 and were converted to NA
## 1.31% of genotypes fall below a genotype quality of 30 and were converted to NA
## 29.2% of genotypes fall below a read depth of 5 and were converted to NA
## 1.31% of genotypes fall below a genotype quality of 30 and were converted to NA
#convert each microbenchmark result to a dataframe via summary(), overwriting y and z in place
y<-summary(y)
z<-summary(z)

#add these results as new rows of the full dataframe holding all benchmarks so far
sum.df<-rbind(sum.df,y,z)

Benchmark 500K

#500K
#remove objects currently in the R workspace (not files on disk) to avoid internal memory issues slowing down the functions and affecting benchmarking accuracy
rm(x,y,z)

#now benchmark including the time needed to read in the file itself
y<-microbenchmark(
  #benchmark 500K while having to read in the vcf file; read.vcfR() is called inside the timed expression
  hard_filter(vcfR = read.vcfR("~/Desktop/benchmarking.vcfs/benchmark.500K.vcf.gz"),depth = 5,gq = 30),
  #set number of reps (3) and units as seconds
  times = 3, unit = "s"
)
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 5e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 500000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 500000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant 201000
Processed variant 202000
Processed variant 203000
Processed variant 204000
Processed variant 205000
Processed variant 206000
Processed variant 207000
Processed variant 208000
Processed variant 209000
Processed variant 210000
Processed variant 211000
Processed variant 212000
Processed variant 213000
Processed variant 214000
Processed variant 215000
Processed variant 216000
Processed variant 217000
Processed variant 218000
Processed variant 219000
Processed variant 220000
Processed variant 221000
Processed variant 222000
Processed variant 223000
Processed variant 224000
Processed variant 225000
Processed variant 226000
Processed variant 227000
Processed variant 228000
Processed variant 229000
Processed variant 230000
Processed variant 231000
Processed variant 232000
Processed variant 233000
Processed variant 234000
Processed variant 235000
Processed variant 236000
Processed variant 237000
Processed variant 238000
Processed variant 239000
Processed variant 240000
Processed variant 241000
Processed variant 242000
Processed variant 243000
Processed variant 244000
Processed variant 245000
Processed variant 246000
Processed variant 247000
Processed variant 248000
Processed variant 249000
Processed variant 250000
Processed variant 251000
Processed variant 252000
Processed variant 253000
Processed variant 254000
Processed variant 255000
Processed variant 256000
Processed variant 257000
Processed variant 258000
Processed variant 259000
Processed variant 260000
Processed variant 261000
Processed variant 262000
Processed variant 263000
Processed variant 264000
Processed variant 265000
Processed variant 266000
Processed variant 267000
Processed variant 268000
Processed variant 269000
Processed variant 270000
Processed variant 271000
Processed variant 272000
Processed variant 273000
Processed variant 274000
Processed variant 275000
Processed variant 276000
Processed variant 277000
Processed variant 278000
Processed variant 279000
Processed variant 280000
Processed variant 281000
Processed variant 282000
Processed variant 283000
Processed variant 284000
Processed variant 285000
Processed variant 286000
Processed variant 287000
Processed variant 288000
Processed variant 289000
Processed variant 290000
Processed variant 291000
Processed variant 292000
Processed variant 293000
Processed variant 294000
Processed variant 295000
Processed variant 296000
Processed variant 297000
Processed variant 298000
Processed variant 299000
Processed variant 300000
Processed variant 301000
Processed variant 302000
Processed variant 303000
Processed variant 304000
Processed variant 305000
Processed variant 306000
Processed variant 307000
Processed variant 308000
Processed variant 309000
Processed variant 310000
Processed variant 311000
Processed variant 312000
Processed variant 313000
Processed variant 314000
Processed variant 315000
Processed variant 316000
Processed variant 317000
Processed variant 318000
Processed variant 319000
Processed variant 320000
Processed variant 321000
Processed variant 322000
Processed variant 323000
Processed variant 324000
Processed variant 325000
Processed variant 326000
Processed variant 327000
Processed variant 328000
Processed variant 329000
Processed variant 330000
Processed variant 331000
Processed variant 332000
Processed variant 333000
Processed variant 334000
Processed variant 335000
Processed variant 336000
Processed variant 337000
Processed variant 338000
Processed variant 339000
Processed variant 340000
Processed variant 341000
Processed variant 342000
Processed variant 343000
Processed variant 344000
Processed variant 345000
Processed variant 346000
Processed variant 347000
Processed variant 348000
Processed variant 349000
Processed variant 350000
Processed variant 351000
Processed variant 352000
Processed variant 353000
Processed variant 354000
Processed variant 355000
Processed variant 356000
Processed variant 357000
Processed variant 358000
Processed variant 359000
Processed variant 360000
Processed variant 361000
Processed variant 362000
Processed variant 363000
Processed variant 364000
Processed variant 365000
Processed variant 366000
Processed variant 367000
Processed variant 368000
Processed variant 369000
Processed variant 370000
Processed variant 371000
Processed variant 372000
Processed variant 373000
Processed variant 374000
Processed variant 375000
Processed variant 376000
Processed variant 377000
Processed variant 378000
Processed variant 379000
Processed variant 380000
Processed variant 381000
Processed variant 382000
Processed variant 383000
Processed variant 384000
Processed variant 385000
Processed variant 386000
Processed variant 387000
Processed variant 388000
Processed variant 389000
Processed variant 390000
Processed variant 391000
Processed variant 392000
Processed variant 393000
Processed variant 394000
Processed variant 395000
Processed variant 396000
Processed variant 397000
Processed variant 398000
Processed variant 399000
Processed variant 400000
Processed variant 401000
Processed variant 402000
Processed variant 403000
Processed variant 404000
Processed variant 405000
Processed variant 406000
Processed variant 407000
Processed variant 408000
Processed variant 409000
Processed variant 410000
Processed variant 411000
Processed variant 412000
Processed variant 413000
Processed variant 414000
Processed variant 415000
Processed variant 416000
Processed variant 417000
Processed variant 418000
Processed variant 419000
Processed variant 420000
Processed variant 421000
Processed variant 422000
Processed variant 423000
Processed variant 424000
Processed variant 425000
Processed variant 426000
Processed variant 427000
Processed variant 428000
Processed variant 429000
Processed variant 430000
Processed variant 431000
Processed variant 432000
Processed variant 433000
Processed variant 434000
Processed variant 435000
Processed variant 436000
Processed variant 437000
Processed variant 438000
Processed variant 439000
Processed variant 440000
Processed variant 441000
Processed variant 442000
Processed variant 443000
Processed variant 444000
Processed variant 445000
Processed variant 446000
Processed variant 447000
Processed variant 448000
Processed variant 449000
Processed variant 450000
Processed variant 451000
Processed variant 452000
Processed variant 453000
Processed variant 454000
Processed variant 455000
Processed variant 456000
Processed variant 457000
Processed variant 458000
Processed variant 459000
Processed variant 460000
Processed variant 461000
Processed variant 462000
Processed variant 463000
Processed variant 464000
Processed variant 465000
Processed variant 466000
Processed variant 467000
Processed variant 468000
Processed variant 469000
Processed variant 470000
Processed variant 471000
Processed variant 472000
Processed variant 473000
Processed variant 474000
Processed variant 475000
Processed variant 476000
Processed variant 477000
Processed variant 478000
Processed variant 479000
Processed variant 480000
Processed variant 481000
Processed variant 482000
Processed variant 483000
Processed variant 484000
Processed variant 485000
Processed variant 486000
Processed variant 487000
Processed variant 488000
Processed variant 489000
Processed variant 490000
Processed variant 491000
Processed variant 492000
Processed variant 493000
Processed variant 494000
Processed variant 495000
Processed variant 496000
Processed variant 497000
Processed variant 498000
Processed variant 499000
Processed variant 500000
Processed variant: 500000
## All variants processed
## 29.3% of genotypes fall below a read depth of 5 and were converted to NA
## 1.32% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 5e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 500000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 500000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant 201000
Processed variant 202000
Processed variant 203000
Processed variant 204000
Processed variant 205000
Processed variant 206000
Processed variant 207000
Processed variant 208000
Processed variant 209000
Processed variant 210000
Processed variant 211000
Processed variant 212000
Processed variant 213000
Processed variant 214000
Processed variant 215000
Processed variant 216000
Processed variant 217000
Processed variant 218000
Processed variant 219000
Processed variant 220000
Processed variant 221000
Processed variant 222000
Processed variant 223000
Processed variant 224000
Processed variant 225000
Processed variant 226000
Processed variant 227000
Processed variant 228000
Processed variant 229000
Processed variant 230000
Processed variant 231000
Processed variant 232000
Processed variant 233000
Processed variant 234000
Processed variant 235000
Processed variant 236000
Processed variant 237000
Processed variant 238000
Processed variant 239000
Processed variant 240000
Processed variant 241000
Processed variant 242000
Processed variant 243000
Processed variant 244000
Processed variant 245000
Processed variant 246000
Processed variant 247000
Processed variant 248000
Processed variant 249000
Processed variant 250000
Processed variant 251000
Processed variant 252000
Processed variant 253000
Processed variant 254000
Processed variant 255000
Processed variant 256000
Processed variant 257000
Processed variant 258000
Processed variant 259000
Processed variant 260000
Processed variant 261000
Processed variant 262000
Processed variant 263000
Processed variant 264000
Processed variant 265000
Processed variant 266000
Processed variant 267000
Processed variant 268000
Processed variant 269000
Processed variant 270000
Processed variant 271000
Processed variant 272000
Processed variant 273000
Processed variant 274000
Processed variant 275000
Processed variant 276000
Processed variant 277000
Processed variant 278000
Processed variant 279000
Processed variant 280000
Processed variant 281000
Processed variant 282000
Processed variant 283000
Processed variant 284000
Processed variant 285000
Processed variant 286000
Processed variant 287000
Processed variant 288000
Processed variant 289000
Processed variant 290000
Processed variant 291000
Processed variant 292000
Processed variant 293000
Processed variant 294000
Processed variant 295000
Processed variant 296000
Processed variant 297000
Processed variant 298000
Processed variant 299000
Processed variant 300000
Processed variant 301000
Processed variant 302000
Processed variant 303000
Processed variant 304000
Processed variant 305000
Processed variant 306000
Processed variant 307000
Processed variant 308000
Processed variant 309000
Processed variant 310000
Processed variant 311000
Processed variant 312000
Processed variant 313000
Processed variant 314000
Processed variant 315000
Processed variant 316000
Processed variant 317000
Processed variant 318000
Processed variant 319000
Processed variant 320000
Processed variant 321000
Processed variant 322000
Processed variant 323000
Processed variant 324000
Processed variant 325000
Processed variant 326000
Processed variant 327000
Processed variant 328000
Processed variant 329000
Processed variant 330000
Processed variant 331000
Processed variant 332000
Processed variant 333000
Processed variant 334000
Processed variant 335000
Processed variant 336000
Processed variant 337000
Processed variant 338000
Processed variant 339000
Processed variant 340000
Processed variant 341000
Processed variant 342000
Processed variant 343000
Processed variant 344000
Processed variant 345000
Processed variant 346000
Processed variant 347000
Processed variant 348000
Processed variant 349000
Processed variant 350000
Processed variant 351000
Processed variant 352000
Processed variant 353000
Processed variant 354000
Processed variant 355000
Processed variant 356000
Processed variant 357000
Processed variant 358000
Processed variant 359000
Processed variant 360000
Processed variant 361000
Processed variant 362000
Processed variant 363000
Processed variant 364000
Processed variant 365000
Processed variant 366000
Processed variant 367000
Processed variant 368000
Processed variant 369000
Processed variant 370000
Processed variant 371000
Processed variant 372000
Processed variant 373000
Processed variant 374000
Processed variant 375000
Processed variant 376000
Processed variant 377000
Processed variant 378000
Processed variant 379000
Processed variant 380000
Processed variant 381000
Processed variant 382000
Processed variant 383000
Processed variant 384000
Processed variant 385000
Processed variant 386000
Processed variant 387000
Processed variant 388000
Processed variant 389000
Processed variant 390000
Processed variant 391000
Processed variant 392000
Processed variant 393000
Processed variant 394000
Processed variant 395000
Processed variant 396000
Processed variant 397000
Processed variant 398000
Processed variant 399000
Processed variant 400000
Processed variant 401000
Processed variant 402000
Processed variant 403000
Processed variant 404000
Processed variant 405000
Processed variant 406000
Processed variant 407000
Processed variant 408000
Processed variant 409000
Processed variant 410000
Processed variant 411000
Processed variant 412000
Processed variant 413000
Processed variant 414000
Processed variant 415000
Processed variant 416000
Processed variant 417000
Processed variant 418000
Processed variant 419000
Processed variant 420000
Processed variant 421000
Processed variant 422000
Processed variant 423000
Processed variant 424000
Processed variant 425000
Processed variant 426000
Processed variant 427000
Processed variant 428000
Processed variant 429000
Processed variant 430000
Processed variant 431000
Processed variant 432000
Processed variant 433000
Processed variant 434000
Processed variant 435000
Processed variant 436000
Processed variant 437000
Processed variant 438000
Processed variant 439000
Processed variant 440000
Processed variant 441000
Processed variant 442000
Processed variant 443000
Processed variant 444000
Processed variant 445000
Processed variant 446000
Processed variant 447000
Processed variant 448000
Processed variant 449000
Processed variant 450000
Processed variant 451000
Processed variant 452000
Processed variant 453000
Processed variant 454000
Processed variant 455000
Processed variant 456000
Processed variant 457000
Processed variant 458000
Processed variant 459000
Processed variant 460000
Processed variant 461000
Processed variant 462000
Processed variant 463000
Processed variant 464000
Processed variant 465000
Processed variant 466000
Processed variant 467000
Processed variant 468000
Processed variant 469000
Processed variant 470000
Processed variant 471000
Processed variant 472000
Processed variant 473000
Processed variant 474000
Processed variant 475000
Processed variant 476000
Processed variant 477000
Processed variant 478000
Processed variant 479000
Processed variant 480000
Processed variant 481000
Processed variant 482000
Processed variant 483000
Processed variant 484000
Processed variant 485000
Processed variant 486000
Processed variant 487000
Processed variant 488000
Processed variant 489000
Processed variant 490000
Processed variant 491000
Processed variant 492000
Processed variant 493000
Processed variant 494000
Processed variant 495000
Processed variant 496000
Processed variant 497000
Processed variant 498000
Processed variant 499000
Processed variant 500000
Processed variant: 500000
## All variants processed
## 29.3% of genotypes fall below a read depth of 5 and were converted to NA
## 1.32% of genotypes fall below a genotype quality of 30 and were converted to NA
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 5e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 500000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 500000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant 201000
Processed variant 202000
Processed variant 203000
Processed variant 204000
Processed variant 205000
Processed variant 206000
Processed variant 207000
Processed variant 208000
Processed variant 209000
Processed variant 210000
Processed variant 211000
Processed variant 212000
Processed variant 213000
Processed variant 214000
Processed variant 215000
Processed variant 216000
Processed variant 217000
Processed variant 218000
Processed variant 219000
Processed variant 220000
Processed variant 221000
Processed variant 222000
Processed variant 223000
Processed variant 224000
Processed variant 225000
Processed variant 226000
Processed variant 227000
Processed variant 228000
Processed variant 229000
Processed variant 230000
Processed variant 231000
Processed variant 232000
Processed variant 233000
Processed variant 234000
Processed variant 235000
Processed variant 236000
Processed variant 237000
Processed variant 238000
Processed variant 239000
Processed variant 240000
Processed variant 241000
Processed variant 242000
Processed variant 243000
Processed variant 244000
Processed variant 245000
Processed variant 246000
Processed variant 247000
Processed variant 248000
Processed variant 249000
Processed variant 250000
Processed variant 251000
Processed variant 252000
Processed variant 253000
Processed variant 254000
Processed variant 255000
Processed variant 256000
Processed variant 257000
Processed variant 258000
Processed variant 259000
Processed variant 260000
Processed variant 261000
Processed variant 262000
Processed variant 263000
Processed variant 264000
Processed variant 265000
Processed variant 266000
Processed variant 267000
Processed variant 268000
Processed variant 269000
Processed variant 270000
Processed variant 271000
Processed variant 272000
Processed variant 273000
Processed variant 274000
Processed variant 275000
Processed variant 276000
Processed variant 277000
Processed variant 278000
Processed variant 279000
Processed variant 280000
Processed variant 281000
Processed variant 282000
Processed variant 283000
Processed variant 284000
Processed variant 285000
Processed variant 286000
Processed variant 287000
Processed variant 288000
Processed variant 289000
Processed variant 290000
Processed variant 291000
Processed variant 292000
Processed variant 293000
Processed variant 294000
Processed variant 295000
Processed variant 296000
Processed variant 297000
Processed variant 298000
Processed variant 299000
Processed variant 300000
Processed variant 301000
Processed variant 302000
Processed variant 303000
Processed variant 304000
Processed variant 305000
Processed variant 306000
Processed variant 307000
Processed variant 308000
Processed variant 309000
Processed variant 310000
Processed variant 311000
Processed variant 312000
Processed variant 313000
Processed variant 314000
Processed variant 315000
Processed variant 316000
Processed variant 317000
Processed variant 318000
Processed variant 319000
Processed variant 320000
Processed variant 321000
Processed variant 322000
Processed variant 323000
Processed variant 324000
Processed variant 325000
Processed variant 326000
Processed variant 327000
Processed variant 328000
Processed variant 329000
Processed variant 330000
Processed variant 331000
Processed variant 332000
Processed variant 333000
Processed variant 334000
Processed variant 335000
Processed variant 336000
Processed variant 337000
Processed variant 338000
Processed variant 339000
Processed variant 340000
Processed variant 341000
Processed variant 342000
Processed variant 343000
Processed variant 344000
Processed variant 345000
Processed variant 346000
Processed variant 347000
Processed variant 348000
Processed variant 349000
Processed variant 350000
Processed variant 351000
Processed variant 352000
Processed variant 353000
Processed variant 354000
Processed variant 355000
Processed variant 356000
Processed variant 357000
Processed variant 358000
Processed variant 359000
Processed variant 360000
Processed variant 361000
Processed variant 362000
Processed variant 363000
Processed variant 364000
Processed variant 365000
Processed variant 366000
Processed variant 367000
Processed variant 368000
Processed variant 369000
Processed variant 370000
Processed variant 371000
Processed variant 372000
Processed variant 373000
Processed variant 374000
Processed variant 375000
Processed variant 376000
Processed variant 377000
Processed variant 378000
Processed variant 379000
Processed variant 380000
Processed variant 381000
Processed variant 382000
Processed variant 383000
Processed variant 384000
Processed variant 385000
Processed variant 386000
Processed variant 387000
Processed variant 388000
Processed variant 389000
Processed variant 390000
Processed variant 391000
Processed variant 392000
Processed variant 393000
Processed variant 394000
Processed variant 395000
Processed variant 396000
Processed variant 397000
Processed variant 398000
Processed variant 399000
Processed variant 400000
Processed variant 401000
Processed variant 402000
Processed variant 403000
Processed variant 404000
Processed variant 405000
Processed variant 406000
Processed variant 407000
Processed variant 408000
Processed variant 409000
Processed variant 410000
Processed variant 411000
Processed variant 412000
Processed variant 413000
Processed variant 414000
Processed variant 415000
Processed variant 416000
Processed variant 417000
Processed variant 418000
Processed variant 419000
Processed variant 420000
Processed variant 421000
Processed variant 422000
Processed variant 423000
Processed variant 424000
Processed variant 425000
Processed variant 426000
Processed variant 427000
Processed variant 428000
Processed variant 429000
Processed variant 430000
Processed variant 431000
Processed variant 432000
Processed variant 433000
Processed variant 434000
Processed variant 435000
Processed variant 436000
Processed variant 437000
Processed variant 438000
Processed variant 439000
Processed variant 440000
Processed variant 441000
Processed variant 442000
Processed variant 443000
Processed variant 444000
Processed variant 445000
Processed variant 446000
Processed variant 447000
Processed variant 448000
Processed variant 449000
Processed variant 450000
Processed variant 451000
Processed variant 452000
Processed variant 453000
Processed variant 454000
Processed variant 455000
Processed variant 456000
Processed variant 457000
Processed variant 458000
Processed variant 459000
Processed variant 460000
Processed variant 461000
Processed variant 462000
Processed variant 463000
Processed variant 464000
Processed variant 465000
Processed variant 466000
Processed variant 467000
Processed variant 468000
Processed variant 469000
Processed variant 470000
Processed variant 471000
Processed variant 472000
Processed variant 473000
Processed variant 474000
Processed variant 475000
Processed variant 476000
Processed variant 477000
Processed variant 478000
Processed variant 479000
Processed variant 480000
Processed variant 481000
Processed variant 482000
Processed variant 483000
Processed variant 484000
Processed variant 485000
Processed variant 486000
Processed variant 487000
Processed variant 488000
Processed variant 489000
Processed variant 490000
Processed variant 491000
Processed variant 492000
Processed variant 493000
Processed variant 494000
Processed variant 495000
Processed variant 496000
Processed variant 497000
Processed variant 498000
Processed variant 499000
Processed variant 500000
Processed variant: 500000
## All variants processed
## 29.3% of genotypes fall below a read depth of 5 and were converted to NA
## 1.32% of genotypes fall below a genotype quality of 30 and were converted to NA
#load the 500K SNP benchmarking vcf into memory as a vcfR object,
#so that the filtering step can be timed separately from reading the file
x <- vcfR::read.vcfR(file = "~/Desktop/benchmarking.vcfs/benchmark.500K.vcf.gz")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 5e+05
##   column count: 109
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 500000
##   Character matrix gt cols: 109
##   skip: 0
##   nrows: 500000
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant 201000
Processed variant 202000
Processed variant 203000
Processed variant 204000
Processed variant 205000
Processed variant 206000
Processed variant 207000
Processed variant 208000
Processed variant 209000
Processed variant 210000
Processed variant 211000
Processed variant 212000
Processed variant 213000
Processed variant 214000
Processed variant 215000
Processed variant 216000
Processed variant 217000
Processed variant 218000
Processed variant 219000
Processed variant 220000
Processed variant 221000
Processed variant 222000
Processed variant 223000
Processed variant 224000
Processed variant 225000
Processed variant 226000
Processed variant 227000
Processed variant 228000
Processed variant 229000
Processed variant 230000
Processed variant 231000
Processed variant 232000
Processed variant 233000
Processed variant 234000
Processed variant 235000
Processed variant 236000
Processed variant 237000
Processed variant 238000
Processed variant 239000
Processed variant 240000
Processed variant 241000
Processed variant 242000
Processed variant 243000
Processed variant 244000
Processed variant 245000
Processed variant 246000
Processed variant 247000
Processed variant 248000
Processed variant 249000
Processed variant 250000
Processed variant 251000
Processed variant 252000
Processed variant 253000
Processed variant 254000
Processed variant 255000
Processed variant 256000
Processed variant 257000
Processed variant 258000
Processed variant 259000
Processed variant 260000
Processed variant 261000
Processed variant 262000
Processed variant 263000
Processed variant 264000
Processed variant 265000
Processed variant 266000
Processed variant 267000
Processed variant 268000
Processed variant 269000
Processed variant 270000
Processed variant 271000
Processed variant 272000
Processed variant 273000
Processed variant 274000
Processed variant 275000
Processed variant 276000
Processed variant 277000
Processed variant 278000
Processed variant 279000
Processed variant 280000
Processed variant 281000
Processed variant 282000
Processed variant 283000
Processed variant 284000
Processed variant 285000
Processed variant 286000
Processed variant 287000
Processed variant 288000
Processed variant 289000
Processed variant 290000
Processed variant 291000
Processed variant 292000
Processed variant 293000
Processed variant 294000
Processed variant 295000
Processed variant 296000
Processed variant 297000
Processed variant 298000
Processed variant 299000
Processed variant 300000
Processed variant 301000
Processed variant 302000
Processed variant 303000
Processed variant 304000
Processed variant 305000
Processed variant 306000
Processed variant 307000
Processed variant 308000
Processed variant 309000
Processed variant 310000
Processed variant 311000
Processed variant 312000
Processed variant 313000
Processed variant 314000
Processed variant 315000
Processed variant 316000
Processed variant 317000
Processed variant 318000
Processed variant 319000
Processed variant 320000
Processed variant 321000
Processed variant 322000
Processed variant 323000
Processed variant 324000
Processed variant 325000
Processed variant 326000
Processed variant 327000
Processed variant 328000
Processed variant 329000
Processed variant 330000
Processed variant 331000
Processed variant 332000
Processed variant 333000
Processed variant 334000
Processed variant 335000
Processed variant 336000
Processed variant 337000
Processed variant 338000
Processed variant 339000
Processed variant 340000
Processed variant 341000
Processed variant 342000
Processed variant 343000
Processed variant 344000
Processed variant 345000
Processed variant 346000
Processed variant 347000
Processed variant 348000
Processed variant 349000
Processed variant 350000
Processed variant 351000
Processed variant 352000
Processed variant 353000
Processed variant 354000
Processed variant 355000
Processed variant 356000
Processed variant 357000
Processed variant 358000
Processed variant 359000
Processed variant 360000
Processed variant 361000
Processed variant 362000
Processed variant 363000
Processed variant 364000
Processed variant 365000
Processed variant 366000
Processed variant 367000
Processed variant 368000
Processed variant 369000
Processed variant 370000
Processed variant 371000
Processed variant 372000
Processed variant 373000
Processed variant 374000
Processed variant 375000
Processed variant 376000
Processed variant 377000
Processed variant 378000
Processed variant 379000
Processed variant 380000
Processed variant 381000
Processed variant 382000
Processed variant 383000
Processed variant 384000
Processed variant 385000
Processed variant 386000
Processed variant 387000
Processed variant 388000
Processed variant 389000
Processed variant 390000
Processed variant 391000
Processed variant 392000
Processed variant 393000
Processed variant 394000
Processed variant 395000
Processed variant 396000
Processed variant 397000
Processed variant 398000
Processed variant 399000
Processed variant 400000
Processed variant 401000
Processed variant 402000
Processed variant 403000
Processed variant 404000
Processed variant 405000
Processed variant 406000
Processed variant 407000
Processed variant 408000
Processed variant 409000
Processed variant 410000
Processed variant 411000
Processed variant 412000
Processed variant 413000
Processed variant 414000
Processed variant 415000
Processed variant 416000
Processed variant 417000
Processed variant 418000
Processed variant 419000
Processed variant 420000
Processed variant 421000
Processed variant 422000
Processed variant 423000
Processed variant 424000
Processed variant 425000
Processed variant 426000
Processed variant 427000
Processed variant 428000
Processed variant 429000
Processed variant 430000
Processed variant 431000
Processed variant 432000
Processed variant 433000
Processed variant 434000
Processed variant 435000
Processed variant 436000
Processed variant 437000
Processed variant 438000
Processed variant 439000
Processed variant 440000
Processed variant 441000
Processed variant 442000
Processed variant 443000
Processed variant 444000
Processed variant 445000
Processed variant 446000
Processed variant 447000
Processed variant 448000
Processed variant 449000
Processed variant 450000
Processed variant 451000
Processed variant 452000
Processed variant 453000
Processed variant 454000
Processed variant 455000
Processed variant 456000
Processed variant 457000
Processed variant 458000
Processed variant 459000
Processed variant 460000
Processed variant 461000
Processed variant 462000
Processed variant 463000
Processed variant 464000
Processed variant 465000
Processed variant 466000
Processed variant 467000
Processed variant 468000
Processed variant 469000
Processed variant 470000
Processed variant 471000
Processed variant 472000
Processed variant 473000
Processed variant 474000
Processed variant 475000
Processed variant 476000
Processed variant 477000
Processed variant 478000
Processed variant 479000
Processed variant 480000
Processed variant 481000
Processed variant 482000
Processed variant 483000
Processed variant 484000
Processed variant 485000
Processed variant 486000
Processed variant 487000
Processed variant 488000
Processed variant 489000
Processed variant 490000
Processed variant 491000
Processed variant 492000
Processed variant 493000
Processed variant 494000
Processed variant 495000
Processed variant 496000
Processed variant 497000
Processed variant 498000
Processed variant 499000
Processed variant 500000
Processed variant: 500000
## All variants processed
#now benchmark the filtering step alone, using the vcfR object (x) already read into memory
z<-microbenchmark(
  #benchmark 500K without having to read in the vcf file
  hard_filter(vcfR = x, depth = 5, gq = 30),
  #set number of reps and units as seconds
  times = 3, unit = "s"
)
## 29.3% of genotypes fall below a read depth of 5 and were converted to NA
## 1.32% of genotypes fall below a genotype quality of 30 and were converted to NA
## 29.3% of genotypes fall below a read depth of 5 and were converted to NA
## 1.32% of genotypes fall below a genotype quality of 30 and were converted to NA
## 29.3% of genotypes fall below a read depth of 5 and were converted to NA
## 1.32% of genotypes fall below a genotype quality of 30 and were converted to NA
#convert each microbenchmark result to a dataframe of summary timings
#(y is not defined in this part of the file; presumably it is the 500K
#benchmark that includes reading in the vcf -- confirm against the code above)
y<-summary(y)
z<-summary(z)

#add these results into the full dataframe
#(appends the two 500K rows below the rows already collected in sum.df)
sum.df<-rbind(sum.df,y,z)

Now add informative columns to the dataframe

#label each row with the approach it timed; the rows alternate between the
#benchmark that includes reading in the vcf and the filter-only benchmark
#(times = 8 is kept on purpose: the assignment errors if sum.df does not
#hold exactly two rows for each of the eight vcf files)
sum.df$approach<-rep(c("SNPfiltR+vcfR","SNPfiltR"), times = 8)

#record the number of SNPs in the vcf file behind each row (two rows per file)
sum.df$SNPs<-rep(c(10000,20000,50000,100000,200000,300000,400000,500000),
                 each = 2)

#keep only the columns needed for plotting, selected by name rather than by
#position so the right columns are returned even if the layout of the
#microbenchmark summary changes
SNPfiltR.times<-sum.df[,c("approach","SNPs","mean")]

Now time VCFtools performing the same filtering, in bash, three times for the 10K SNP dataset

VCFtools benchmark 10K

cd /Users/devder/Desktop/benchmarking.vcfs
#start from an empty time.txt, then append the timing output of three replicate runs on the 10K SNP file
: > time.txt
for replicate in 1 2 3; do
  { time /Users/devder/Downloads/vcftools/src/cpp/vcftools --gzvcf benchmark.10K.vcf.gz --minGQ 30 --minDP 5 --recode --recode-INFO-all ; } >> time.txt 2>&1
done

VCFtools benchmark 20K

cd /Users/devder/Desktop/benchmarking.vcfs
#append the timing output of three replicate runs on the 20K SNP file to time.txt
for replicate in 1 2 3; do
  { time /Users/devder/Downloads/vcftools/src/cpp/vcftools --gzvcf benchmark.20K.vcf.gz --minGQ 30 --minDP 5 --recode --recode-INFO-all ; } >> time.txt 2>&1
done

VCFtools benchmark 50K

cd /Users/devder/Desktop/benchmarking.vcfs
#append the timing output of three replicate runs on the 50K SNP file to time.txt
for replicate in 1 2 3; do
  { time /Users/devder/Downloads/vcftools/src/cpp/vcftools --gzvcf benchmark.50K.vcf.gz --minGQ 30 --minDP 5 --recode --recode-INFO-all ; } >> time.txt 2>&1
done

VCFtools benchmark 100K

cd /Users/devder/Desktop/benchmarking.vcfs
#append the timing output of three replicate runs on the 100K SNP file to time.txt
for replicate in 1 2 3; do
  { time /Users/devder/Downloads/vcftools/src/cpp/vcftools --gzvcf benchmark.100K.vcf.gz --minGQ 30 --minDP 5 --recode --recode-INFO-all ; } >> time.txt 2>&1
done

VCFtools benchmark 200K

cd /Users/devder/Desktop/benchmarking.vcfs
#append the timing output of three replicate runs on the 200K SNP file to time.txt
for replicate in 1 2 3; do
  { time /Users/devder/Downloads/vcftools/src/cpp/vcftools --gzvcf benchmark.200K.vcf.gz --minGQ 30 --minDP 5 --recode --recode-INFO-all ; } >> time.txt 2>&1
done

VCFtools benchmark 300K

cd /Users/devder/Desktop/benchmarking.vcfs
#append the timing output of three replicate runs on the 300K SNP file to time.txt
for replicate in 1 2 3; do
  { time /Users/devder/Downloads/vcftools/src/cpp/vcftools --gzvcf benchmark.300K.vcf.gz --minGQ 30 --minDP 5 --recode --recode-INFO-all ; } >> time.txt 2>&1
done

VCFtools benchmark 400K

cd /Users/devder/Desktop/benchmarking.vcfs
#append the timing output of three replicate runs on the 400K SNP file to time.txt
for replicate in 1 2 3; do
  { time /Users/devder/Downloads/vcftools/src/cpp/vcftools --gzvcf benchmark.400K.vcf.gz --minGQ 30 --minDP 5 --recode --recode-INFO-all ; } >> time.txt 2>&1
done

VCFtools benchmark 500K

cd /Users/devder/Desktop/benchmarking.vcfs
#append the timing output of three replicate runs on the 500K SNP file to time.txt
for replicate in 1 2 3; do
  { time /Users/devder/Downloads/vcftools/src/cpp/vcftools --gzvcf benchmark.500K.vcf.gz --minGQ 30 --minDP 5 --recode --recode-INFO-all ; } >> time.txt 2>&1
done

Clean the output file with a sed one-liner to keep only the lines beginning with “real” (which contain the time info for each replicate run)

cd /Users/devder/Desktop/benchmarking.vcfs
#sed one-liner: delete every line that does not begin with "real", leaving only the elapsed time of each run
sed '/^real/!d' time.txt > cleantimes.txt

Read the cleaned file into R and combine it with our microbenchmark output into a single cohesive dataframe

#load the tab-separated VCFtools timings (one "real" line per replicate run)
vcftools.times <- read.table("~/Desktop/benchmarking.vcfs/cleantimes.txt", sep = "\t")

#overwrite column 1 with the number of SNPs in the vcf file used for each run
#(three consecutive replicate runs per file, in the order the files were timed)
vcftools.times$V1 <- rep(c(10000, 20000, 50000, 100000,
                           200000, 300000, 400000, 500000), each = 3)

#rewrite column 2 as minutes:seconds by turning "m" into ":" and dropping "s"
vcftools.times$V2 <- gsub("s", "", gsub("m", ":", vcftools.times$V2))
#then convert minutes:seconds to a plain number of seconds for plotting
vcftools.times$V2 <- lubridate::period_to_seconds(lubridate::ms(vcftools.times$V2))

#average the runtimes of the three replicates for each vcf file
v <- aggregate(vcftools.times$V2, list(vcftools.times$V1), mean)
#name the two aggregate columns to match the microbenchmark dataframe
colnames(v)[1:2] <- c("SNPs", "mean")
#tag every one of these times as coming from VCFtools
v$approach <- rep("VCFtools", times = nrow(v))
#put the columns in the same order as the microbenchmark dataframe
v <- v[, c("approach", "SNPs", "mean")]

#stack the VCFtools times underneath the SNPfiltR times
benchmark.df <- rbind(SNPfiltR.times, v)
#store approach as a factor for plotting
benchmark.df$approach <- as.factor(benchmark.df$approach)

#print the finished table
benchmark.df
##         approach  SNPs        mean
## 1  SNPfiltR+vcfR 1e+04   1.3929194
## 2       SNPfiltR 1e+04   0.4061834
## 3  SNPfiltR+vcfR 2e+04   2.7607792
## 4       SNPfiltR 2e+04   0.9804999
## 5  SNPfiltR+vcfR 5e+04   6.6884265
## 6       SNPfiltR 5e+04   2.6912465
## 7  SNPfiltR+vcfR 1e+05  15.1215112
## 8       SNPfiltR 1e+05   5.5519810
## 9  SNPfiltR+vcfR 2e+05  34.3970906
## 10      SNPfiltR 2e+05  13.4741584
## 11 SNPfiltR+vcfR 3e+05  48.4870002
## 12      SNPfiltR 3e+05  18.2342997
## 13 SNPfiltR+vcfR 4e+05  69.9663987
## 14      SNPfiltR 4e+05  28.6984442
## 15 SNPfiltR+vcfR 5e+05 101.2610619
## 16      SNPfiltR 5e+05  37.9307606
## 17      VCFtools 1e+04   1.8620000
## 18      VCFtools 2e+04   2.3426667
## 19      VCFtools 5e+04   5.1456667
## 20      VCFtools 1e+05  10.5826667
## 21      VCFtools 2e+05  22.0676667
## 22      VCFtools 3e+05  41.0583333
## 23      VCFtools 4e+05  50.1796667
## 24      VCFtools 5e+05  53.2986667
#save the combined benchmarking table to disk, without quoting and without row names
#(FALSE is spelled out because F is an ordinary variable that can be reassigned)
write.csv(benchmark.df, file="~/Desktop/benchmarking.vcfs/benchmark.info.csv", quote = FALSE, row.names = FALSE)

Compare runtimes

#build the comparative runtime plot once: mean runtime against number of SNPs,
#with one colored series of points and lines per approach
runtime.plot <- ggplot(benchmark.df, aes(x = SNPs, y = mean, color = approach)) +
  geom_point(aes(fill = approach), size = 3) +
  geom_line(aes(group = approach)) +
  theme_classic() +
  ylab("mean runtime (seconds)") +
  theme(legend.position = c(0.2, 0.8))

#display the plot
runtime.plot

#save a copy of the same plot as a pdf
ggsave(filename = "~/Desktop/SNPfiltR.mol.ecol.resour.submission/comp.runtimes.pdf",
       plot = runtime.plot,
       height = 3, width = 4, units = "in")

#try it with a log10 scaled x axis
runtime.plot + scale_x_log10()