library(SNPfiltR)
## This is SNPfiltR v.1.0.0
## 
## Detailed usage information is available at: devonderaad.github.io/SNPfiltR/ 
## 
## If you use SNPfiltR in your published work, please cite the following papers: 
## 
## DeRaad, D.A. (2022), SNPfiltR: an R package for interactive and reproducible SNP filtering. Molecular Ecology Resources, 00, 1-15. http://doi.org/10.1111/1755-0998.13618 
## 
## Knaus, Brian J., and Niklaus J. Grunwald. 2017. VCFR: a package to manipulate and visualize variant call format data in R. Molecular Ecology Resources, 17.1:44-53. http://doi.org/10.1111/1755-0998.12549
library(vcfR)
## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.12.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****
#read in vcf as vcfR
v<- read.vcfR("~/Desktop/phil.dicaeum/n8.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 250231
##   column count: 73
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 250231
##   Character matrix gt cols: 73
##   skip: 0
##   nrows: 250231
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant 58000
Processed variant 59000
Processed variant 60000
Processed variant 61000
Processed variant 62000
Processed variant 63000
Processed variant 64000
Processed variant 65000
Processed variant 66000
Processed variant 67000
Processed variant 68000
Processed variant 69000
Processed variant 70000
Processed variant 71000
Processed variant 72000
Processed variant 73000
Processed variant 74000
Processed variant 75000
Processed variant 76000
Processed variant 77000
Processed variant 78000
Processed variant 79000
Processed variant 80000
Processed variant 81000
Processed variant 82000
Processed variant 83000
Processed variant 84000
Processed variant 85000
Processed variant 86000
Processed variant 87000
Processed variant 88000
Processed variant 89000
Processed variant 90000
Processed variant 91000
Processed variant 92000
Processed variant 93000
Processed variant 94000
Processed variant 95000
Processed variant 96000
Processed variant 97000
Processed variant 98000
Processed variant 99000
Processed variant 100000
Processed variant 101000
Processed variant 102000
Processed variant 103000
Processed variant 104000
Processed variant 105000
Processed variant 106000
Processed variant 107000
Processed variant 108000
Processed variant 109000
Processed variant 110000
Processed variant 111000
Processed variant 112000
Processed variant 113000
Processed variant 114000
Processed variant 115000
Processed variant 116000
Processed variant 117000
Processed variant 118000
Processed variant 119000
Processed variant 120000
Processed variant 121000
Processed variant 122000
Processed variant 123000
Processed variant 124000
Processed variant 125000
Processed variant 126000
Processed variant 127000
Processed variant 128000
Processed variant 129000
Processed variant 130000
Processed variant 131000
Processed variant 132000
Processed variant 133000
Processed variant 134000
Processed variant 135000
Processed variant 136000
Processed variant 137000
Processed variant 138000
Processed variant 139000
Processed variant 140000
Processed variant 141000
Processed variant 142000
Processed variant 143000
Processed variant 144000
Processed variant 145000
Processed variant 146000
Processed variant 147000
Processed variant 148000
Processed variant 149000
Processed variant 150000
Processed variant 151000
Processed variant 152000
Processed variant 153000
Processed variant 154000
Processed variant 155000
Processed variant 156000
Processed variant 157000
Processed variant 158000
Processed variant 159000
Processed variant 160000
Processed variant 161000
Processed variant 162000
Processed variant 163000
Processed variant 164000
Processed variant 165000
Processed variant 166000
Processed variant 167000
Processed variant 168000
Processed variant 169000
Processed variant 170000
Processed variant 171000
Processed variant 172000
Processed variant 173000
Processed variant 174000
Processed variant 175000
Processed variant 176000
Processed variant 177000
Processed variant 178000
Processed variant 179000
Processed variant 180000
Processed variant 181000
Processed variant 182000
Processed variant 183000
Processed variant 184000
Processed variant 185000
Processed variant 186000
Processed variant 187000
Processed variant 188000
Processed variant 189000
Processed variant 190000
Processed variant 191000
Processed variant 192000
Processed variant 193000
Processed variant 194000
Processed variant 195000
Processed variant 196000
Processed variant 197000
Processed variant 198000
Processed variant 199000
Processed variant 200000
Processed variant 201000
Processed variant 202000
Processed variant 203000
Processed variant 204000
Processed variant 205000
Processed variant 206000
Processed variant 207000
Processed variant 208000
Processed variant 209000
Processed variant 210000
Processed variant 211000
Processed variant 212000
Processed variant 213000
Processed variant 214000
Processed variant 215000
Processed variant 216000
Processed variant 217000
Processed variant 218000
Processed variant 219000
Processed variant 220000
Processed variant 221000
Processed variant 222000
Processed variant 223000
Processed variant 224000
Processed variant 225000
Processed variant 226000
Processed variant 227000
Processed variant 228000
Processed variant 229000
Processed variant 230000
Processed variant 231000
Processed variant 232000
Processed variant 233000
Processed variant 234000
Processed variant 235000
Processed variant 236000
Processed variant 237000
Processed variant 238000
Processed variant 239000
Processed variant 240000
Processed variant 241000
Processed variant 242000
Processed variant 243000
Processed variant 244000
Processed variant 245000
Processed variant 246000
Processed variant 247000
Processed variant 248000
Processed variant 249000
Processed variant 250000
Processed variant: 250231
## All variants processed
#make popmap
pops<-read.csv("~/Desktop/phil.dicaeum/dicaeum.sampling.csv")
pops<-pops[pops$ID %in% colnames(v@gt),]
popmap<-pops[,c(1,4)]

#remove obviously contaminated sample from looking at the preliminary splitstree
v<-v[,colnames(v@gt)!= "D_hypoleucum_28663"]
#fix popmap
pops<-pops[pops$ID %in% colnames(v@gt),]
popmap<-pops[,c(1,4)]
#visualize distributions
hard_filter(v)
## no depth cutoff provided, exploratory visualization will be generated.

## no genotype quality cutoff provided, exploratory visualization will be generated.

## ***** Object of Class vcfR *****
## 63 samples
## 47564 CHROMs
## 250,231 variants
## Object size: 625.3 Mb
## 58.08 percent missing data
## *****        *****         *****
#hard filter to minimum depth of 5, and minimum genotype quality of 30
v<-hard_filter(v, depth = 5, gq = 35)
## 0.05% of genotypes fall below a read depth of 5 and were converted to NA
## 2.54% of genotypes fall below a genotype quality of 35 and were converted to NA
#execute allele balance filter
v<-filter_allele_balance(v)
## 16.48% of het genotypes (0.95% of all genotypes) fall outside of 0.25 - 0.75 allele balance ratio and were converted to NA

#visualize and pick appropriate max depth cutoff
max_depth(v)
## cutoff is not specified, exploratory visualization will be generated.

## dashed line indicates a mean depth across all SNPs of 72.8

#filter for max depth
max_depth(v, maxdepth = 150)
## maxdepth cutoff is specified, filtered vcfR object will be returned
## 16.31% of SNPs were above a mean depth of 150 and were removed from the vcf

## ***** Object of Class vcfR *****
## 63 samples
## 41978 CHROMs
## 209,408 variants
## Object size: 450.1 Mb
## 64.05 percent missing data
## *****        *****         *****
#remove invariant sites generated by filtering
v<-min_mac(v, min.mac = 1)
## 17.36% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#check vcfR to see how many SNPs we have left
v
## ***** Object of Class vcfR *****
## 63 samples
## 43982 CHROMs
## 206,800 variants
## Object size: 527.5 Mb
## 58.33 percent missing data
## *****        *****         *****
#investigate patterns of missingness per sample
missing_by_sample(v)
## No popmap provided

##                       indiv filt snps.retained
## 1        D_hypoleucum_18070  0.5         71578
## 2        D_hypoleucum_18191  0.5         73049
## 3        D_hypoleucum_14037  0.5         74014
## 4        D_hypoleucum_14079  0.5         60840
## 5        D_hypoleucum_14065  0.5         72256
## 6        D_hypoleucum_14120  0.5         46184
## 7        D_hypoleucum_17976  0.5         37744
## 8        D_hypoleucum_18159  0.5         72353
## 9        D_hypoleucum_14075  0.5         70540
## 10       D_hypoleucum_20218  0.5         67004
## 11       D_hypoleucum_19066  0.5         34890
## 12       D_hypoleucum_19177  0.5         72160
## 13       D_hypoleucum_19178  0.5         71258
## 14       D_hypoleucum_14146  0.5         36352
## 15       D_hypoleucum_20213  0.5         52551
## 16  D_hypoleucum_FMNH454949  0.5         55342
## 17       D_hypoleucum_25637  0.5         74455
## 18        D_hypoleucum_1271  0.5         71906
## 19        D_hypoleucum_1273  0.5         74371
## 20        D_hypoleucum_1275  0.5         59957
## 21       D_hypoleucum_25921  0.5         69190
## 22        D_hypoleucum_3274  0.5         68561
## 23       D_hypoleucum_26984  0.5         73388
## 24        D_hypoleucum_3208  0.5         74503
## 25        D_hypoleucum_3158  0.5         74560
## 26        D_hypoleucum_2253  0.5         63952
## 27        D_hypoleucum_3095  0.5         68611
## 28      D_hypoleucum_462070  0.5         73605
## 29       D_hypoleucum_25675  0.5         44555
## 30      D_hypoleucum_454950  0.5         73786
## 31       D_hypoleucum_25880  0.5         68333
## 32        D_hypoleucum_3314  0.5         72117
## 33      D_hypoleucum_357608  0.5         45510
## 34      D_hypoleucum_357615  0.5         75155
## 35      D_hypoleucum_357612  0.5         39822
## 36       D_hypoleucum_19638  0.5         66516
## 37       D_hypoleucum_25868  0.5         74066
## 38       D_hypoleucum_17969  0.5         68845
## 39       D_hypoleucum_25672  0.5         46130
## 40       D_hypoleucum_26975  0.5         64410
## 41        D_hypoleucum_2229  0.5         52940
## 42        D_hypoleucum_2067  0.5         75507
## 43       D_hypoleucum_28329  0.5         72370
## 44       D_hypoleucum_28361  0.5         61609
## 45       D_hypoleucum_28376  0.5         66034
## 46        D_hypoleucum_2208  0.5         75500
## 47       D_hypoleucum_28294  0.5         61791
## 48        D_hypoleucum_1956  0.5         76320
## 49       D_hypoleucum_25670  0.5         58783
## 50       D_hypoleucum_20921  0.5         75733
## 51       D_hypoleucum_20193  0.5         41715
## 52       D_hypoleucum_28416  0.5         70945
## 53       D_hypoleucum_18193  0.5         75608
## 54       D_hypoleucum_27450  0.5         74751
## 55       D_hypoleucum_19046  0.5         67654
## 56       D_hypoleucum_19136  0.5         69792
## 57       D_hypoleucum_28676  0.5         73799
## 58      D_nigrilore_KU28413  0.5         59445
## 59      D_nigrilore_KU28414  0.5         63208
## 60       D_hypoleucum_29945  0.5         74446
## 61       D_hypoleucum_31636  0.5         74750
## 62       D_hypoleucum_29951  0.5         72999
## 63       D_hypoleucum_31644  0.5         71622
## 64       D_hypoleucum_18070  0.6         63616
## 65       D_hypoleucum_18191  0.6         64388
## 66       D_hypoleucum_14037  0.6         64024
## 67       D_hypoleucum_14079  0.6         54176
## 68       D_hypoleucum_14065  0.6         62307
## 69       D_hypoleucum_14120  0.6         41210
## 70       D_hypoleucum_17976  0.6         33983
## 71       D_hypoleucum_18159  0.6         62882
## 72       D_hypoleucum_14075  0.6         60317
## 73       D_hypoleucum_20218  0.6         56835
## 74       D_hypoleucum_19066  0.6         31233
## 75       D_hypoleucum_19177  0.6         63991
## 76       D_hypoleucum_19178  0.6         62982
## 77       D_hypoleucum_14146  0.6         32573
## 78       D_hypoleucum_20213  0.6         47108
## 79  D_hypoleucum_FMNH454949  0.6         49611
## 80       D_hypoleucum_25637  0.6         64130
## 81        D_hypoleucum_1271  0.6         61842
## 82        D_hypoleucum_1273  0.6         65590
## 83        D_hypoleucum_1275  0.6         49600
## 84       D_hypoleucum_25921  0.6         58984
## 85        D_hypoleucum_3274  0.6         57740
## 86       D_hypoleucum_26984  0.6         63989
## 87        D_hypoleucum_3208  0.6         64870
## 88        D_hypoleucum_3158  0.6         65443
## 89        D_hypoleucum_2253  0.6         57370
## 90        D_hypoleucum_3095  0.6         61178
## 91      D_hypoleucum_462070  0.6         63547
## 92       D_hypoleucum_25675  0.6         39939
## 93      D_hypoleucum_454950  0.6         64997
## 94       D_hypoleucum_25880  0.6         57854
## 95        D_hypoleucum_3314  0.6         63991
## 96      D_hypoleucum_357608  0.6         40601
## 97      D_hypoleucum_357615  0.6         65342
## 98      D_hypoleucum_357612  0.6         36032
## 99       D_hypoleucum_19638  0.6         59324
## 100      D_hypoleucum_25868  0.6         65354
## 101      D_hypoleucum_17969  0.6         58527
## 102      D_hypoleucum_25672  0.6         41468
## 103      D_hypoleucum_26975  0.6         57578
## 104       D_hypoleucum_2229  0.6         47430
## 105       D_hypoleucum_2067  0.6         66059
## 106      D_hypoleucum_28329  0.6         63738
## 107      D_hypoleucum_28361  0.6         51416
## 108      D_hypoleucum_28376  0.6         56036
## 109       D_hypoleucum_2208  0.6         65821
## 110      D_hypoleucum_28294  0.6         51968
## 111       D_hypoleucum_1956  0.6         66784
## 112      D_hypoleucum_25670  0.6         52767
## 113      D_hypoleucum_20921  0.6         66196
## 114      D_hypoleucum_20193  0.6         37341
## 115      D_hypoleucum_28416  0.6         61005
## 116      D_hypoleucum_18193  0.6         65329
## 117      D_hypoleucum_27450  0.6         65793
## 118      D_hypoleucum_19046  0.6         57096
## 119      D_hypoleucum_19136  0.6         59218
## 120      D_hypoleucum_28676  0.6         63096
## 121     D_nigrilore_KU28413  0.6         52730
## 122     D_nigrilore_KU28414  0.6         54986
## 123      D_hypoleucum_29945  0.6         65331
## 124      D_hypoleucum_31636  0.6         65145
## 125      D_hypoleucum_29951  0.6         62654
## 126      D_hypoleucum_31644  0.6         63346
## 127      D_hypoleucum_18070  0.7         46250
## 128      D_hypoleucum_18191  0.7         46917
## 129      D_hypoleucum_14037  0.7         45940
## 130      D_hypoleucum_14079  0.7         40450
## 131      D_hypoleucum_14065  0.7         44765
## 132      D_hypoleucum_14120  0.7         30839
## 133      D_hypoleucum_17976  0.7         25388
## 134      D_hypoleucum_18159  0.7         45366
## 135      D_hypoleucum_14075  0.7         43208
## 136      D_hypoleucum_20218  0.7         40280
## 137      D_hypoleucum_19066  0.7         23463
## 138      D_hypoleucum_19177  0.7         46811
## 139      D_hypoleucum_19178  0.7         46325
## 140      D_hypoleucum_14146  0.7         24959
## 141      D_hypoleucum_20213  0.7         35396
## 142 D_hypoleucum_FMNH454949  0.7         37435
## 143      D_hypoleucum_25637  0.7         45900
## 144       D_hypoleucum_1271  0.7         44130
## 145       D_hypoleucum_1273  0.7         47527
## 146       D_hypoleucum_1275  0.7         34457
## 147      D_hypoleucum_25921  0.7         42260
## 148       D_hypoleucum_3274  0.7         40688
## 149      D_hypoleucum_26984  0.7         45975
## 150       D_hypoleucum_3208  0.7         46547
## 151       D_hypoleucum_3158  0.7         47346
## 152       D_hypoleucum_2253  0.7         42719
## 153       D_hypoleucum_3095  0.7         44832
## 154     D_hypoleucum_462070  0.7         45405
## 155      D_hypoleucum_25675  0.7         30064
## 156     D_hypoleucum_454950  0.7         47221
## 157      D_hypoleucum_25880  0.7         40835
## 158       D_hypoleucum_3314  0.7         46664
## 159     D_hypoleucum_357608  0.7         30345
## 160     D_hypoleucum_357615  0.7         46858
## 161     D_hypoleucum_357612  0.7         27495
## 162      D_hypoleucum_19638  0.7         43688
## 163      D_hypoleucum_25868  0.7         47240
## 164      D_hypoleucum_17969  0.7         41687
## 165      D_hypoleucum_25672  0.7         31019
## 166      D_hypoleucum_26975  0.7         42627
## 167       D_hypoleucum_2229  0.7         35465
## 168       D_hypoleucum_2067  0.7         47550
## 169      D_hypoleucum_28329  0.7         46794
## 170      D_hypoleucum_28361  0.7         36628
## 171      D_hypoleucum_28376  0.7         40345
## 172       D_hypoleucum_2208  0.7         47613
## 173      D_hypoleucum_28294  0.7         36843
## 174       D_hypoleucum_1956  0.7         47959
## 175      D_hypoleucum_25670  0.7         39754
## 176      D_hypoleucum_20921  0.7         47391
## 177      D_hypoleucum_20193  0.7         27971
## 178      D_hypoleucum_28416  0.7         44200
## 179      D_hypoleucum_18193  0.7         46632
## 180      D_hypoleucum_27450  0.7         47454
## 181      D_hypoleucum_19046  0.7         40743
## 182      D_hypoleucum_19136  0.7         42205
## 183      D_hypoleucum_28676  0.7         45089
## 184     D_nigrilore_KU28413  0.7         39038
## 185     D_nigrilore_KU28414  0.7         39764
## 186      D_hypoleucum_29945  0.7         47370
## 187      D_hypoleucum_31636  0.7         46995
## 188      D_hypoleucum_29951  0.7         44864
## 189      D_hypoleucum_31644  0.7         46351
## 190      D_hypoleucum_18070  0.8         23008
## 191      D_hypoleucum_18191  0.8         23340
## 192      D_hypoleucum_14037  0.8         22784
## 193      D_hypoleucum_14079  0.8         21227
## 194      D_hypoleucum_14065  0.8         22274
## 195      D_hypoleucum_14120  0.8         16317
## 196      D_hypoleucum_17976  0.8         13692
## 197      D_hypoleucum_18159  0.8         22717
## 198      D_hypoleucum_14075  0.8         21843
## 199      D_hypoleucum_20218  0.8         20799
## 200      D_hypoleucum_19066  0.8         12903
## 201      D_hypoleucum_19177  0.8         23346
## 202      D_hypoleucum_19178  0.8         23085
## 203      D_hypoleucum_14146  0.8         13558
## 204      D_hypoleucum_20213  0.8         18965
## 205 D_hypoleucum_FMNH454949  0.8         19615
## 206      D_hypoleucum_25637  0.8         22793
## 207       D_hypoleucum_1271  0.8         22216
## 208       D_hypoleucum_1273  0.8         23477
## 209       D_hypoleucum_1275  0.8         17998
## 210      D_hypoleucum_25921  0.8         21498
## 211       D_hypoleucum_3274  0.8         20545
## 212      D_hypoleucum_26984  0.8         22952
## 213       D_hypoleucum_3208  0.8         23091
## 214       D_hypoleucum_3158  0.8         23255
## 215       D_hypoleucum_2253  0.8         22029
## 216       D_hypoleucum_3095  0.8         22559
## 217     D_hypoleucum_462070  0.8         22609
## 218      D_hypoleucum_25675  0.8         16111
## 219     D_hypoleucum_454950  0.8         23330
## 220      D_hypoleucum_25880  0.8         20945
## 221       D_hypoleucum_3314  0.8         23270
## 222     D_hypoleucum_357608  0.8         16081
## 223     D_hypoleucum_357615  0.8         23068
## 224     D_hypoleucum_357612  0.8         15103
## 225      D_hypoleucum_19638  0.8         22099
## 226      D_hypoleucum_25868  0.8         23429
## 227      D_hypoleucum_17969  0.8         21357
## 228      D_hypoleucum_25672  0.8         16416
## 229      D_hypoleucum_26975  0.8         21598
## 230       D_hypoleucum_2229  0.8         18202
## 231       D_hypoleucum_2067  0.8         23497
## 232      D_hypoleucum_28329  0.8         23158
## 233      D_hypoleucum_28361  0.8         19196
## 234      D_hypoleucum_28376  0.8         20922
## 235       D_hypoleucum_2208  0.8         23394
## 236      D_hypoleucum_28294  0.8         19190
## 237       D_hypoleucum_1956  0.8         23595
## 238      D_hypoleucum_25670  0.8         20870
## 239      D_hypoleucum_20921  0.8         23380
## 240      D_hypoleucum_20193  0.8         15025
## 241      D_hypoleucum_28416  0.8         22394
## 242      D_hypoleucum_18193  0.8         23043
## 243      D_hypoleucum_27450  0.8         23464
## 244      D_hypoleucum_19046  0.8         20824
## 245      D_hypoleucum_19136  0.8         21414
## 246      D_hypoleucum_28676  0.8         22688
## 247     D_nigrilore_KU28413  0.8         20070
## 248     D_nigrilore_KU28414  0.8         20117
## 249      D_hypoleucum_29945  0.8         23414
## 250      D_hypoleucum_31636  0.8         23227
## 251      D_hypoleucum_29951  0.8         22688
## 252      D_hypoleucum_31644  0.8         23044
## 253      D_hypoleucum_18070  0.9          4692
## 254      D_hypoleucum_18191  0.9          4760
## 255      D_hypoleucum_14037  0.9          4657
## 256      D_hypoleucum_14079  0.9          4482
## 257      D_hypoleucum_14065  0.9          4603
## 258      D_hypoleucum_14120  0.9          3788
## 259      D_hypoleucum_17976  0.9          3325
## 260      D_hypoleucum_18159  0.9          4668
## 261      D_hypoleucum_14075  0.9          4623
## 262      D_hypoleucum_20218  0.9          4525
## 263      D_hypoleucum_19066  0.9          3289
## 264      D_hypoleucum_19177  0.9          4752
## 265      D_hypoleucum_19178  0.9          4691
## 266      D_hypoleucum_14146  0.9          3429
## 267      D_hypoleucum_20213  0.9          4346
## 268 D_hypoleucum_FMNH454949  0.9          4390
## 269      D_hypoleucum_25637  0.9          4710
## 270       D_hypoleucum_1271  0.9          4613
## 271       D_hypoleucum_1273  0.9          4790
## 272       D_hypoleucum_1275  0.9          4118
## 273      D_hypoleucum_25921  0.9          4611
## 274       D_hypoleucum_3274  0.9          4449
## 275      D_hypoleucum_26984  0.9          4662
## 276       D_hypoleucum_3208  0.9          4712
## 277       D_hypoleucum_3158  0.9          4707
## 278       D_hypoleucum_2253  0.9          4638
## 279       D_hypoleucum_3095  0.9          4679
## 280     D_hypoleucum_462070  0.9          4658
## 281      D_hypoleucum_25675  0.9          3893
## 282     D_hypoleucum_454950  0.9          4730
## 283      D_hypoleucum_25880  0.9          4549
## 284       D_hypoleucum_3314  0.9          4727
## 285     D_hypoleucum_357608  0.9          3790
## 286     D_hypoleucum_357615  0.9          4678
## 287     D_hypoleucum_357612  0.9          3629
## 288      D_hypoleucum_19638  0.9          4632
## 289      D_hypoleucum_25868  0.9          4730
## 290      D_hypoleucum_17969  0.9          4542
## 291      D_hypoleucum_25672  0.9          3920
## 292      D_hypoleucum_26975  0.9          4614
## 293       D_hypoleucum_2229  0.9          4134
## 294       D_hypoleucum_2067  0.9          4754
## 295      D_hypoleucum_28329  0.9          4721
## 296      D_hypoleucum_28361  0.9          4306
## 297      D_hypoleucum_28376  0.9          4585
## 298       D_hypoleucum_2208  0.9          4723
## 299      D_hypoleucum_28294  0.9          4408
## 300       D_hypoleucum_1956  0.9          4770
## 301      D_hypoleucum_25670  0.9          4495
## 302      D_hypoleucum_20921  0.9          4789
## 303      D_hypoleucum_20193  0.9          3691
## 304      D_hypoleucum_28416  0.9          4635
## 305      D_hypoleucum_18193  0.9          4701
## 306      D_hypoleucum_27450  0.9          4755
## 307      D_hypoleucum_19046  0.9          4474
## 308      D_hypoleucum_19136  0.9          4624
## 309      D_hypoleucum_28676  0.9          4680
## 310     D_nigrilore_KU28413  0.9          4303
## 311     D_nigrilore_KU28414  0.9          4345
## 312      D_hypoleucum_29945  0.9          4742
## 313      D_hypoleucum_31636  0.9          4734
## 314      D_hypoleucum_29951  0.9          4680
## 315      D_hypoleucum_31644  0.9          4750
## 316      D_hypoleucum_18070  1.0             2
## 317      D_hypoleucum_18191  1.0             2
## 318      D_hypoleucum_14037  1.0             2
## 319      D_hypoleucum_14079  1.0             2
## 320      D_hypoleucum_14065  1.0             2
## 321      D_hypoleucum_14120  1.0             2
## 322      D_hypoleucum_17976  1.0             2
## 323      D_hypoleucum_18159  1.0             2
## 324      D_hypoleucum_14075  1.0             2
## 325      D_hypoleucum_20218  1.0             2
## 326      D_hypoleucum_19066  1.0             2
## 327      D_hypoleucum_19177  1.0             2
## 328      D_hypoleucum_19178  1.0             2
## 329      D_hypoleucum_14146  1.0             2
## 330      D_hypoleucum_20213  1.0             2
## 331 D_hypoleucum_FMNH454949  1.0             2
## 332      D_hypoleucum_25637  1.0             2
## 333       D_hypoleucum_1271  1.0             2
## 334       D_hypoleucum_1273  1.0             2
## 335       D_hypoleucum_1275  1.0             2
## 336      D_hypoleucum_25921  1.0             2
## 337       D_hypoleucum_3274  1.0             2
## 338      D_hypoleucum_26984  1.0             2
## 339       D_hypoleucum_3208  1.0             2
## 340       D_hypoleucum_3158  1.0             2
## 341       D_hypoleucum_2253  1.0             2
## 342       D_hypoleucum_3095  1.0             2
## 343     D_hypoleucum_462070  1.0             2
## 344      D_hypoleucum_25675  1.0             2
## 345     D_hypoleucum_454950  1.0             2
## 346      D_hypoleucum_25880  1.0             2
## 347       D_hypoleucum_3314  1.0             2
## 348     D_hypoleucum_357608  1.0             2
## 349     D_hypoleucum_357615  1.0             2
## 350     D_hypoleucum_357612  1.0             2
## 351      D_hypoleucum_19638  1.0             2
## 352      D_hypoleucum_25868  1.0             2
## 353      D_hypoleucum_17969  1.0             2
## 354      D_hypoleucum_25672  1.0             2
## 355      D_hypoleucum_26975  1.0             2
## 356       D_hypoleucum_2229  1.0             2
## 357       D_hypoleucum_2067  1.0             2
## 358      D_hypoleucum_28329  1.0             2
## 359      D_hypoleucum_28361  1.0             2
## 360      D_hypoleucum_28376  1.0             2
## 361       D_hypoleucum_2208  1.0             2
## 362      D_hypoleucum_28294  1.0             2
## 363       D_hypoleucum_1956  1.0             2
## 364      D_hypoleucum_25670  1.0             2
## 365      D_hypoleucum_20921  1.0             2
## 366      D_hypoleucum_20193  1.0             2
## 367      D_hypoleucum_28416  1.0             2
## 368      D_hypoleucum_18193  1.0             2
## 369      D_hypoleucum_27450  1.0             2
## 370      D_hypoleucum_19046  1.0             2
## 371      D_hypoleucum_19136  1.0             2
## 372      D_hypoleucum_28676  1.0             2
## 373     D_nigrilore_KU28413  1.0             2
## 374     D_nigrilore_KU28414  1.0             2
## 375      D_hypoleucum_29945  1.0             2
## 376      D_hypoleucum_31636  1.0             2
## 377      D_hypoleucum_29951  1.0             2
## 378      D_hypoleucum_31644  1.0             2
#investigate patterns of missingness per SNP
missing_by_snp(v)
## cutoff is not specified, exploratory visualizations will be generated
## Picking joint bandwidth of 0.0671

##    filt missingness snps.retained
## 1  0.30  0.35380374        118045
## 2  0.50  0.27764467         90571
## 3  0.60  0.24277876         75418
## 4  0.65  0.22409186         66391
## 5  0.70  0.19504400         51513
## 6  0.75  0.16960576         38350
## 7  0.80  0.13985079         24435
## 8  0.85  0.10807756         12851
## 9  0.90  0.07399186          4827
## 10 0.95  0.03932816           963
## 11 1.00  0.00000000             2
#implement per sample completeness filter
v<-missing_by_sample(v, cutoff = .785)
## 3 samples are above a 0.785 missing data cutoff, and were removed from VCF

#remove invariant sites generated by dropping individuals
v<-min_mac(v, min.mac = 1)
## 0.64% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#fix popmap
pops<-pops[pops$ID %in% colnames(v@gt),]
popmap<-popmap[popmap$ID %in% colnames(v@gt),]
colnames(popmap)<-c('id','pop')
missing_by_snp(v)
## cutoff is not specified, exploratory visualizations will be generated
## Picking joint bandwidth of 0.0636

##    filt missingness snps.retained
## 1  0.30  0.34418169        119207
## 2  0.50  0.26937724         93086
## 3  0.60  0.23288863         77879
## 4  0.65  0.21423534         69242
## 5  0.70  0.19260721         58548
## 6  0.75  0.16827150         46121
## 7  0.80  0.14037959         32140
## 8  0.85  0.10845811         18499
## 9  0.90  0.07491050          8101
## 10 0.95  0.03711678          1881
## 11 1.00  0.00000000            62
#reorder popmap to match vcf, this is a bug, you shouldn't have to do this
popmaps<-popmap[order(match(popmap$id,colnames(v@gt)[-1])),]
miss<-assess_missing_data_tsne(v, popmap=popmaps, threshold=c(.85,.9,.95), clustering = FALSE)
## cutoff is specified, filtered vcfR object will be returned
## 91% of SNPs fell below a completeness cutoff of 0.85 and were removed from the VCF
## Loading required namespace: adegenet

## cutoff is specified, filtered vcfR object will be returned

## 97.32% of SNPs fell below a completeness cutoff of 0.9 and were removed from the VCF

## cutoff is specified, filtered vcfR object will be returned

## 99.08% of SNPs fell below a completeness cutoff of 0.95 and were removed from the VCF

#missing data doesn't seem to be driving clustering regardless of threhsold
#implement missing data per SNP filter
v<-missing_by_snp(v, cutoff=.85)
## cutoff is specified, filtered vcfR object will be returned
## 91% of SNPs fell below a completeness cutoff of 0.85 and were removed from the VCF

#investigate clustering patterns with a minor allele cutoff
v.mac<-min_mac(v, min.mac = 2)
## 40.57% of SNPs fell below a minor allele count of 2 and were removed from the VCF

v.mac
## ***** Object of Class vcfR *****
## 60 samples
## 2389 CHROMs
## 10,994 variants
## Object size: 54.9 Mb
## 11.01 percent missing data
## *****        *****         *****
assess_missing_data_tsne(v.mac, popmap=popmaps, clustering = FALSE)

##      tsne.ax1   tsne.ax2 popmap.pop
## 1   23.285083  44.283749      Luzon
## 2   52.012789 -29.714065  Zamboanga
## 3    3.045111  -1.630845   Mindanao
## 4    1.823689  -3.004490   Mindanao
## 5    5.069517  -1.928278   Mindanao
## 6  -21.486003 -20.117547   Mindanao
## 7   51.187124 -27.261521  Zamboanga
## 8    3.688963  -3.300658   Mindanao
## 9   36.229384  36.102958      Luzon
## 10  51.127101 -31.785423  Zamboanga
## 11  50.403871 -29.546373  Zamboanga
## 12  26.245166  47.584687      Luzon
## 13  24.933394  48.755476      Luzon
## 14  38.368464  37.057353      Luzon
## 15 -32.112363  -7.306372   Mindanao
## 16 -40.417793 -11.877839   Mindanao
## 17 -30.359613  -8.896437   Mindanao
## 18  34.622797  37.416804      Luzon
## 19 -41.437414 -14.264191   Mindanao
## 20  34.119261  35.824224      Luzon
## 21 -31.498155 -11.099356   Mindanao
## 22 -22.038848 -26.867581   Mindanao
## 23 -21.399663 -31.884239   Mindanao
## 24 -32.210442  -9.779238   Mindanao
## 25  28.078437  42.480649      Luzon
## 26  20.932720  53.772649      Luzon
## 27  35.526005  39.006265      Luzon
## 28  27.468343  44.819879      Luzon
## 29 -36.804177  -8.645378   Mindanao
## 30 -17.671575 -19.275233   Mindanao
## 31 -21.809102 -29.518483   Mindanao
## 32 -18.646999 -20.203929   Mindanao
## 33  24.864162  44.419736      Luzon
## 34  24.022050  39.589388      Luzon
## 35  21.760675  41.772599      Luzon
## 36  22.679416  53.651250      Luzon
## 37  24.143084  46.289444      Luzon
## 38 -22.493396 -32.838849   Mindanao
## 39 -22.188346 -35.655099   Mindanao
## 40 -35.332834 -16.593551   Mindanao
## 41 -24.290429 -19.816074   Mindanao
## 42 -24.114153 -17.526218   Mindanao
## 43 -20.520434 -35.460526   Mindanao
## 44 -36.534331 -15.110493   Mindanao
## 45 -20.740588 -37.575934   Mindanao
## 46  22.013085  45.310476      Luzon
## 47 -42.849258 -11.619304   Mindanao
## 48  21.772676  55.228000      Luzon
## 49 -39.616292 -10.707290   Mindanao
## 50  53.613577 -31.041270  Zamboanga
## 51 -25.534335 -16.229277   Mindanao
## 52 -26.884330 -15.495748   Mindanao
## 53 -26.351991 -20.047901   Mindanao
## 54 -34.322229 -14.904292   Mindanao
## 55 -14.544637  -8.102419  nigrilore
## 56 -14.891110  -6.991050  nigrilore
## 57  50.735209 -33.781664  Zamboanga
## 58 -30.879867 -18.007916   Mindanao
## 59  49.243578 -31.930041  Zamboanga
## 60 -33.034026 -16.023193   Mindanao
#seeing potentially spurious groupings with singletons removed, we will ignore for now
#plot depth per snp and per sample
dp <- extract.gt(v, element = "DP", as.numeric=TRUE)
heatmap.bp(dp, rlabels = FALSE)

#plot genotype quality per snp and per sample
gq <- extract.gt(v, element = "GQ", as.numeric=TRUE)
heatmap.bp(gq, rlabels = FALSE)

#write out vcf with all SNPs
#vcfR::write.vcf(v, "~/Desktop/phil.dicaeum/filtered.85.vcf.gz")

#linkage filter vcf to thin SNPs to one per 500bp
v.thin<-distance_thin(v, min.distance = 500)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |==                                                                    |   4%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |=========                                                             |  14%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |=======================                                               |  34%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |==============================                                        |  44%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=====================================                                 |  54%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |============================================                          |  64%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |===================================================                   |  74%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |==========================================================            |  84%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |=================================================================     |  94%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%
## 2590 out of 18499 input SNPs were not located within 500 base-pairs of another SNP and were retained despite filtering
#write out thinned vcf
#vcfR::write.vcf(v.thin, "~/Desktop/phil.dicaeum/filtered.85.unlinked.vcf.gz")