#load packages
library(vcfR)
library(ggplot2)
library(adegenet)
library(SNPfiltR)
library(StAMPP)

#read in vcf as vcfR
#this has been pre-filtered to remove SNPs near indels and SNPs mapping with low confidence
vcfR <- read.vcfR("~/Desktop/mex.towhees/genotyped_X_samples_only_PASS_snp.vcf")
#12,580 SNPs

#check out the details on the unfiltered vcf
vcfR.unfilt <- read.vcfR("~/Desktop/mex.towhees/genotyped_X_samples_snps.vcf")
#16,237 SNPs

#read in sample info csv
sample.info<-read.csv("~/Desktop/mex.towhees/sample.locs.csv")

#make sure sampling file matches vcf
sample.info$ID %in% colnames(vcfR@gt)[-1]
colnames(vcfR@gt)[-1] %in% sample.info$ID

#if needed, subset sampling file to include only samples in vcf
#sample.info<-sample.info[sample.info$id %in% colnames(vcfR@gt)[-1],]

#reorder sampling file to match order of samples in vcf
sample.info<-sample.info[match(colnames(vcfR@gt)[-1], sample.info$ID),]
sample.info$ID == colnames(vcfR@gt)[-1]
#retain only biallelic SNPs
vcfR<-filter_biallelic(vcfR)

## [1] "84 SNPs, 0.007% of all input SNPs, contained more than 2 alleles, and were removed from the VCF"
#execute allele balance filter
vcfR<-filter_allele_balance(vcfR)
## [1] "50.83% of het genotypes (7.8% of all genotypes) fall outside of .25 - .75 allele balance and were converted to NA"

#visualize and pick appropriate max depth cutoff
max_depth(vcfR)
## [1] "cutoff is not specified, exploratory visualization will be generated."

## [1] "dashed line indicates a mean depth across all SNPs of 26.2"
#filter vcf by the max depth cutoff you chose
vcfR<-max_depth(vcfR, maxdepth = 100)
## [1] "maxdepth cutoff is specified, filtered vcfR object will be returned"
## [1] "0.77% of SNPs were above a mean depth of 100 and were removed from the vcf"
#remove invariant SNPs
vcfR<-min_mac(vcfR, min.mac = 1)

## [1] "4.6% of SNPs fell below a minor allele count of 1 and were removed from the VCF"
#check vcfR to see how many SNPs we have left
vcfR
## ***** Object of Class vcfR *****
## 32 samples
## 3925 CHROMs
## 11,830 variants
## Object size: 13.8 Mb
## 5.903 percent missing data
## *****        *****         *****
#run function to visualize samples
miss<-missing_by_sample(vcfR=vcfR)
## [1] "No popmap provided"

#every sample is missing less than 60% of SNPs, which is great
#this means we should be able to retain all samples with a reasonable per-SNP filtering threshold
#visualize missing data by SNP and the effect of various cutoffs on the missingness of each sample
missing_by_snp(vcfR)
## [1] "cutoff is not specified, exploratory visualizations will be generated"
## Picking joint bandwidth of 0.0474

##    filt missingness snps.retained
## 1  0.30 0.093612804         11161
## 2  0.50 0.071585015         10701
## 3  0.60 0.054557050         10241
## 4  0.65 0.050501211         10113
## 5  0.70 0.041647536          9801
## 6  0.75 0.036809098          9607
## 7  0.80 0.028097483          9199
## 8  0.85 0.019351419          8667
## 9  0.90 0.014520240          8288
## 10 0.95 0.005079213          7180
## 11 1.00 0.000000000          6013
#make popmap
popmap<-sample.info[sample.info$ID %in% colnames(vcfR@gt)[-1],c(5,1)]
colnames(popmap)<-c("id","pop")
popmap$pop<-as.factor(popmap$pop)

#assess missing data effects on clustering
assess_missing_data_pca(vcfR = vcfR, popmap = popmap, thresholds = c(.75,.85,1), clustering = FALSE)
## [1] "cutoff is specified, filtered vcfR object will be returned"

## [1] "18.79% of SNPs fell below a completeness cutoff of 0.75 and were removed from the VCF"

## [1] "cutoff is specified, filtered vcfR object will be returned"

## [1] "26.74% of SNPs fell below a completeness cutoff of 0.85 and were removed from the VCF"

## [1] "cutoff is specified, filtered vcfR object will be returned"

## [1] "49.17% of SNPs fell below a completeness cutoff of 1 and were removed from the VCF"

## [[1]]
##                  PC1        PC2         PC3        PC4         PC5         PC6
## MLZ23076  -3.4408945  1.5016270 -2.35242478 -0.9060119 -0.54757361  0.68560557
## MLZ23077  -2.5312853  1.0641920 -2.06490549 -2.6010390 -1.08664967 -0.51033977
## MLZ24113   0.6453732 -1.3806667  3.61106095 -1.2432372  2.91734383 -5.21618261
## MLZ24115   0.6667550 -1.2911924  2.69656079 -1.1333566  2.27836429 -3.30948232
## MLZ24116   0.9960392 -0.8624820  3.53880305 -1.0347133 -1.21515595  3.11350449
## MLZ24118   0.8004817 -1.4152129  4.73754400 -2.0286371 -1.64504306  1.95423351
## MLZ26978   2.7944173 -2.5415003 -0.91278475 -0.3707517  2.69429738 -3.30827216
## MLZ26979   3.3577174 -3.8640117 -4.85923768  0.5185918  3.45175223  3.14722725
## MLZ26980   3.0828073 -3.7644489 -2.49239605 -1.5958699  0.77105390  0.03157791
## MLZ26984   3.5314322 -2.5249725 -3.72432453  0.3980606  2.17716761  1.71632722
## MLZ30405  -2.6320915  1.7434823 -0.17855993  5.7167553  1.38074762 -0.13152295
## MLZ30408  -1.8021263  1.2032514  0.14874709  0.4721152 -0.89520509 -0.37710142
## MLZ30413  -1.7770906  0.5596514  0.09159234  0.5969241 -0.36308270 -0.17732847
## MLZ30415  -2.1575796  0.9828752 -0.19806036  3.2383487 -0.56296525 -1.97127307
## MLZ41012  -2.0732084  0.6534767 -0.64925132 -4.4770531 -0.47059593  1.87086167
## MLZ41050  -1.8702176  0.2961247 -0.50518103 -3.5404503 -1.09260510 -0.18549831
## MLZ41051  -2.6924658  0.9251661 -1.51585508 -3.3135022 -0.09847242 -0.13584982
## MLZ41271  -2.9130433  1.0028442 -0.78248288  0.7694928 -0.34640993  2.05545290
## MLZ46829  -3.4967650  1.8819678 -2.08379080  0.7223026  1.65107132 -0.39541773
## MLZ46851  -3.1444035  1.5822846 -1.52075885 -1.7973585 -0.78719357 -1.28326524
## MLZ55360  -0.1420718 -1.4795700  3.82240740  0.7814072  1.68554710  0.76761071
## MLZ55361   0.3112816 -1.5363408  3.97389919  1.6130831  1.71631136  4.69249548
## MLZ55363   0.7227579 -0.7919691  1.65649599 -0.0232489 -0.54299147  0.68583497
## MLZ55366   0.1476713 -0.8057730  1.45176389 -1.3149368  0.48354766 -0.46061558
## MLZ57701  -1.8249294  0.9740361 -0.68120711  1.8277736  1.46321387 -1.76989992
## MLZ57703  -2.1058177  0.7283156  0.15530202  2.4853046 -2.72484071  1.37741565
## MLZ57707  -1.5563299  0.4960458  0.48012631  1.1831584  1.06857218  0.16524956
## MLZ57719  -1.6818622  0.8481359  0.50010255  1.7162611  0.35184505 -0.01488890
## MVZ122216  9.5067437 11.4541280  0.23597477 -0.7288874  1.14205227  0.47310138
## MVZ122217  4.1270776 -2.3022831 -0.84310263  1.5306698 -5.14729653 -3.27055708
## MVZ122219  3.1754278 -1.6247092 -0.56797269  1.0107068 -5.88827058 -0.78643791
## MVZ122223  3.9761991 -1.7124722 -1.16808440  1.5280980 -1.81853609  0.56743500
##                   PC7        PC8         PC9       PC10                   pop
## MLZ23076  -2.40541976  0.1207203  3.20576347 -2.0778858          El Venerable
## MLZ23077  -0.84698742 -2.6115741 -0.93273690 -1.1931067          El Venerable
## MLZ24113   0.82638797  2.3949536  0.57728566 -2.4406858    Rancho La Cofradia
## MLZ24115   1.62906009  1.4801747 -0.14081106 -1.2148973    Rancho La Cofradia
## MLZ24116   0.52274358 -1.4008361  1.91887145  1.4950702    Rancho La Cofradia
## MLZ24118  -0.17929094 -2.0525227  3.47848266  3.9851433    Rancho La Cofradia
## MLZ26978  -0.73196243 -0.4168690 -1.23462004  3.1534882               Tapalpa
## MLZ26979   1.56118453  1.3742507  1.38710840 -0.2508291               Tapalpa
## MLZ26980   0.43794277 -5.3283120 -3.61839817  0.8514866               Tapalpa
## MLZ26984   1.53800889  0.5933887  1.41151648 -0.1148743               Tapalpa
## MLZ30405  -0.92897309  0.5668339  1.57559697  3.5087113 Puerto Lengua de Vaca
## MLZ30408   1.90363770  0.1258526 -0.33212326  2.3990802 Puerto Lengua de Vaca
## MLZ30413   1.46107135 -0.6620272 -0.28255192  1.4765155 Puerto Lengua de Vaca
## MLZ30415   2.03593954  2.0655463  0.12385008  2.7765230 Puerto Lengua de Vaca
## MLZ41012  -0.73397017  3.0640379  0.70497978  1.1424606                 Lerma
## MLZ41050   1.85598927  0.8665997  0.53396918  1.3256709                 Lerma
## MLZ41051   0.61862620  1.6409468 -1.54656715  0.6903126                 Lerma
## MLZ41271  -1.06547393  3.6631206 -4.78463484  1.1777819                 Lerma
## MLZ46829  -4.14289007 -1.5979406  2.20828572 -2.3903870          El Venerable
## MLZ46851  -0.86239655 -0.1128000 -0.07905488 -1.1527436          El Venerable
## MLZ55360  -2.18956670 -1.9128176 -0.23844637 -1.6793211              Lagunita
## MLZ55361  -2.85341556  1.8399877 -3.66998181 -2.2462688              Lagunita
## MLZ55363   0.08647145  0.3258425  0.33775817 -0.3372391              Lagunita
## MLZ55366   0.63226944 -0.1848745  0.25290137 -1.6089418              Lagunita
## MLZ57701  -1.73844398 -3.4395377 -0.40504878  1.5609510       Puerto Morillos
## MLZ57703   6.87807376 -1.6788343 -0.57769899 -4.1007586       Puerto Morillos
## MLZ57707   0.79176713  0.2245275 -2.27812051 -0.4627647       Puerto Morillos
## MLZ57719   0.79033536 -2.4591786  1.56533876 -1.8138643       Puerto Morillos
## MVZ122216  0.21885919 -0.3140935 -0.63845158 -0.1579450            Manzamitla
## MVZ122217 -2.14792839  1.4326783  0.06911554 -1.2524034            Manzamitla
## MVZ122219 -2.18075736 -0.2722789 -2.43000302  0.1287506            Manzamitla
## MVZ122223 -0.78089188  2.6650349  3.83842560 -1.1770295            Manzamitla
##              missing
## MLZ23076  0.11107354
## MLZ23077  0.10109890
## MLZ24113  0.21580727
## MLZ24115  0.23102282
## MLZ24116  0.13093829
## MLZ24118  0.13169907
## MLZ26978  0.11715976
## MLZ26979  0.12992392
## MLZ26980  0.12062553
## MLZ26984  0.07142857
## MLZ30405  0.09611158
## MLZ30408  0.17185123
## MLZ30413  0.22045647
## MLZ30415  0.11327134
## MLZ41012  0.10498732
## MLZ41050  0.25300085
## MLZ41051  0.13778529
## MLZ41271  0.16466610
## MLZ46829  0.15604396
## MLZ46851  0.13178360
## MLZ55360  0.23034658
## MLZ55361  0.14201183
## MLZ55363  0.15131023
## MLZ55366  0.13786982
## MLZ57701  0.14201183
## MLZ57703  0.08968724
## MLZ57707  0.11031276
## MLZ57719  0.12510566
## MVZ122216 0.03685545
## MVZ122217 0.07641589
## MVZ122219 0.08892646
## MVZ122223 0.07768385
#super weird that the sample we used as a reference pops out but only above a certain filtering threshold

#may be a good idea to remove singletons (degraded samples) see how this affects the number of SNPs retained
vcfR.mac2<-min_mac(vcfR, min.mac = 2)

## [1] "41.5% of SNPs fell below a minor allele count of 2 and were removed from the VCF"
missing_by_snp(vcfR.mac2)
## [1] "cutoff is not specified, exploratory visualizations will be generated"
## Picking joint bandwidth of 0.0497

##    filt missingness snps.retained
## 1  0.30 0.133315260          6340
## 2  0.50 0.100704982          5931
## 3  0.60 0.075763049          5537
## 4  0.65 0.070091093          5434
## 5  0.70 0.057696480          5185
## 6  0.75 0.050715990          5028
## 7  0.80 0.038274113          4707
## 8  0.85 0.025748547          4300
## 9  0.90 0.019098945          4030
## 10 0.95 0.006557004          3298
## 11 1.00 0.000000000          2606
assess_missing_data_pca(vcfR = vcfR.mac2, popmap = popmap, thresholds = c(.70,.75,.80,1), clustering = FALSE)
## [1] "cutoff is specified, filtered vcfR object will be returned"

## [1] "25.07% of SNPs fell below a completeness cutoff of 0.7 and were removed from the VCF"

## [1] "cutoff is specified, filtered vcfR object will be returned"

## [1] "27.34% of SNPs fell below a completeness cutoff of 0.75 and were removed from the VCF"

## [1] "cutoff is specified, filtered vcfR object will be returned"

## [1] "31.98% of SNPs fell below a completeness cutoff of 0.8 and were removed from the VCF"

## [1] "cutoff is specified, filtered vcfR object will be returned"

## [1] "62.34% of SNPs fell below a completeness cutoff of 1 and were removed from the VCF"

## [[1]]
##                  PC1         PC2         PC3         PC4        PC5         PC6
## MLZ23076  -3.2942668  0.57578990  1.76898954 -0.18288791 -0.6455864  0.32994892
## MLZ23077  -2.4448982  0.41292758  1.61288714  0.19714496 -1.8616307 -0.17277867
## MLZ24113   0.9423257 -1.43110890 -3.95956282  5.81471689  0.1099429 -1.66652567
## MLZ24115   0.9812272 -1.37810038 -3.14394622  5.01993354 -0.2825499 -1.27489223
## MLZ24116   1.1544168 -0.50107052 -3.05756337 -2.79669038 -1.4051970  1.63827072
## MLZ24118   1.0449022 -1.17328450 -4.03737672 -2.61888374 -2.4552978  1.02600353
## MLZ26978   3.0592156 -1.48743997  0.67209490  1.94364025  0.5494955 -0.15281577
## MLZ26979   4.0980697 -2.95451617  4.82897935  1.09585037  1.5375207  3.53211360
## MLZ26980   3.7387102 -2.96291864  2.39980852  0.31204513 -1.0115112  0.46755584
## MLZ26984   3.8573000 -1.22240178  3.28131407  0.76675159  0.8048244  1.75938389
## MLZ30405  -2.7710595  1.23578526  0.34056583 -0.45581691  4.6574602 -0.63875281
## MLZ30408  -1.9586398  0.92847938  0.01715874 -0.17037373 -0.2610504 -1.10276798
## MLZ30413  -1.8839394  0.16636599  0.05588067 -0.37369109  0.3574970 -0.69634176
## MLZ30415  -2.2054466  0.55494864  0.29436003  0.04388828  2.1150433 -2.31687328
## MLZ41012  -2.1266419  0.06698752  0.74288542 -0.09247191 -4.2839188  2.39401090
## MLZ41050  -1.8806836 -0.19070397  0.57089620  0.39926739 -3.9120630 -0.02545807
## MLZ41051  -2.6316581  0.17464350  1.28151045  0.95874403 -2.4576924  0.41995546
## MLZ41271  -3.0233544  0.25179905  1.03430320 -1.14162199  0.5706904  0.54281377
## MLZ46829  -3.5478021  0.90794884  1.63792877  0.82644022  1.2517014  0.84263065
## MLZ46851  -3.1917576  0.74965912  1.21765527  0.83301606 -1.5472264 -0.73508160
## MLZ55360   0.1491273 -1.90639726 -3.76046135 -1.51418847  2.2675996  2.59093858
## MLZ55361   0.5754835 -1.52101894 -2.94616169 -2.01344548  2.0458038  2.87510625
## MLZ55363   0.8763117 -0.42545643 -1.35867144 -1.00898606 -0.2249254  0.05903291
## MLZ55366   0.3106687 -0.73516189 -1.26131868  0.58761989 -0.8570816  0.38811311
## MLZ57701  -1.8575950  0.57504928  0.58034934  0.72342269  2.0362192 -0.32229555
## MLZ57703  -2.0639895  0.27450232  0.11745141 -1.77175331  0.8776728 -1.76692060
## MLZ57707  -1.5564811  0.09737942 -0.29024386  0.09305635  1.4536445  0.46785912
## MLZ57719  -1.7307039  0.52951247 -0.29606497 -0.52348233  1.6541550 -0.04768159
## MVZ122216  5.4820434 10.96324446 -0.72634954  1.06185196 -0.2035199  1.97738666
## MVZ122217  4.3239346 -0.47463791  0.67214826 -1.66938024 -0.3335997 -4.75753793
## MVZ122219  3.3335934 -0.11447007  0.59679362 -3.00039727 -1.0428496 -4.27186902
## MVZ122223  4.2415873  0.01366461  1.11375991 -1.34331878  0.4964295 -1.36253140
##                   PC7         PC8         PC9        PC10                   pop
## MLZ23076  -0.49495796 -0.68393072  2.34543404  0.31989879          El Venerable
## MLZ23077  -1.60349318  0.41360721 -0.04685268  0.45238044          El Venerable
## MLZ24113   0.81725363 -1.18227580  0.76123074 -0.41475089    Rancho La Cofradia
## MLZ24115   1.64181452  0.40494907 -1.35377628  1.05484303    Rancho La Cofradia
## MLZ24116   1.44476308  2.24970613 -0.03486463  1.97394486    Rancho La Cofradia
## MLZ24118   0.64413636  2.66605578  1.72071759  3.18008827    Rancho La Cofradia
## MLZ26978  -2.07531177 -0.40164048  0.18552622  0.32338945               Tapalpa
## MLZ26979   2.87741876  0.58664411  0.67406447 -0.29351188               Tapalpa
## MLZ26980  -4.42415581  2.59327576 -3.82047207  1.11749705               Tapalpa
## MLZ26984   1.35545706  0.97668290  0.24454660 -0.35358964               Tapalpa
## MLZ30405   1.21220163  0.54001544  0.99740567  3.16250943 Puerto Lengua de Vaca
## MLZ30408   1.35534786  1.40405041 -1.29675120  0.90000270 Puerto Lengua de Vaca
## MLZ30413   0.26102898  1.87908877 -0.62912955 -0.04287724 Puerto Lengua de Vaca
## MLZ30415   1.32442301  0.64845014  0.18041853 -0.50981586 Puerto Lengua de Vaca
## MLZ41012   0.77413132 -2.71120302  1.70103844 -0.91670237                 Lerma
## MLZ41050   0.86488620  0.88644622  0.12547099 -1.09518287                 Lerma
## MLZ41051   0.07956763 -0.79928716 -0.67457541 -0.27169254                 Lerma
## MLZ41271   1.58075103 -4.45540853 -4.51598041  0.52704660                 Lerma
## MLZ46829  -2.26984954 -0.69960060  2.86170558  1.87096156          El Venerable
## MLZ46851  -0.94675488 -0.25514312  0.19005450  0.52196888          El Venerable
## MLZ55360  -4.35620269 -1.31110094  1.84858899 -3.64447336              Lagunita
## MLZ55361   1.12309841 -3.11336492 -2.52566779  1.20097984              Lagunita
## MLZ55363   0.83427790 -0.33266722  0.04196921 -0.69571357              Lagunita
## MLZ55366  -0.50133478  0.07434165  0.07799107 -1.89531401              Lagunita
## MLZ57701  -2.38173119  1.06534472  0.27857053  2.26819647       Puerto Morillos
## MLZ57703   1.88101581  3.49370736 -1.78401212 -4.92201166       Puerto Morillos
## MLZ57707  -0.41295168 -0.49304093 -1.50461936 -1.65225725       Puerto Morillos
## MLZ57719  -0.48248678  1.79617735  1.00463034 -1.87982956       Puerto Morillos
## MVZ122216 -0.66718472  0.18928607 -0.94259717 -0.43954151            Manzamitla
## MVZ122217 -0.93785669 -2.29361237  1.26749158 -0.59546151            Manzamitla
## MVZ122219 -1.14691870 -1.83700305 -1.11110950  1.21695780            Manzamitla
## MVZ122223  2.62961719 -1.29855025  3.73355305 -0.46793945            Manzamitla
##              missing
## MLZ23076  0.15823699
## MLZ23077  0.14768786
## MLZ24113  0.29465318
## MLZ24115  0.31387283
## MLZ24116  0.18583815
## MLZ24118  0.18656069
## MLZ26978  0.16864162
## MLZ26979  0.18453757
## MLZ26980  0.17167630
## MLZ26984  0.10447977
## MLZ30405  0.14248555
## MLZ30408  0.23294798
## MLZ30413  0.30722543
## MLZ30415  0.16271676
## MLZ41012  0.15404624
## MLZ41050  0.33786127
## MLZ41051  0.19797688
## MLZ41271  0.23063584
## MLZ46829  0.22369942
## MLZ46851  0.19089595
## MLZ55360  0.30924855
## MLZ55361  0.20317919
## MLZ55363  0.21473988
## MLZ55366  0.19335260
## MLZ57701  0.20476879
## MLZ57703  0.13135838
## MLZ57707  0.16069364
## MLZ57719  0.18208092
## MVZ122216 0.04985549
## MVZ122217 0.11242775
## MVZ122219 0.13236994
## MVZ122223 0.11372832
#again, the sample we used as a reference pops out but only above a certain filtering threshold
#definitely something to watch for going forward
#set 75% cutoff without minor allele filter
vcfRa<-missing_by_snp(vcfR, cutoff = .75)
## [1] "cutoff is specified, filtered vcfR object will be returned"

## [1] "18.79% of SNPs fell below a completeness cutoff of 0.75 and were removed from the VCF"
#convert each to genlight
gena<-vcfR2genlight(vcfRa)

pop(gena)<-gena@ind.names
sample.div.75 <- stamppNeisD(gena, pop = FALSE)
#export for splitstree
#stamppPhylip(distance.mat=sample.div.75, file="~/Desktop/mex.towhees/towhee.75.splits.txt")

#75% completeness cutoff splitstree
knitr::include_graphics(c("/Users/devder/Desktop/mex.towhees/towhee.75.splitstree.png"))

#set 75% cutoff with mac = 2 filter
vcfRb<-missing_by_snp(vcfR.mac2, cutoff = .75)
## [1] "cutoff is specified, filtered vcfR object will be returned"

## [1] "27.34% of SNPs fell below a completeness cutoff of 0.75 and were removed from the VCF"
#convert each to genlight
genb<-vcfR2genlight(vcfRb)

pop(genb)<-genb@ind.names
sample.div.mac.75 <- stamppNeisD(genb, pop = FALSE)
#export for splitstree
#stamppPhylip(distance.mat=sample.div.mac.75, file="~/Desktop/mex.towhees/towhee.75.mac.splits.txt")

#75% completeness cutoff with MAC splitstree
knitr::include_graphics(c("/Users/devder/Desktop/mex.towhees/towhee.75.mac.splitstree.png"))

#set 85% cutoff with mac = 2 filter
vcfRc<-missing_by_snp(vcfR.mac2, cutoff = .85)
## [1] "cutoff is specified, filtered vcfR object will be returned"

## [1] "37.86% of SNPs fell below a completeness cutoff of 0.85 and were removed from the VCF"
#convert each to genlight
genc<-vcfR2genlight(vcfRc)

pop(genc)<-genc@ind.names
sample.div.mac.85 <- stamppNeisD(genc, pop = FALSE)
#export for splitstree
#stamppPhylip(distance.mat=sample.div.mac.85, file="~/Desktop/mex.towhees/towhee.85.mac.splits.txt")

#85% completeness cutoff with MAC splitstree
knitr::include_graphics(c("/Users/devder/Desktop/mex.towhees/towhee.85.mac.splitstree.png"))

#for whatever reason, MVZ12226, our pseudo-reference sample, pops out on a long branch at a certain filtering threshold.
#still not sure why that is occurring
#good news is that other than this sample, missing data proportion is not affecting sample clustering patterns in any discernable way

We will forge ahead with a 75% completeness and mac = 2 filtering scheme

#plot depth per snp and per sample
dp <- extract.gt(vcfRb, element = "DP", as.numeric=TRUE)
heatmap.bp(dp, rlabels = FALSE)

#plot genotype quality per snp and per sample
gq <- extract.gt(vcfRb, element = "GQ", as.numeric=TRUE)
heatmap.bp(gq, rlabels = FALSE)

###We can use the convenient function ‘write.vcf’ from vcfR to export our filtered vcf file for downstream analyses

#write out vcf
#vcfR::write.vcf(vcfRb, file = "~/Desktop/mex.towhees/towhee.75.mac2.vcf.gz")
#perform linkage filtering to get a reduced vcf with only unlinked (one per UCE) SNPs
vcfR.thin<-distance_thin(vcfRb, min.distance = 10000)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |==                                                                    |   4%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |=========                                                             |  14%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |=======================                                               |  34%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |==============================                                        |  44%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=====================================                                 |  54%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |============================================                          |  64%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |===================================================                   |  74%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |==========================================================            |  84%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |=================================================================     |  94%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%
## [1] "2669 out of 5028 input SNPs were not located within 10000 base-pairs of another SNP and were retained despite filtering"
vcfR.thin #retains 2,669 SNPs
## ***** Object of Class vcfR *****
## 32 samples
## 2669 CHROMs
## 2,669 variants
## Object size: 4.7 Mb
## 2.879 percent missing data
## *****        *****         *****
#vcfR::write.vcf(vcfR.thin, file = "~/Desktop/mex.towhees/towhee.75.mac2.unlinked.vcf.gz")