library(vcfR)
##
## ***** *** vcfR *** *****
## This is vcfR 1.12.0
## browseVignettes('vcfR') # Documentation
## citation('vcfR') # Citation
## ***** ***** ***** *****
library(SNPfiltR)
## This is SNPfiltR v.1.0.0
##
## Detailed usage information is available at: devonderaad.github.io/SNPfiltR/
##
## If you use SNPfiltR in your published work, please cite the following papers:
##
## DeRaad, D.A. (2022), SNPfiltR: an R package for interactive and reproducible SNP filtering. Molecular Ecology Resources, 00, 1-15. http://doi.org/10.1111/1755-0998.13618
##
## Knaus, Brian J., and Niklaus J. Grunwald. 2017. VCFR: a package to manipulate and visualize variant call format data in R. Molecular Ecology Resources, 17.1:44-53. http://doi.org/10.1111/1755-0998.12549
#read in unlinked SNP dataset
vcfR <- read.vcfR("~/Desktop/phil.dicaeum/filtered.85.unlinked.vcf")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 14
## header_line: 15
## variant count: 2590
## column count: 69
##
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 2590
## Character matrix gt cols: 69
## skip: 0
## nrows: 2590
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant: 2590
## All variants processed
vcfR
## ***** Object of Class vcfR *****
## 60 samples
## 2590 CHROMs
## 2,590 variants
## Object size: 12.9 Mb
## 11.18 percent missing data
## ***** ***** *****
#get a list of samples present in the vcf to to subsample from
pops<-read.csv("~/Desktop/phil.dicaeum/dicaeum.sampling.csv") #read in your sample info file
#retain only samples that passed all filtering protocols (assumes that the 'ID' column is identical to sample names in the vcf)
pops<-pops[pops$ID %in% colnames(vcfR@gt),]
#reorder the sample info file to match the order of samples in the vcf
pops<-pops[order(match(pops$ID,colnames(vcfR@gt)[-1])),]
rownames(pops)<-2:61
colnames(vcfR@gt)
## [1] "FORMAT" "D_hypoleucum_18070"
## [3] "D_hypoleucum_18191" "D_hypoleucum_14037"
## [5] "D_hypoleucum_14079" "D_hypoleucum_14065"
## [7] "D_hypoleucum_14120" "D_hypoleucum_18159"
## [9] "D_hypoleucum_14075" "D_hypoleucum_20218"
## [11] "D_hypoleucum_19177" "D_hypoleucum_19178"
## [13] "D_hypoleucum_20213" "D_hypoleucum_FMNH454949"
## [15] "D_hypoleucum_25637" "D_hypoleucum_1271"
## [17] "D_hypoleucum_1273" "D_hypoleucum_1275"
## [19] "D_hypoleucum_25921" "D_hypoleucum_3274"
## [21] "D_hypoleucum_26984" "D_hypoleucum_3208"
## [23] "D_hypoleucum_3158" "D_hypoleucum_2253"
## [25] "D_hypoleucum_3095" "D_hypoleucum_462070"
## [27] "D_hypoleucum_25675" "D_hypoleucum_454950"
## [29] "D_hypoleucum_25880" "D_hypoleucum_3314"
## [31] "D_hypoleucum_357608" "D_hypoleucum_357615"
## [33] "D_hypoleucum_357612" "D_hypoleucum_19638"
## [35] "D_hypoleucum_25868" "D_hypoleucum_17969"
## [37] "D_hypoleucum_25672" "D_hypoleucum_26975"
## [39] "D_hypoleucum_2229" "D_hypoleucum_2067"
## [41] "D_hypoleucum_28329" "D_hypoleucum_28361"
## [43] "D_hypoleucum_28376" "D_hypoleucum_2208"
## [45] "D_hypoleucum_28294" "D_hypoleucum_1956"
## [47] "D_hypoleucum_25670" "D_hypoleucum_20921"
## [49] "D_hypoleucum_20193" "D_hypoleucum_28416"
## [51] "D_hypoleucum_18193" "D_hypoleucum_27450"
## [53] "D_hypoleucum_19046" "D_hypoleucum_19136"
## [55] "D_hypoleucum_28676" "D_nigrilore_KU28413"
## [57] "D_nigrilore_KU28414" "D_hypoleucum_29945"
## [59] "D_hypoleucum_31636" "D_hypoleucum_29951"
## [61] "D_hypoleucum_31644"
#look at file
pops
## ID Tissue Species Taxa State
## 2 D_hypoleucum_18070 18070 hypoleucum Luzon Camarines Norte
## 3 D_hypoleucum_18191 18191 hypoleucum Zamboanga
## 4 D_hypoleucum_14037 14037 hypoleucum Mindanao Dinagat
## 5 D_hypoleucum_14079 14079 hypoleucum Mindanao Dinagat
## 6 D_hypoleucum_14065 14065 hypoleucum Mindanao Dinagat
## 7 D_hypoleucum_14120 14120 hypoleucum Mindanao Eastern Samar
## 8 D_hypoleucum_18159 18159 hypoleucum Zamboanga
## 9 D_hypoleucum_14075 14075 hypoleucum Mindanao Dinagat
## 10 D_hypoleucum_20218 20218 hypoleucum Luzon Aurora
## 11 D_hypoleucum_19177 19177 hypoleucum Zamboanga Zamboanga City
## 12 D_hypoleucum_19178 19178 hypoleucum Zamboanga Zamboanga City
## 13 D_hypoleucum_20213 20213 hypoleucum Luzon Aurora
## 14 D_hypoleucum_FMNH454949 FMNH454949 hypoleucum Luzon Luzon
## 15 D_hypoleucum_25637 25637 hypoleucum Luzon Ilocos Norte
## 16 D_hypoleucum_1271 1271 hypoleucum Mindanao Mindanao
## 17 D_hypoleucum_1273 1273 hypoleucum Mindanao Mindanao
## 18 D_hypoleucum_1275 1275 hypoleucum Mindanao Mindanao
## 19 D_hypoleucum_25921 25921 hypoleucum Luzon Cagayan
## 20 D_hypoleucum_3274 3274 hypoleucum Mindanao Mindanao
## 21 D_hypoleucum_26984 26984 hypoleucum Luzon Cagayan
## 22 D_hypoleucum_3208 3208 hypoleucum Mindanao Mindanao
## 23 D_hypoleucum_3158 3158 hypoleucum Mindanao Mindanao
## 24 D_hypoleucum_2253 2253 hypoleucum Mindanao Mindanao
## 25 D_hypoleucum_3095 3095 hypoleucum Mindanao Mindanao
## 26 D_hypoleucum_462070 462070 hypoleucum Luzon Luzon
## 27 D_hypoleucum_25675 25675 hypoleucum Luzon Ilocos Norte
## 28 D_hypoleucum_454950 454950 hypoleucum Luzon Luzon
## 29 D_hypoleucum_25880 25880 hypoleucum Luzon Cagayan
## 30 D_hypoleucum_3314 3314 hypoleucum Mindanao Mindanao
## 31 D_hypoleucum_357608 357608 hypoleucum Mindanao Mindanao
## 32 D_hypoleucum_357615 357615 hypoleucum Mindanao Mindanao
## 33 D_hypoleucum_357612 357612 hypoleucum Mindanao Mindanao
## 34 D_hypoleucum_19638 19638 hypoleucum Luzon Aurora
## 35 D_hypoleucum_25868 25868 hypoleucum Luzon Cagayan
## 36 D_hypoleucum_17969 17969 hypoleucum Luzon Camarines Norte
## 37 D_hypoleucum_25672 25672 hypoleucum Luzon Ilocos Norte
## 38 D_hypoleucum_26975 26975 hypoleucum Luzon Cagayan
## 39 D_hypoleucum_2229 2229 hypoleucum Mindanao
## 40 D_hypoleucum_2067 2067 hypoleucum Mindanao
## 41 D_hypoleucum_28329 28329 hypoleucum Mindanao Agusan del Norte
## 42 D_hypoleucum_28361 28361 hypoleucum Mindanao Agusan del Norte
## 43 D_hypoleucum_28376 28376 hypoleucum Mindanao Agusan del Norte
## 44 D_hypoleucum_2208 2208 hypoleucum Mindanao Mindanao
## 45 D_hypoleucum_28294 28294 hypoleucum Mindanao Agusan del Norte
## 46 D_hypoleucum_1956 1956 hypoleucum Mindanao Mindanao
## 47 D_hypoleucum_25670 25670 hypoleucum Luzon Ilocos Norte
## 48 D_hypoleucum_20921 20921 hypoleucum Mindanao Bohol
## 49 D_hypoleucum_20193 20193 hypoleucum Luzon Aurora
## 50 D_hypoleucum_28416 28416 hypoleucum Mindanao Agusan del Norte
## 51 D_hypoleucum_18193 18193 hypoleucum Zamboanga
## 52 D_hypoleucum_27450 27450 hypoleucum Mindanao Leyte
## 53 D_hypoleucum_19046 19046 hypoleucum Mindanao Agusan del Sur
## 54 D_hypoleucum_19136 19136 hypoleucum Mindanao Misamis Oriental
## 55 D_hypoleucum_28676 28676 hypoleucum Mindanao
## 56 D_nigrilore_KU28413 KU28413 nigrilore nigrilore
## 57 D_nigrilore_KU28414 KU28414 nigrilore nigrilore
## 58 D_hypoleucum_29945 29945 hypoleucum Zamboanga
## 59 D_hypoleucum_31636 31636 hypoleucum Mindanao Eastern Samar
## 60 D_hypoleucum_29951 29951 hypoleucum Zamboanga
## 61 D_hypoleucum_31644 31644 hypoleucum Mindanao Eastern Samar
## Specificlocality
## 2 Luzon Island; Municipality Labo; Barangay Tulay na Lupa, Mt. Labo
## 3 Mindanao Island, City of Zamboanga; Brgy. Pasonanca, Pasonanca Nat. Park
## 4 Dinagat Island, Muncipality of Loreto, Brgy. Santiago, Sitio Cambinliw, Sudlon
## 5 Dinagat Island, Muncipality of Loreto, Brgy. Santiago, Sitio Cambinliw, Sudlon
## 6 Dinagat Island, Municipality Loreto, Brgy. Santiago, Sitio Cambinliu, Sudlon
## 7 Samar Island, Muncipality of Taft, Brgy. San Rafael
## 8 Mindanao Island, City of Zamboanga; Brgy. Pasonanca, Pasonanca Nat. Park
## 9 Dinagat Island, Muncipality of Loreto, Brgy. Santiago, Sitio Cambinliw, Sudlon
## 10 Luzon Island, Aurora Memorial National Park
## 11 Mindanao Island; Brgy. Tolosa; Sitio Santa Clara; Pasonanca Natural Park
## 12 Mindanao Island; Brgy. Tolosa; Sitio Santa Clara; Pasonanca Natural Park
## 13 Luzon Island, Municipality of Maria Aurora, Barangay Villa Aurora, Sitio Damani
## 14 Mountain, Mt Amnyao
## 15 Luzon Island, Adams, Mt. Pao
## 16 Davao City, Mt. Talomo
## 17 Davao City, Mt. Talomo
## 18 Davao City, Mt. Talomo
## 19 Luzon Island, Gonzaga, Mt. Cagua
## 20 Davao del Norte, Mt. Pasian
## 21 Luzon Island, Gonzaga, Mt. Cagua
## 22 Davao del Norte, Mt. Pasian
## 23 Davao del Norte, Mt. Pasian
## 24 Sarangani, Mt Busa
## 25 Davao del Norte, Mt. Pasian
## 26 Camarines Sur, Lagonoy
## 27 Luzon Island, Adams, Mt. Pao
## 28 Mountain, Mt Amnyao
## 29 Luzon Island, Gonzaga, Mt. Cagua
## 30 Davao del Norte, Mt. Pasian
## 31 Bukidnon, Mt. Kitanglad Range
## 32 Bukidnon, Mt. Kitanglad Range
## 33 Bukidnon, Mt. Kitanglad Range
## 34 Luzon Island, Municipality of San Luis, ca. 12 km WSW of Baler
## 35 Luzon Island, Gonzaga, Mt. Cagua
## 36 Luzon Island; Municipality Labo; Barangay Tulay na Lupa, Mt. Labo
## 37 Luzon Island, Adams, Mt. Pao
## 38 Luzon Island, Gonzaga, Mt. Cagua
## 39 Sarangani, Mt Busa
## 40 Sarangani, Mt Busa
## 41 Mindanao Island, Mt. Hilong-Hilong
## 42 Mindanao Island, Mt. Hilong-Hilong
## 43 Mindanao Island, Mt. Hilong-Hilong
## 44 Sarangani, Mt Busa
## 45 Mindanao Island, Mt. Hilong-Hilong
## 46 Sarangani, Mt Busa
## 47 Luzon Island, Adams, Mt. Pao
## 48 Bohol Island, Rajah Sikatuna Protected Landscape
## 49 Luzon Island, Aurora Memorial National Park
## 50 Mindanao Island, Mt. Hilong-Hilong
## 51 Mindanao Island, City of Zamboanga; Brgy. Pasonanca, Pasonanca Nat. Park
## 52
## 53 Mindanao Island; Mun. San Francisco, Brgy. Bayugan 2; Mt. Magdiwata
## 54 Mindanao Island, Mt. Balatukan, Gingoog City, Brgy. Lunotan, Sitio San Isidro
## 55 Mindanao: Misamis Oriental Prov., Mt. Lumot
## 56
## 57
## 58 Mindanao Island, City of Zamboanga, Labuyo Station, Pasonanca National Park
## 59 Samar Island, Municipality Taft, Brgy. San Rafael, DENR Field Station
## 60 Mindanao Island, City of Zamboanga, Labuyo Station, Pasonanca National Park
## 61 Samar Island, Municipality Taft, Brgy. San Rafael, DENR Field Station
## Latitude Longitude Altitude
## 2 14.039000 122.7860 580
## 3 7.018483 122.0273 750
## 4 10.343680 125.6181 200
## 5 10.343680 125.6181 200
## 6 10.343680 125.6181 190
## 7 11.817400 125.2779 200
## 8 7.018483 122.0273 750
## 9 10.343680 125.6181 200
## 10 15.685100 121.3407 535
## 11 7.107800 122.1195 604
## 12 7.107800 122.1194 600
## 13 15.685100 121.3407 535
## 14 17.000000 121.0000 NA
## 15 18.438000 120.8780 750
## 16 NA NA NA
## 17 NA NA NA
## 18 NA NA NA
## 19 18.219000 122.1110 780
## 20 NA NA NA
## 21 18.236000 122.1040 680
## 22 NA NA NA
## 23 NA NA NA
## 24 NA NA NA
## 25 NA NA NA
## 26 NA NA NA
## 27 18.438000 120.8780 750
## 28 NA NA NA
## 29 18.219000 122.1110 780
## 30 NA NA NA
## 31 NA NA NA
## 32 NA NA NA
## 33 NA NA NA
## 34 15.643620 121.5149 1075
## 35 18.226000 122.1080 940
## 36 14.039000 122.7860 580
## 37 18.438000 120.8780 750
## 38 18.236000 122.1040 680
## 39 NA NA NA
## 40 NA NA NA
## 41 9.075000 125.6650 500
## 42 9.063000 125.6730 1000
## 43 9.062000 125.6730 1000
## 44 NA NA NA
## 45 9.075000 125.6650 500
## 46 NA NA NA
## 47 18.438000 120.8780 750
## 48 9.705600 124.1234 400
## 49 15.685100 121.3407 535
## 50 9.064000 125.6840 1250
## 51 7.018483 122.0273 750
## 52 NA NA NA
## 53 8.473000 125.9860 490
## 54 8.732500 125.0035 1432
## 55 8.721000 125.0760 420
## 56 NA NA NA
## 57 NA NA NA
## 58 7.164000 122.0960 843
## 59 11.828000 125.2760 180
## 60 7.164000 122.0960 843
## 61 11.828000 125.2760 180
#use a for loop to randomly downsample 3 samples from each of the 3 lineages (exclude outgroup) 5 separate times
for (i in 1:5){
#randomly sample 3 individuals from each of the 3 lineages
#remember that the first column of a vcfR object is 'INFO' and samples start in column 2
#manually set these ranges to match your sample order in your vcf, to make sure you sample each lineage separately
sample.specs<-c(sample(c(2,10,13:15,19,21,26:29,34:38,47,49),size = 3),
sample(c(3,8,11,12,51,58,60),size = 3),
sample(c(4:7,9,16:18,20,22:25,30:33,39:46,48,50,52:55,59,61),size = 3))
#subset the random samples plus the vcfR info (column 1)
vcf.sub <- vcfR[,c(1,sample.specs)]
#filter out invariant sites
vcf.comp<-min_mac(vcf.sub, min.mac = 1)
#write filtered subset vcf to disk
print(colnames(vcf.comp@gt)) #print sample names in retained vcf
print(pops$Taxa[sample.specs-1]) #print the assigned taxa for each sample to make sure we have subsampled correctly
#uncomment if you want to save a vcf for each replicate, rather than just a nexus
#vcfR::write.vcf(vcf.comp, file = paste0("~/Desktop/phil.dicaeum/snapp/rep",i,".vcf.gz")) #write to disk
#extract genotype matrix
vcf.gt<-extract.gt(vcf.comp, element = "GT", as.numeric = F, convertNA = T)
#convert 'NA' to '?'
vcf.gt[is.na(vcf.gt)]<-"?"
#convert '0/0' to '0'
vcf.gt[vcf.gt == "0/0"]<-"0"
#convert '0/1' to '1'
vcf.gt[vcf.gt == "0/1"]<-"1"
#convert '1/1' to '2'
vcf.gt[vcf.gt == "1/1"]<-"2"
#transpose matrix
vcf.gt <- t(vcf.gt)
#add lineages to sample names
rownames(vcf.gt)<-paste0(pops$Taxa[sample.specs-1],gsub("D_hypoleucum","",rownames(vcf.gt)))
#write to disk as nexus file
#commented out to prevent overwriting the nexus files I used for these analyses. Uncomment to write nexus files to disk
#ape::write.nexus.data(x = vcf.gt, file = paste0("~/Desktop/phil.dicaeum/snapp/rep",i,".nex"),format = "DNA", interleaved = FALSE)
}
## 75.98% of SNPs fell below a minor allele count of 1 and were removed from the VCF

## [1] "FORMAT" "D_hypoleucum_20213" "D_hypoleucum_25672"
## [4] "D_hypoleucum_20218" "D_hypoleucum_18193" "D_hypoleucum_18191"
## [7] "D_hypoleucum_29951" "D_hypoleucum_28329" "D_hypoleucum_14065"
## [10] "D_hypoleucum_28376"
## [1] "Luzon" "Luzon" "Luzon" "Zamboanga" "Zamboanga" "Zamboanga"
## [7] "Mindanao" "Mindanao" "Mindanao"
## 76.06% of SNPs fell below a minor allele count of 1 and were removed from the VCF

## [1] "FORMAT" "D_hypoleucum_25921" "D_hypoleucum_26975"
## [4] "D_hypoleucum_20213" "D_hypoleucum_18191" "D_hypoleucum_18193"
## [7] "D_hypoleucum_29951" "D_hypoleucum_1273" "D_hypoleucum_3314"
## [10] "D_hypoleucum_14079"
## [1] "Luzon" "Luzon" "Luzon" "Zamboanga" "Zamboanga" "Zamboanga"
## [7] "Mindanao" "Mindanao" "Mindanao"
## 76.41% of SNPs fell below a minor allele count of 1 and were removed from the VCF

## [1] "FORMAT" "D_hypoleucum_25672" "D_hypoleucum_25637"
## [4] "D_hypoleucum_25880" "D_hypoleucum_18191" "D_hypoleucum_29945"
## [7] "D_hypoleucum_18193" "D_hypoleucum_28361" "D_hypoleucum_1273"
## [10] "D_hypoleucum_14079"
## [1] "Luzon" "Luzon" "Luzon" "Zamboanga" "Zamboanga" "Zamboanga"
## [7] "Mindanao" "Mindanao" "Mindanao"
## 76.41% of SNPs fell below a minor allele count of 1 and were removed from the VCF

## [1] "FORMAT" "D_hypoleucum_20213" "D_hypoleucum_26975"
## [4] "D_hypoleucum_25670" "D_hypoleucum_18159" "D_hypoleucum_19178"
## [7] "D_hypoleucum_29951" "D_hypoleucum_14065" "D_hypoleucum_357608"
## [10] "D_hypoleucum_3314"
## [1] "Luzon" "Luzon" "Luzon" "Zamboanga" "Zamboanga" "Zamboanga"
## [7] "Mindanao" "Mindanao" "Mindanao"
## 74.17% of SNPs fell below a minor allele count of 1 and were removed from the VCF

## [1] "FORMAT" "D_hypoleucum_25868" "D_hypoleucum_20218"
## [4] "D_hypoleucum_25675" "D_hypoleucum_19177" "D_hypoleucum_19178"
## [7] "D_hypoleucum_18193" "D_hypoleucum_19046" "D_hypoleucum_1956"
## [10] "D_hypoleucum_27450"
## [1] "Luzon" "Luzon" "Luzon" "Zamboanga" "Zamboanga" "Zamboanga"
## [7] "Mindanao" "Mindanao" "Mindanao"
#read in last file read to disk, to check that it looks right
nex.file <- scan(file=paste0("~/Desktop/phil.dicaeum/snapp/rep",i,".nex"), what = "character", sep = "\n",quiet = TRUE)
nex.file
## [1] "#NEXUS"
## [2] "[Data written by write.nexus.data.R, Thu Jan 12 08:06:25 2023]"
## [3] "BEGIN DATA;"
## [4] " DIMENSIONS NTAX=9 NCHAR=667;"
## [5] " FORMAT DATATYPE=SNP MISSING=? GAP=- INTERLEAVE=NO;"
## [6] " MATRIX"
## [7] " Luzon_20193 ?01?10?000010?0100?010??0??00?0000?100?001000??00???0???0000?00???00?0000000?0??002?00???0001?010?001??001020?0?0?0000??00?1??00?10?00?00?00?000??00000?020?000??0?0???00?0000??1????00??0?0000??1???1???00??10???000?000?0000?0?1???10?2?0?1??000?00?0?0?10?0?0??00?0100??00??0???00?010?0?0?00?01?02?0000???10?110?0??00100?0?020???0??0??000?0???00????0?2000?0?0000??0?000000000?0??0?0??0000??2???000??0???120?0000?1??000?00?000?02?0?0?00?0000?0???0????0000?000000??0?000?????0?10??00?0??00??0?0?00?02?0010?000000?201000????2000?0?0000200000?00000??000000222?0??000?000?0????10100?01?0002?0?000?001?00000000?0010?0000?000?000?0??010?2?0012000?10000????1000000000??1?00?11?0"
## [8] " Luzon_25637 0?10?0100001?02000000001010001010100000000100100000000001000000000000000?0000010?0010021000001?020000010000001000000000000000000000000000000020001000?00200010000002000000?10000?10000?01100?000000000110100100?21000?000000011000000010?2?000000000000000?0000000001010?0010011000000000000010001001200101000000??00001000002000000010102001100020000000010000001100011000000000000001000?011000000000000100100020001200001001011010000001000001110100000010000000100000100000110000000110001121100?01000010000000000010000010000100100?1002000110000000?00001010100?00000000200010002002?1000000?001010000000000000000000010002000?00000100000010000?000000100?000001010010000000100?1012"
## [9] " Luzon_26984 002000010000000100101000000110001010000000000010000001010010200010110000020?00001?0010000000010110000100000?0010010?00001000000000000000000001000?20002020?00101000000000010002000001000000000000000000020020100200???001000?000100000010220010001101000000100?000000000000000??0020000000000000000002000001102022000?00100000000000200000000000000100000010011100?001?020000201?00002011000000000000001001200000?000001000000011000000101000202200?000011?000000?0000000000200000000000000?0000100101010000100000000100000000020000110010000?00?2001000000000000001001?010000210000000002020010001010000110010000000001000000001100?0010000120002002010100001110000001000?002000000000?020"
## [10] " Zamboanga_18191 022000000000012200000?201000000?000000020?0011000001002020000001000000010001000200000100100000010010110?202200001001000102000001000000201011200?00010?020001001000000020002001000110002200000000002010000??0000000010210010?000002002000020200201?0?0?00020000002000020000002000?00000??01000000200000000000020000000000000000210002000000?00000000001000001000000020000000000100000000020200000010000100000000010?000001000020020002000000220000000001000102010?000200020200200001000000022000?00010000200000002000100001020000012101010?0000000200000000001002000000100001000002000000000?00000020000000002000020002200020000000100001110200000000210000?10000?1010020002?00?10000?110000"
## [11] " Zamboanga_18159 022000000000022201000?20001?001?00001?00?0?0010?0?000010000000100000011000000000000002000?0000020000110220?20000?0000000010020?000000020000020100002000100000?0000000000002000000101002000100000002110000??00000?00002000?2010000201200002020010?002000???0010002000020000?02000000000?200?0000020?000?0000002000000000000?01020200?000000?000101000010?000000000001000000?0102000000000222000000210001000000000000010000000120020000000100210000?00?10000100000101012002020020000?000?2002000020011000010200100?000100000?10000112000000?0000000100000100100000000000?0002101?10?000000000000000120010000001000?02001100020002000100000000100100000200010100000200100200000001000000021?00"
## [12] " Zamboanga_19178 022001000100021202000120000000000?0000?0?00000000000202000000000000000100000000?00000200?00?000?0?00201100220000?01?001002001012100000200000200000020100000200101100001100201000010000210000000000212000002000?010000200010001000100200?020200200?010102020000012001010100000000000?00?200110000?000000001000200000000000000202000?20000002000010000020000000000000?000000210020001000002020002?0?0000100100010220000000000002?02000?00020020000000100000020?00022002?00202002000010000200200002000000000110000?0000000000020000021000000000010100000?01000100000000000000000?00000000?010000000002100000000200000010120002000202011000000020000000020?1100000002001011000100?2200000120000"
## [13] " Mindanao_14075 00?1021?100010001001000000000000000001001002?10?111000001000001000?000001001010000000000010110020100210010000??0?000?100000121?0000001000100000000000001000000000000200001001000010001000?00?020100?00?000000??10002020?00000100001000001000?0?00?000010100101100?0000000120010011000000?00000?0000?00000000001101?00000?10200000?100000100100000?200000?1000000000000000000000000012000000100010000010000001100001100100?00?00010000200200000000?00100?00000100000001000001021002001?0000??210000000?00000?00000010000?1010?00000200200001100102011000010?000000000?0001001000010001010001201010000000000000000100?1000100000000?00010000000000?000100000?0010?10000011?000000?2200100100?"
## [14] " Mindanao_357615 100111000010?010000000200000?000101001100?0?0?00020000001001000000002000?0002?00000000000010?00200011100000000010000000000100000?00110200000000110001001001?00000010011000000000010000200000110000?0100000000010000210010000000100001000000000010?00000000010000010000001000000??2001101?0000011010100110000?2000000111?000000?00000001000000001000100110000000010000000001000001010010010110001000010001000010000000000000?0100000010001100002?0?000001001000000000000100000000020102000010000002001000001000000012100000000000000001000000000010000100010001000100000010?01001000000010010200000000?20100110000000001001010000100001?00000000000?000?00000?10000110?000100000010000000?10"
## [15] " Mindanao_14065 0000?110000020020000000000010?0000?000000000000102000000?100010001?00000001?00000111000000002210000021001?001000000?100000020110?01000010000000000000002000000001000000020100001000000200001?0010000000000?000?0002?02?00001010000000001100000000?1000001?0000000010100100000100020100102?001000??000000?000000000010000010000000010?000201001000000100010000000000010000100000001000?002001000110000000000011100000000000100000001002100000000000000000000002010000001000002000?2?000100010000102000000000000?1011000100000001000200100000100200201001010?0010000012?20000100?000?1010000220000001000000000001000000000000001010000101000000001??1000?0?210100?00001000?0001010?000200?000"
## [16] " ;"
## [17] "END;"
use SED in a terminal window to change the data type from DNA to
SNP
cd /Users/devder/Desktop/phil.dicaeum/snapp/
sed -i '' "s/DNA/SNP/g" rep1.nex
sed -i '' "s/DNA/SNP/g" rep2.nex
sed -i '' "s/DNA/SNP/g" rep3.nex
sed -i '' "s/DNA/SNP/g" rep4.nex
sed -i '' "s/DNA/SNP/g" rep5.nex
Now, open beauti and choose file > template > SNAPP. Import
the first nexus as ‘alignment’, assign samples to tips, leave parameters
default except, uncheck the box “Include non-polymorphic sites”. Remove
any calibrations in the ‘Prior’ window (if needed). Reduce chain length
to 5M, and name tree and log filenames according to the specific
replicate so they don’t overwrite eachother. Then repeat for each nexus
until all of your beauti .xml input files are ready for SNAPP.
Then start 5 replicate SNAPP runs as an array on the cluster using
the following code:
#!/bin/sh
#
#SBATCH --job-name=snapp # Job Name
#SBATCH --nodes=1 # 40 nodes
#SBATCH --ntasks-per-node=15 # 40 CPU allocation per Task
#SBATCH --partition=bi # Name of the Slurm partition used
#SBATCH --chdir=/home/d669d153/work/phil.dicaeum/snapp # Set working d$
#SBATCH --mem-per-cpu=800 # memory requested
#SBATCH --array=1-5
#SBATCH --time=10000
#run beast 2.7.1
/home/d669d153/work/beast.2.7.1/beast/bin/beast -threads 15 rep$SLURM_ARRAY_TASK_ID.xml
Use tracer to make sure each individual run achieved
convergence
#example of tracer output from a converged run. All ESS's > 200, and a stable trace plot for each parameter estimate
knitr::include_graphics("/Users/devder/Desktop/example.trace.plot.png")

Investigate whether individual runs converged on similar trees
#rep1
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/snapp/rep1.png")

#rep2
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/snapp/rep2.png")

#rep3
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/snapp/rep.3.png")

#rep4
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/snapp/rep4.png")

#rep5
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/snapp/rep5.png")

Because these replicates (each with different samples included) did
not converge on the same topology, I will use TreeAnnotator to generate
a maximum clade credibility tree for each replicate to assess the
posterior probability of each recovered branching order
#visualize each maximum clade credibility tree, discarding the first 1M generations as burn-in
#rep1
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/snapp/rep1.con.tree.png")

#rep2
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/snapp/rep2.con.tree.png")

#rep3
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/snapp/rep3.con.tree.png")

#rep4
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/snapp/rep4.con.tree.png")

#rep5
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/snapp/rep5.con.tree.png")

To get a sense of the overall consensus among all runs, I will use
logcombiner to combine post-burnin trees from each iteration and
visualize all trees sampled from the posterior distributions together on
a single plot using densitree
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/snapp/all.reps.cloudogram.png")
