We are going to run Dsuite to test ABBA/BABA comparisons (more info can be found at: https://github.com/millanek/Dsuite). A detailed tutorial can be found at (https://github.com/millanek/tutorials/tree/master/analysis_of_introgression_with_snp_data).

Get input vcf

#first thing to do is copy a filtered vcf file containing SNPs for all samples you're interested in to the working directory you're using on the cluster, e.g.
#scp -r /Users/devder/Desktop/phil.dicaeum/filtered.85.vcf d669d153@hpc.crc.ku.edu:/home/d669d153/work/phil.dicaeum/dsuite/

###NOTE: DSUITE cannot tolerate periods in input tip names or species names (must change to underscores) ###

Get input sample assignments

library(vcfR)
## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.12.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****
#we will also read in the vcf to R:
v<-read.vcfR("~/Desktop/phil.dicaeum/filtered.85.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 14
##   header_line: 15
##   variant count: 18499
##   column count: 69
## 
Meta line 14 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 18499
##   Character matrix gt cols: 69
##   skip: 0
##   nrows: 18499
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant: 18499
## All variants processed
#also read in your sample info file
pops<-read.csv("~/Desktop/phil.dicaeum/dicaeum.sampling.csv")
#it should look roughly like this:
head(pops)
##                   ID Tissue    Species      Taxa           State
## 1 D_hypoleucum_18070  18070 hypoleucum     Luzon Camarines Norte
## 2 D_hypoleucum_18191  18191 hypoleucum Zamboanga                
## 3 D_hypoleucum_14061  14061 hypoleucum  Mindanao         Dinagat
## 4 D_hypoleucum_14037  14037 hypoleucum  Mindanao         Dinagat
## 5 D_hypoleucum_14079  14079 hypoleucum  Mindanao         Dinagat
## 6 D_hypoleucum_14065  14065 hypoleucum  Mindanao         Dinagat
##                                                                 Specificlocality
## 1              Luzon Island; Municipality Labo; Barangay Tulay na Lupa, Mt. Labo
## 2       Mindanao Island, City of Zamboanga; Brgy. Pasonanca, Pasonanca Nat. Park
## 3 Dinagat Island, Muncipality of Loreto, Brgy. Santiago, Sitio Cambinliw, Sudlon
## 4 Dinagat Island, Muncipality of Loreto, Brgy. Santiago, Sitio Cambinliw, Sudlon
## 5 Dinagat Island, Muncipality of Loreto, Brgy. Santiago, Sitio Cambinliw, Sudlon
## 6   Dinagat Island, Municipality Loreto, Brgy. Santiago, Sitio Cambinliu, Sudlon
##    Latitude Longitude Altitude
## 1 14.039000  122.7860      580
## 2  7.018483  122.0273      750
## 3 10.343680  125.6181      200
## 4 10.343680  125.6181      200
## 5 10.343680  125.6181      200
## 6 10.343680  125.6181      190
#retain only samples that passed all filtering protocols (assumes that the 'ID' column is identical to sample names in the vcf)
pops<-pops[pops$ID %in% colnames(v@gt),]

#now we can make a popmap file for Dsuite
popmap<-pops[,c(1,4)]

#print popmap to screen
popmap
##                          ID      Taxa
## 1        D_hypoleucum_18070     Luzon
## 2        D_hypoleucum_18191 Zamboanga
## 4        D_hypoleucum_14037  Mindanao
## 5        D_hypoleucum_14079  Mindanao
## 6        D_hypoleucum_14065  Mindanao
## 9        D_hypoleucum_14120  Mindanao
## 12       D_hypoleucum_18159 Zamboanga
## 13       D_hypoleucum_14075  Mindanao
## 14       D_hypoleucum_20218     Luzon
## 16       D_hypoleucum_19177 Zamboanga
## 17       D_hypoleucum_19178 Zamboanga
## 19       D_hypoleucum_20213     Luzon
## 20  D_hypoleucum_FMNH454949     Luzon
## 21       D_hypoleucum_25637     Luzon
## 22        D_hypoleucum_1271  Mindanao
## 23        D_hypoleucum_1273  Mindanao
## 24        D_hypoleucum_1275  Mindanao
## 25       D_hypoleucum_25921     Luzon
## 26        D_hypoleucum_3274  Mindanao
## 27       D_hypoleucum_26984     Luzon
## 28        D_hypoleucum_3208  Mindanao
## 29        D_hypoleucum_3158  Mindanao
## 30        D_hypoleucum_2253  Mindanao
## 31        D_hypoleucum_3095  Mindanao
## 32      D_hypoleucum_462070     Luzon
## 33       D_hypoleucum_25675     Luzon
## 35      D_hypoleucum_454950     Luzon
## 36       D_hypoleucum_25880     Luzon
## 39        D_hypoleucum_3314  Mindanao
## 40      D_hypoleucum_357608  Mindanao
## 41       D_hypoleucum_17969     Luzon
## 42      D_hypoleucum_357615  Mindanao
## 43      D_hypoleucum_357612  Mindanao
## 44       D_hypoleucum_19638     Luzon
## 45       D_hypoleucum_25868     Luzon
## 49       D_hypoleucum_25672     Luzon
## 50       D_hypoleucum_26975     Luzon
## 51        D_hypoleucum_2229  Mindanao
## 52        D_hypoleucum_2067  Mindanao
## 54       D_hypoleucum_28329  Mindanao
## 55       D_hypoleucum_28361  Mindanao
## 56       D_hypoleucum_28376  Mindanao
## 58        D_hypoleucum_2208  Mindanao
## 60       D_hypoleucum_28294  Mindanao
## 61        D_hypoleucum_1956  Mindanao
## 66       D_hypoleucum_25670     Luzon
## 68       D_hypoleucum_20921  Mindanao
## 69       D_hypoleucum_20193     Luzon
## 70       D_hypoleucum_28416  Mindanao
## 71       D_hypoleucum_18193 Zamboanga
## 73       D_hypoleucum_27450  Mindanao
## 74       D_hypoleucum_19046  Mindanao
## 75       D_hypoleucum_19136  Mindanao
## 77       D_hypoleucum_28676  Mindanao
## 78       D_hypoleucum_29945 Zamboanga
## 79       D_hypoleucum_31636  Mindanao
## 80       D_hypoleucum_29951 Zamboanga
## 81       D_hypoleucum_31644  Mindanao
## 127     D_nigrilore_KU28413 nigrilore
## 128     D_nigrilore_KU28414 nigrilore
#we are going to manually change "nigrilore" to "Outgroup"
popmap[popmap$Taxa == "nigrilore",]<-"Outgroup"

#write out this table and copy it into your cluster working directory next to your vcf (make sure to remove column and row names)
#write.table(popmap, file="~/Downloads/dicaeum.pops.txt", sep="\t", quote=F, row.names = F, col.names=F)

Get input newick tree

#This is maybe the trickiest part, you must input a perfectly formatted newick tree showing the species tree topology for the taxa you assigned in your popmap. Use (http://etetoolkit.org/treeview/) to test whether your trees are valid and look the way you want.
#Here it is easy because there are only 4 taxa, so my tree is:
#(((zamboanga,mindanao),luzon),Outgroup);

#paste this tree into a text file named 'nwk.tre', stored in your cluster working directory

We now have all three needed input files to run Dtrios and perform ABBA/BABA tests

##run Dtrios
##-c means this is the whole file, -n gives the name prefix for this run, -t gives the guidetree, followed by the vcf and popmap
/home/d669d153/work/Dsuite/Build/Dsuite Dtrios -c -n all.samples -t nwk.tre todi.unlinked.85.vcf pops.txt

Visualize your results

#make a 'plot order' text file. Mine just looks like this:
Zamboanga
Mindanao
Luzon
Outgroup

##copy in ruby scripts. I already have them downloaded in another directory. 
cp /home/d669d153/work/todi.subset.2022/dsuite/plot_d.rb .
cp /home/d669d153/work/todi.subset.2022/dsuite/plot_f4ratio.rb .

#If you don't, you can download them with:
wget https://raw.githubusercontent.com/millanek/tutorials/master/analysis_of_introgression_with_snp_data/src/plot_d.rb
wget https://raw.githubusercontent.com/millanek/tutorials/master/analysis_of_introgression_with_snp_data/src/plot_f4ratio.rb

##plot heatmap of D
#the text file is an input file you generated by running Dtrios, that will always end with '_BBAA.txt'
ruby plot_d.rb pops_all.samples_BBAA.txt plot_order.txt 0.7 all.samples_BBAA_D.svg

##plot heatmap of f4
ruby plot_f4ratio.rb pops_all.samples_BBAA.txt plot_order.txt 0.2 all.samples_BBAA_f4ratio.svg

##run fbranch on the guidetree, with the Dtrios output ending in 'tree.txt' (guidetree must be rooted on #outgroup)
/home/d669d153/work/Dsuite/Build/Dsuite Fbranch todi.nwk.tre pops_all.samples_tree.txt > unlinked_Fbranch.txt

#activate python
module load python
#plot the fbranch heatmap with guidetree above
/home/d669d153/work/Dsuite/utils/dtools.py all.samples_Fbranch.txt nwk.tre

check out results

#heatmap of D
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/dsuite/unlinked_BBAA_D.svg")

#heatmap of F4
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/dsuite/unlinked_BBAA_f4ratio.svg")

#plot of fbranch statistic
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/dsuite/fbranch.svg")

Now we are going to repeat the above exercise with samples from Zamboanga removed

#remove the five samples from Mt. busa from the vcf
v.nobusa<-v[,colnames(v@gt)!="D_hypoleucum_2253" & colnames(v@gt)!="D_hypoleucum_2229" & colnames(v@gt)!="D_hypoleucum_2067" & colnames(v@gt)!="D_hypoleucum_2208" & colnames(v@gt)!="D_hypoleucum_1956"]
#remove SNPs that have become invariant because of sample removal
library(SNPfiltR)
## This is SNPfiltR v.1.0.0
## 
## Detailed usage information is available at: devonderaad.github.io/SNPfiltR/ 
## 
## If you use SNPfiltR in your published work, please cite the following papers: 
## 
## DeRaad, D.A. (2022), SNPfiltR: an R package for interactive and reproducible SNP filtering. Molecular Ecology Resources, 00, 1-15. http://doi.org/10.1111/1755-0998.13618 
## 
## Knaus, Brian J., and Niklaus J. Grunwald. 2017. VCFR: a package to manipulate and visualize variant call format data in R. Molecular Ecology Resources, 17.1:44-53. http://doi.org/10.1111/1755-0998.12549
## 
## Attaching package: 'SNPfiltR'
## The following object is masked _by_ '.GlobalEnv':
## 
##     popmap
v.nobusa<-min_mac(v.nobusa, min.mac = 1)
## 5.01% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#check vcf
v.nobusa
## ***** Object of Class vcfR *****
## 55 samples
## 2586 CHROMs
## 17,573 variants
## Object size: 80.6 Mb
## 11.05 percent missing data
## *****        *****         *****
#write out
#vcfR::write.vcf(v.nobusa, "~/Desktop/phil.dicaeum/vcf.nomtbusa.vcf.gz")

#update the popmap
popmap<-popmap[c(popmap$ID!="D_hypoleucum_2253" & popmap$ID!="D_hypoleucum_2229" & popmap$ID!="D_hypoleucum_2067" & popmap$ID!="D_hypoleucum_2208" & popmap$ID!="D_hypoleucum_1956"),]
#write out
#write.table(popmap, file="~/Downloads/dicaeum.nomtbusa.pops.txt", sep="\t", quote=F, row.names = F, col.names=F)

#copy both of these to a subfolder eg:
#scp -r /Users/devder/Desktop/phil.dicaeum/vcf.nomtbusa.vcf d669d153@hpc.crc.ku.edu:/home/d669d153/work/phil.dicaeum/dsuite/nomtbusa/

#no need to update the tree, as the tips haven't changed, we've just removed a few samples from one of the tips
#just copy the tree file into your new subdirectory for this run

We now have all three needed input files to run Dtrios and perform ABBA/BABA tests

##run Dtrios
##-c means this is the whole file, -n gives the name prefix for this run, -t gives the guidetree, followed by the vcf and popmap
/home/d669d153/work/Dsuite/Build/Dsuite Dtrios -c -n no.mtbusa -t nwk.tre vcf.nomtbusa.vcf pops.txt

Visualize your results

#use the same 'plot order' text file

##copy in ruby scripts. I already have them downloaded in another directory. 
cp /home/d669d153/work/todi.subset.2022/dsuite/plot_d.rb .
cp /home/d669d153/work/todi.subset.2022/dsuite/plot_f4ratio.rb .

##plot heatmap of D
#the text file is an input file you generated by running Dtrios, that will always end with '_BBAA.txt'
ruby plot_d.rb pops_no.mtbusa_BBAA.txt plot_order.txt 0.7 no.mtbusa_BBAA_D.svg

##plot heatmap of f4
ruby plot_f4ratio.rb pops_no.mtbusa_BBAA.txt plot_order.txt 0.2 no.mtbusa_BBAA_f4ratio.svg

##run fbranch on the guidetree, with the Dtrios output ending in 'tree.txt' (guidetree must be rooted on #outgroup)
/home/d669d153/work/Dsuite/Build/Dsuite Fbranch todi.nwk.tre pops_no.mtbusa_tree.txt > unlinked_Fbranch.txt

#activate python
module load python
#plot the fbranch heatmap with guidetree above
/home/d669d153/work/Dsuite/utils/dtools.py no.mtbusa_Fbranch.txt nwk.tre

check out results

#heatmap of D
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/dsuite/nomtbusa/unlinked_BBAA_D.svg")

#heatmap of F4
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/dsuite/nomtbusa/unlinked_BBAA_f4ratio.svg")

#plot of fbranch statistic
knitr::include_graphics("/Users/devder/Desktop/phil.dicaeum/dsuite/nomtbusa/fbranch.svg")