splitstree and Fst

Setup

#load packages
library(vcfR)

## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.12.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****

library(SNPfiltR)

## This is SNPfiltR v.1.0.0
## 
## Detailed usage information is available at: devonderaad.github.io/SNPfiltR/ 
## 
## If you use SNPfiltR in your published work, please cite the following papers: 
## 
## DeRaad, D.A. (2022), SNPfiltR: an R package for interactive and reproducible SNP filtering. Molecular Ecology Resources, 00, 1-15. http://doi.org/10.1111/1755-0998.13618 
## 
## Knaus, Brian J., and Niklaus J. Grunwald. 2017. VCFR: a package to manipulate and visualize variant call format data in R. Molecular Ecology Resources, 17.1:44-53. http://doi.org/10.1111/1755-0998.12549

library(RColorBrewer)
library(ggplot2)
library(StAMPP)

## Loading required package: pegas

## Loading required package: ape

## Registered S3 method overwritten by 'pegas':
##   method      from
##   print.amova ade4

## 
## Attaching package: 'pegas'

## The following object is masked from 'package:ape':
## 
##     mst

## The following objects are masked from 'package:vcfR':
## 
##     getINFO, write.vcf

library(adegenet)

## Loading required package: ade4

## 
## Attaching package: 'ade4'

## The following object is masked from 'package:pegas':
## 
##     amova

## 
##    /// adegenet 2.1.5 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()

#read in vcf as vcfR
vcfR <- read.vcfR("~/Desktop/marsh.wren.rad/filtered.vcf.gz")

samps<-read.csv("~/Desktop/marsh.wren.rad/marsh.wren.rad.sampling.csv")
#retain samples that were sequenced successfully
samps<-samps[samps$Tissue.num %in% colnames(vcfR@gt),]
#reorder sampling file to match order of samples in vcf
samps<-samps[match(colnames(vcfR@gt)[-1], samps$Tissue.num),]
samps$Tissue.num == colnames(vcfR@gt)[-1] #check that order matches

table(samps$Population)

Make splitstree

#convert vcfR to genlight
gen<-vcfR2genlight(vcfR)
#see sample names
gen@ind.names

##   [1] "B00908" "B00910" "B00911" "B00912" "B00913" "B00914" "B00915" "B00916"
##   [9] "B00927" "B00928" "B00929" "B00930" "B00931" "B00932" "B00934" "B00935"
##  [17] "B00936" "B00937" "B00938" "B00939" "B00940" "B00941" "B00942" "B00943"
##  [25] "B00944" "B00945" "B00946" "B00947" "B00948" "B00949" "B00950" "B00951"
##  [33] "B00953" "B00954" "B00955" "B00956" "B00957" "B00958" "B00959" "B00960"
##  [41] "B00961" "B00962" "B00963" "B00964" "B00965" "B00966" "B00967" "B02324"
##  [49] "B02325" "B02326" "B02327" "B02328" "B02330" "B02332" "B02333" "B02334"
##  [57] "B02336" "B02337" "B02338" "B02339" "B02340" "B02341" "B02342" "B02343"
##  [65] "B02344" "B02345" "B02346" "B02347" "B02348" "B02350" "B02351" "B02352"
##  [73] "B02353" "B02354" "B02355" "B02358" "B02360" "B02361" "B02363" "B02364"
##  [81] "B02366" "B02367" "B02368" "B02371" "B02372" "B02373" "B02374" "B02375"
##  [89] "B02376" "B02377" "B02378" "B02379" "B02380" "B02381" "B02382" "B02383"
##  [97] "B02385" "B02386" "B02387" "B02388" "B02389" "B02390" "B02391" "B02392"
## [105] "B02393" "B02394" "B02395" "B02397" "B02438" "B02439" "B02440" "B02441"
## [113] "B02449" "B02456" "B02458" "B02459" "B02460" "B02461" "B02464" "B02478"
## [121] "B02546" "B02569" "B02578" "B02588" "B02594" "B02609" "B02620" "25372" 
## [129] "25374"  "25375"  "25376"  "25377"  "25379"  "25380"  "25381"  "25382" 
## [137] "25383"

gen@ind.names<-paste0(gen@ind.names,samps$Population)
#assign inds to pops
pop(gen)<-gen@ind.names
#calculate pairwise nei's D matrix to use as splitstree input
sample.div <- stamppNeisD(gen, pop = FALSE)

#write matrix to disk and open in splitstree
stamppPhylip(distance.mat=sample.div, file="~/Desktop/marsh.wren.rad/filtered.splits.txt")

#view tree
knitr::include_graphics("/Users/devder/Desktop/marsh.wren.rad/splitstree.png")

calculate pairwise FST between the two pure ends

#define populations
gen@pop<-as.factor(samps$Population)
#calculate pairwise FST
di.heat<-stamppFst(gen)
#extract values
m<-di.heat$Fsts
#fill in upper triangle of matrix
m[upper.tri(m)] <- t(m)[upper.tri(m)]
#melt for plotting
heat <- reshape::melt(m)

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

#plot with labels
ggplot(data = heat, aes(x=X1, y=X2, fill=value)) + 
  geom_tile()+
  geom_text(data=heat,aes(label=round(value, 2)))+
  theme_minimal()+
  scale_fill_gradient2(low = "white", high = "red", space = "Lab", name="Fst") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.text.y = element_text(angle = 45, hjust = 1))

## Warning: Removed 6 rows containing missing values (geom_text).

splitstree and Fst

Devon DeRaad

2022-10-03

Setup

Make splitstree

calculate pairwise FST between the two pure ends