Setup
#load packages
library(vcfR)
##
## ***** *** vcfR *** *****
## This is vcfR 1.12.0
## browseVignettes('vcfR') # Documentation
## citation('vcfR') # Citation
## ***** ***** ***** *****
library(SNPfiltR)
## This is SNPfiltR v.1.0.0
##
## Detailed usage information is available at: devonderaad.github.io/SNPfiltR/
##
## If you use SNPfiltR in your published work, please cite the following papers:
##
## DeRaad, D.A. (2022), SNPfiltR: an R package for interactive and reproducible SNP filtering. Molecular Ecology Resources, 00, 1-15. http://doi.org/10.1111/1755-0998.13618
##
## Knaus, Brian J., and Niklaus J. Grunwald. 2017. VCFR: a package to manipulate and visualize variant call format data in R. Molecular Ecology Resources, 17.1:44-53. http://doi.org/10.1111/1755-0998.12549
library(RColorBrewer)
library(ggplot2)
library(StAMPP)
## Loading required package: pegas
## Loading required package: ape
## Registered S3 method overwritten by 'pegas':
## method from
## print.amova ade4
##
## Attaching package: 'pegas'
## The following object is masked from 'package:ape':
##
## mst
## The following objects are masked from 'package:vcfR':
##
## getINFO, write.vcf
library(adegenet)
## Loading required package: ade4
##
## Attaching package: 'ade4'
## The following object is masked from 'package:pegas':
##
## amova
##
## /// adegenet 2.1.5 is loaded ////////////
##
## > overview: '?adegenet'
## > tutorials/doc/questions: 'adegenetWeb()'
## > bug reports/feature requests: adegenetIssues()
#read in vcf as vcfR
vcfR <- read.vcfR("~/Desktop/marsh.wren.rad/filtered.vcf.gz")
samps<-read.csv("~/Desktop/marsh.wren.rad/marsh.wren.rad.sampling.csv")
#retain samples that were sequenced successfully
samps<-samps[samps$Tissue.num %in% colnames(vcfR@gt),]
#reorder sampling file to match order of samples in vcf
samps<-samps[match(colnames(vcfR@gt)[-1], samps$Tissue.num),]
samps$Tissue.num == colnames(vcfR@gt)[-1] #check that order matches
table(samps$Population)
Make splitstree
#convert vcfR to genlight
gen<-vcfR2genlight(vcfR)
#see sample names
gen@ind.names
## [1] "B00908" "B00910" "B00911" "B00912" "B00913" "B00914" "B00915" "B00916"
## [9] "B00927" "B00928" "B00929" "B00930" "B00931" "B00932" "B00934" "B00935"
## [17] "B00936" "B00937" "B00938" "B00939" "B00940" "B00941" "B00942" "B00943"
## [25] "B00944" "B00945" "B00946" "B00947" "B00948" "B00949" "B00950" "B00951"
## [33] "B00953" "B00954" "B00955" "B00956" "B00957" "B00958" "B00959" "B00960"
## [41] "B00961" "B00962" "B00963" "B00964" "B00965" "B00966" "B00967" "B02324"
## [49] "B02325" "B02326" "B02327" "B02328" "B02330" "B02332" "B02333" "B02334"
## [57] "B02336" "B02337" "B02338" "B02339" "B02340" "B02341" "B02342" "B02343"
## [65] "B02344" "B02345" "B02346" "B02347" "B02348" "B02350" "B02351" "B02352"
## [73] "B02353" "B02354" "B02355" "B02358" "B02360" "B02361" "B02363" "B02364"
## [81] "B02366" "B02367" "B02368" "B02371" "B02372" "B02373" "B02374" "B02375"
## [89] "B02376" "B02377" "B02378" "B02379" "B02380" "B02381" "B02382" "B02383"
## [97] "B02385" "B02386" "B02387" "B02388" "B02389" "B02390" "B02391" "B02392"
## [105] "B02393" "B02394" "B02395" "B02397" "B02438" "B02439" "B02440" "B02441"
## [113] "B02449" "B02456" "B02458" "B02459" "B02460" "B02461" "B02464" "B02478"
## [121] "B02546" "B02569" "B02578" "B02588" "B02594" "B02609" "B02620" "25372"
## [129] "25374" "25375" "25376" "25377" "25379" "25380" "25381" "25382"
## [137] "25383"
gen@ind.names<-paste0(gen@ind.names,samps$Population)
#assign inds to pops
pop(gen)<-gen@ind.names
#calculate pairwise nei's D matrix to use as splitstree input
sample.div <- stamppNeisD(gen, pop = FALSE)
#write matrix to disk and open in splitstree
stamppPhylip(distance.mat=sample.div, file="~/Desktop/marsh.wren.rad/filtered.splits.txt")
#view tree
knitr::include_graphics("/Users/devder/Desktop/marsh.wren.rad/splitstree.png")

calculate pairwise FST between the two pure ends
#define populations
gen@pop<-as.factor(samps$Population)
#calculate pairwise FST
di.heat<-stamppFst(gen)
#extract values
m<-di.heat$Fsts
#fill in upper triangle of matrix
m[upper.tri(m)] <- t(m)[upper.tri(m)]
#melt for plotting
heat <- reshape::melt(m)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
#plot with labels
ggplot(data = heat, aes(x=X1, y=X2, fill=value)) +
geom_tile()+
geom_text(data=heat,aes(label=round(value, 2)))+
theme_minimal()+
scale_fill_gradient2(low = "white", high = "red", space = "Lab", name="Fst") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.text.y = element_text(angle = 45, hjust = 1))
## Warning: Removed 6 rows containing missing values (geom_text).
