library(geosphere)
## Warning: package 'geosphere' was built under R version 3.5.2
library(StAMPP)
## Warning: package 'StAMPP' was built under R version 3.5.2
## Loading required package: pegas
## Warning: package 'pegas' was built under R version 3.5.2
## Loading required package: ape
## Loading required package: adegenet
## Loading required package: ade4
## Warning: package 'ade4' was built under R version 3.5.2
## 
##    /// adegenet 2.1.3 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()
## 
## Attaching package: 'pegas'
## The following object is masked from 'package:ade4':
## 
##     amova
## The following object is masked from 'package:ape':
## 
##     mst
library(ggplot2)
library(adegenet)
library(vcfR)
## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.12.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****
## 
## Attaching package: 'vcfR'
## The following object is masked from 'package:pegas':
## 
##     getINFO
library(gridExtra)

#read in all significance values
all.sigs<-read.csv("~/Downloads/all.sigs.csv")
locs<-read.csv(file = "~/Downloads/phase1_all_vairables_last.csv")
#make a subset of the locality data that only includes the 14 unique localities once
xy.coords.only<- locs[,c(15,16)]
as.numeric(rownames(unique(xy.coords.only)))
##  [1]   1  47  96 151 197 212 228 288 389 472 563 666 679 710
phase1.uniques<-locs[as.numeric(rownames(unique(xy.coords.only))),]

#generate pairwise geographic distance matrix
unique.xy<-phase1.uniques[,c(15,16)]

#make geographic dist matrix convert to km distance
d.geo <- as.dist(distm(unique.xy, fun=distGeo))
d.geo<-d.geo/1000
d.geo<-as.matrix(d.geo)

#generate pairwise environmental distance matrix for all 5 env variables (3 real and 2 simulated)
d.prec<-as.matrix(dist(phase1.uniques$p_annual))
d.hum<-as.matrix(dist(phase1.uniques$h_mean))
d.temp<-as.matrix(dist(phase1.uniques$t_mean))
d.rand<-as.matrix(dist(phase1.uniques$random))
d.corr<-as.matrix(dist(phase1.uniques$autocorrelated))

#make plots showing the relationship between geographic distance and each environmental value
plot(d.geo,d.prec)

plot(d.geo,d.hum)

plot(d.geo,d.temp)

plot(d.geo,d.rand)

plot(d.geo,d.corr)

analyze effect of spatial autocorrelation on number of GEA outliers identified

#mantel test to quantify the strength of each of these relationships
mantel.prec<-vegan::mantel(xdis = d.geo, ydis = d.prec)
mantel.hum<-vegan::mantel(xdis = d.geo, ydis = d.hum)
mantel.temp<-vegan::mantel(xdis = d.geo, ydis = d.temp)
mantel.rand<-vegan::mantel(xdis = d.geo, ydis = d.rand)
mantel.corr<-vegan::mantel(xdis = d.geo, ydis = d.corr)

#open empty vector
out<-c()
#calc number of sig outliers for each df
out[1]<-sum(all.sigs$lfmm.prec < .01)
out[2]<-sum(all.sigs$lfmm.hum < .01)
out[3]<-sum(all.sigs$lfmm.temp < .01)
out[4]<-sum(all.sigs$lfmm.rand < .01)
out[5]<-sum(all.sigs$lfmm.corr < .01)

out[6]<-sum(all.sigs$baye.prec < .01)
out[7]<-sum(all.sigs$baye.hum < .01)
out[8]<-sum(all.sigs$baye.temp < .01)
out[9]<-sum(all.sigs$baye.rand < .01)
out[10]<-sum(all.sigs$baye.corr < .01)

out[11]<-sum(all.sigs$vim.prec > 1)
out[12]<-sum(all.sigs$vim.hum > 1)
out[13]<-sum(all.sigs$vim.temp > 1)
out[14]<-sum(all.sigs$vim.rand > 1)
out[15]<-sum(all.sigs$vim.corr > 1)
out[16]<-sum(all.sigs$cors.prec < .0001)
out[17]<-sum(all.sigs$cors.hum <.0001)
out[18]<-sum(all.sigs$cors.temp <.0001)
out[19]<-sum(all.sigs$cors.rand <.0001)
out[20]<-sum(all.sigs$cors.corr <.0001)


#make dataframe for plotting
spatial.autocorr<-c(mantel.prec$statistic,mantel.hum$statistic,mantel.temp$statistic,mantel.rand$statistic,mantel.corr$statistic)
spatial.autocorr<-rep(spatial.autocorr, times=4)
spatial.autocorr<-abs(spatial.autocorr)
cors<-data.frame(env.var=rep(c("prec","hum","temp","rand","corr"), times=4),
                 test=c(rep("LFMM2", times=5), rep("BayeScEnv", times=5), rep("r2VIM", times=5),rep("rep. correlations", times=5)),
                 spat.autocorr=spatial.autocorr,
                 num.sig.outliers=out)

#plot 4A
spat.plot<-ggplot(cors, aes(x=spat.autocorr, y=num.sig.outliers, color=test)) +
  geom_point(cex=3, alpha=.6)+
  geom_smooth(method=lm, se=FALSE)+
  theme_classic()+
  labs(x="correlation between geographic and environmental distance", y="significant GEA SNPs")
#ggsave("~/Downloads/geographic.genomic.correlations.pdf", height=3, width=5.5)

#test significance of each correlation
cor.test(cors$spat.autocorr[1:5],cors$num.sig.outliers[1:5])
## 
##  Pearson's product-moment correlation
## 
## data:  cors$spat.autocorr[1:5] and cors$num.sig.outliers[1:5]
## t = 0.59441, df = 3, p-value = 0.5941
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7814647  0.9381856
## sample estimates:
##       cor 
## 0.3246015
cor.test(cors$spat.autocorr[5:10],cors$num.sig.outliers[5:10])
## 
##  Pearson's product-moment correlation
## 
## data:  cors$spat.autocorr[5:10] and cors$num.sig.outliers[5:10]
## t = 0.11448, df = 4, p-value = 0.9144
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7911054  0.8302039
## sample estimates:
##        cor 
## 0.05714417
cor.test(cors$spat.autocorr[10:15],cors$num.sig.outliers[10:15])
## 
##  Pearson's product-moment correlation
## 
## data:  cors$spat.autocorr[10:15] and cors$num.sig.outliers[10:15]
## t = 1.6554, df = 4, p-value = 0.1732
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3604838  0.9549990
## sample estimates:
##       cor 
## 0.6376147
cor.test(cors$spat.autocorr[15:20],cors$num.sig.outliers[15:20])
## 
##  Pearson's product-moment correlation
## 
## data:  cors$spat.autocorr[15:20] and cors$num.sig.outliers[15:20]
## t = 0.2412, df = 4, p-value = 0.8213
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7662890  0.8488145
## sample estimates:
##       cor 
## 0.1197333

analyze effect of genomic autocorrelation of environmental vairables on number of GEA outliers identified

#read in vcfs
vcf.2L<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.2L.vcf.gz.recode.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 4486
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 4486
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 4486
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant: 4486
## All variants processed
vcf.2R<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.2R.vcf.gz.recode.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 5551
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 5551
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 5551
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant: 5551
## All variants processed
vcf.3L<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.3L.vcf.gz.recode.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 3693
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 3693
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 3693
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant: 3693
## All variants processed
vcf.3R<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.3R.vcf.gz.recode.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 4700
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 4700
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 4700
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant: 4700
## All variants processed
vcf.X<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.X.vcf.gz.recode.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 2123
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 2123
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 2123
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant: 2123
## All variants processed
#combine all the variants into a single vcf
vcf.2L@gt<-rbind(vcf.2L@gt,vcf.2R@gt,vcf.3L@gt,vcf.3R@gt,vcf.X@gt)
vcf.2L@fix<-rbind(vcf.2L@fix,vcf.2R@fix,vcf.3L@fix,vcf.3R@fix,vcf.X@fix)
vcf.2L
## ***** Object of Class vcfR *****
## 765 samples
## 5 CHROMs
## 20,553 variants
## Object size: 439.7 Mb
## 0 percent missing data
## *****        *****         *****
#convert to genlight
gen<-vcfR2genlight(vcf.2L)
## Warning in vcfR2genlight(vcf.2L): Found 445 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 445 loci will be omitted from the genlight object.
#assign pop to individuals
pop(gen)<-locs$latitude
#calculate pairwise Fst between pops across all variants
fsts<-stamppFst(gen)
#convert to distance matrix
fsts<-as.matrix(as.dist(fsts$Fsts))

#calculate mantel r statistic for each variable with genome-wide divergence between pops
fst.prec<-vegan::mantel(xdis = fsts, ydis = d.prec)
fst.hum<-vegan::mantel(xdis = fsts, ydis = d.hum)
fst.temp<-vegan::mantel(xdis = fsts, ydis = d.temp)
fst.rand<-vegan::mantel(xdis = fsts, ydis = d.rand)
fst.corr<-vegan::mantel(xdis = fsts, ydis = d.corr)

#make vector and add to dataframe
genetic.autocorr<-c(fst.prec$statistic,fst.hum$statistic,fst.temp$statistic,fst.rand$statistic,fst.corr$statistic)
genetic.autocorr<-rep(genetic.autocorr, times=4)
cors$gen.autocorr<-abs(genetic.autocorr)

#make plot 4B
gen.autocorr.plot<-ggplot(cors, aes(x=gen.autocorr, y=num.sig.outliers, color=test)) +
  geom_point(cex=3, alpha=.6)+
  geom_smooth(method=lm, se=FALSE)+
  theme_classic()+
  labs(x="correlation between genome-wide Fst and environmental divergence", y="significant GEA SNPs")
#ggsave("~/Downloads/env.genomic.correlations.pdf", height=3, width=5.5)

#test significance of each regression line
cor.test(cors$gen.autocorr[1:5],cors$num.sig.outliers[1:5])
## 
##  Pearson's product-moment correlation
## 
## data:  cors$gen.autocorr[1:5] and cors$num.sig.outliers[1:5]
## t = 0.14649, df = 3, p-value = 0.8928
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8620897  0.8996503
## sample estimates:
##        cor 
## 0.08427773
cor.test(cors$gen.autocorr[5:10],cors$num.sig.outliers[5:10])
## 
##  Pearson's product-moment correlation
## 
## data:  cors$gen.autocorr[5:10] and cors$num.sig.outliers[5:10]
## t = -0.12419, df = 4, p-value = 0.9072
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8317052  0.7892836
## sample estimates:
##         cor 
## -0.06197752
cor.test(cors$gen.autocorr[10:15],cors$num.sig.outliers[10:15])
## 
##  Pearson's product-moment correlation
## 
## data:  cors$gen.autocorr[10:15] and cors$num.sig.outliers[10:15]
## t = 2.0006, df = 4, p-value = 0.116
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2449253  0.9649460
## sample estimates:
##       cor 
## 0.7072093
cor.test(cors$gen.autocorr[15:20],cors$num.sig.outliers[15:20])
## 
##  Pearson's product-moment correlation
## 
## data:  cors$gen.autocorr[15:20] and cors$num.sig.outliers[15:20]
## t = 0.19015, df = 4, p-value = 0.8585
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7765634  0.8415662
## sample estimates:
##        cor 
## 0.09464747

test for significant difference in overall number of GEA outliers identified between real and randomly generated variables

#add the variable type to the dataframe
cors$var.type<-c("environmental","environmental","environmental","randomly generated","randomly generated",
                 "environmental","environmental","environmental","randomly generated","randomly generated",
                 "environmental","environmental","environmental","randomly generated","randomly generated",
                 "environmental","environmental","environmental","randomly generated","randomly generated")

#4D
t.test.plot<-ggplot(cors, aes(x=var.type, y=num.sig.outliers, col=var.type)) + 
  geom_boxplot()+
  geom_jitter(position=position_jitter(0), cex=2, alpha=.6)+
  scale_color_manual(values=c("black","black"))+
  theme_classic()+
labs(x="GEA independent variable", y="significant GEA SNPs")
#ggsave("~/Downloads/env.random.comparison.pdf", height=3, width=4.25)

#t test to assess whether we see a significant difference in total number of outliers identified between real and fake variables
t.test(cors$num.sig.outliers[cors$var.type == "environmental"], cors$num.sig.outliers[cors$var.type == "randomly generated"])
## 
##  Welch Two Sample t-test
## 
## data:  cors$num.sig.outliers[cors$var.type == "environmental"] and cors$num.sig.outliers[cors$var.type == "randomly generated"]
## t = 0.13809, df = 14.937, p-value = 0.892
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3520.505  4008.088
## sample estimates:
## mean of x mean of y 
##  2393.417  2149.625

visualize the genomic autocorrelation of each variable in each of the four discrete regions of genomic relatedness

#make pairwise correlation matrices for Fst within each of the four distinct regions
#compare pairwise correlation matrices for each env variable with each genomic region (4x5 comparisons)
#calculate the proportion of significant SNPs within each region for each method (3 methods x 4 regions)
#total: 60 comparisons showing correlation between genetic and environmental variation on the x axis, and %outlier SNPs on the x axis

#reset VCFs
vcf.2L<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.2L.vcf.gz.recode.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 4486
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 4486
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 4486
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant: 4486
## All variants processed
vcf.2R<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.2R.vcf.gz.recode.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 5551
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 5551
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 5551
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant: 5551
## All variants processed
vcf.3L<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.3L.vcf.gz.recode.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 3693
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 3693
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 3693
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant: 3693
## All variants processed
vcf.3R<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.3R.vcf.gz.recode.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 4700
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 4700
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 4700
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant: 4700
## All variants processed
vcf.X<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.X.vcf.gz.recode.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 2123
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 2123
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 2123
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant: 2123
## All variants processed
#reorder locs file to match the same order as the vcfs
locs<-locs[match(colnames(vcf.2L@gt)[-1],locs$ox_code),]
rownames(locs)<-1:765
#verify that all samples are present in the vcf in the same order as in the locality df
colnames(vcf.2L@gt)[-1] == locs$ox_code
##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [256] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [271] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [286] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [301] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [316] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [331] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [346] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [361] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [376] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [391] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [406] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [421] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [436] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [451] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [466] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [481] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [496] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [511] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [526] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [541] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [556] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [571] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [586] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [601] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [616] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [631] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [646] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [661] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [676] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [691] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [706] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [721] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [736] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [751] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#make a subset of the locality data that only includes the 14 unique localities once
xy.coords.only<- locs[,c(15,16)]
as.numeric(rownames(unique(xy.coords.only)))
##  [1]   1   2  33 151 254 300 328 344 356 536 619 679 735 751
phase1.uniques<-locs[as.numeric(rownames(unique(xy.coords.only))),]
#make sure these are in the same order
phase1.uniques$t_mean == unique(locs$t_mean)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#generate pairwise environmental distance matrix for all 5 env variables (3 real and 2 simulated)
d.prec<-as.matrix(dist(phase1.uniques$p_annual))
d.hum<-as.matrix(dist(phase1.uniques$h_mean))
d.temp<-as.matrix(dist(phase1.uniques$t_mean))
d.rand<-as.matrix(dist(phase1.uniques$random))
d.corr<-as.matrix(dist(phase1.uniques$autocorrelated))

#generate pairwise genetic distance matrix between the same 14 localities,
#using each of the 4 discrete regions of relatedness throughout the genome
#convert vcfR to genlight
gen.2La<-vcfR2genlight(vcf.2L)
## Warning in vcfR2genlight(vcf.2L): Found 122 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 122 loci will be omitted from the genlight object.
#subset 2La
gen.2La<-gen.2La[,gen.2La$position > 20524058 & gen.2La$position < 42165532]
gen.2La
##  /// GENLIGHT OBJECT /////////
## 
##  // 765 genotypes,  1,963 binary SNPs, size: 1.6 Mb
##  123 (0.01 %) missing data
## 
##  // Basic content
##    @gen: list of 765 SNPbin
## 
##  // Optional content
##    @ind.names:  765 individual labels
##    @loc.names:  1963 locus labels
##    @chromosome: factor storing chromosomes of the SNPs
##    @position: integer storing positions of the SNPs
##    @other: a list containing: elements without names
#assign pop to individuals
pop(gen.2La)<-locs$latitude
#calculate pairwise Fst between pops
fst.2La<-stamppFst(gen.2La)
#convert to distance matrix
fst.2La<-as.matrix(as.dist(fst.2La$Fsts))

#subset 2Rb
#convert vcfR to genlight
gen.2Rb<-vcfR2genlight(vcf.2R)
## Warning in vcfR2genlight(vcf.2R): Found 99 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 99 loci will be omitted from the genlight object.
#subset 2Rb out of 2R
gen.2Rb<-gen.2Rb[,gen.2Rb$position > 19023925 & gen.2Rb$position < 26758676]
gen.2Rb
##  /// GENLIGHT OBJECT /////////
## 
##  // 765 genotypes,  740 binary SNPs, size: 1.4 Mb
##  73 (0.01 %) missing data
## 
##  // Basic content
##    @gen: list of 765 SNPbin
## 
##  // Optional content
##    @ind.names:  765 individual labels
##    @loc.names:  740 locus labels
##    @chromosome: factor storing chromosomes of the SNPs
##    @position: integer storing positions of the SNPs
##    @other: a list containing: elements without names
#assign pop to individuals
pop(gen.2Rb)<-locs$latitude
#calculate pairwise Fst between pops
fst.2Rb<-stamppFst(gen.2Rb)
#convert to distance matrix
fst.2Rb<-as.matrix(as.dist(fst.2Rb$Fsts))

#subset geography
#3R 0-25Mb
#convert vcfR to genlight
gen.geog<-vcfR2genlight(vcf.3R)
## Warning in vcfR2genlight(vcf.3R): Found 115 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 115 loci will be omitted from the genlight object.
#subset geog out of 3R
gen.geog<-gen.geog[,gen.geog$position < 25000000]
gen.geog
##  /// GENLIGHT OBJECT /////////
## 
##  // 765 genotypes,  2,277 binary SNPs, size: 1.7 Mb
##  129 (0.01 %) missing data
## 
##  // Basic content
##    @gen: list of 765 SNPbin
## 
##  // Optional content
##    @ind.names:  765 individual labels
##    @loc.names:  2277 locus labels
##    @chromosome: factor storing chromosomes of the SNPs
##    @position: integer storing positions of the SNPs
##    @other: a list containing: elements without names
#assign pop to individuals
pop(gen.geog)<-locs$latitude
#calculate pairwise Fst between pops
fst.geog<-stamppFst(gen.geog)
#convert to distance matrix
fst.geog<-as.matrix(as.dist(fst.geog$Fsts))


#subset species
#2R 35-55Mb
#convert vcfR to genlight
gen.spec<-vcfR2genlight(vcf.2R)
## Warning in vcfR2genlight(vcf.2R): Found 99 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 99 loci will be omitted from the genlight object.
#subset spec out of X
gen.spec<-gen.spec[,gen.spec$position > 35000000 & gen.spec$position < 55000000]
gen.spec
##  /// GENLIGHT OBJECT /////////
## 
##  // 765 genotypes,  1,707 binary SNPs, size: 1.6 Mb
##  173 (0.01 %) missing data
## 
##  // Basic content
##    @gen: list of 765 SNPbin
## 
##  // Optional content
##    @ind.names:  765 individual labels
##    @loc.names:  1707 locus labels
##    @chromosome: factor storing chromosomes of the SNPs
##    @position: integer storing positions of the SNPs
##    @other: a list containing: elements without names
#assign pop to individuals
pop(gen.spec)<-locs$latitude
#calculate pairwise Fst between pops
fst.spec<-stamppFst(gen.spec)
#convert to distance matrix
fst.spec<-as.matrix(as.dist(fst.spec$Fsts))

#####
####
###
##
#test for correlation between each of the 4 discrete genomic regions x 5 environmental variables
#open vectors to store test info and r value
test<-c()
storer<-c()
#plot figure
#pdf(file="~/Downloads/corrs.nolabs.pdf", width=10,height=8)
par(mar=c(2,2,2,2))
par(mfrow=c(4,5))

#start with 2Rb
#d.prec x gen structure in 2Rb
s<-ape::mantel.test(d.prec,fst.2Rb)
r<-vegan::mantel(xdis = d.prec, ydis = fst.2Rb)
plot(d.prec,fst.2Rb, pch=20,cex=1.5, col = rgb(red = 1, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in 2Rb inversion", xlab = "annual precip difference")
     ann=FALSE)
abline(lm(as.dist(fst.2Rb)~as.dist(d.prec)), lty = 2)
text(x = max(d.prec)*.86, y = max(fst.2Rb)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.prec)*.86, y = max(fst.2Rb)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[1]<-"prec.2Rb"
storer[1]<-r$statistic

#d.hum x gen structure in 2Rb
s<-ape::mantel.test(d.hum,fst.2Rb)
r<-vegan::mantel(xdis = d.hum, ydis = fst.2Rb)
plot(d.hum,fst.2Rb, pch=20,cex=1.5, col = rgb(red = 1, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in 2Rb inversion", xlab = "humidity difference (10^5 kg of water/kg of air)")
     ann=FALSE)
abline(lm(as.dist(fst.2Rb)~as.dist(d.hum)), lty = 2)
text(x = max(d.hum)*.86, y = max(fst.2Rb)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.hum)*.86, y = max(fst.2Rb)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[2]<-"hum.2Rb"
storer[2]<-r$statistic

#d.temp x gen structure in 2Rb
s<-ape::mantel.test(d.temp,fst.2Rb)
r<-vegan::mantel(xdis = d.temp, ydis = fst.2Rb)
plot(d.temp,fst.2Rb, pch=20,cex=1.5, col = rgb(red = 1, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in 2La inversion", xlab = "mean annual temp difference")
     ann=FALSE)
abline(lm(as.dist(fst.2Rb)~as.dist(d.temp)), lty = 2)
text(x = max(d.temp)*.86, y = max(fst.2Rb)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.temp)*.86, y = max(fst.2Rb)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[3]<-"temp.2Rb"
storer[3]<-r$statistic

#d.corr x gen structure in 2Rb
s<-ape::mantel.test(d.corr,fst.2Rb)
r<-vegan::mantel(xdis = d.corr, ydis = fst.2Rb)
plot(d.corr,fst.2Rb, pch=20,cex=1.5, col = rgb(red = 1, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in 2Rb inversion", xlab = "spatially autocorrelated random variable")
     ann=FALSE)
abline(lm(as.dist(fst.2Rb)~as.dist(d.corr)), lty = 2)
text(x = max(d.corr)*.86, y = max(fst.2Rb)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.corr)*.86, y = max(fst.2Rb)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[4]<-"corr.2Rb"
storer[4]<-r$statistic

#d.rand x gen structure in 2Rb
s<-ape::mantel.test(d.rand,fst.2Rb)
r<-vegan::mantel(xdis = d.rand, ydis = fst.2Rb)
plot(d.rand,fst.2Rb, pch=20,cex=1.5, col = rgb(red = 1, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in 2Rb inversion", xlab = "true random variable")
     ann=FALSE)
abline(lm(as.dist(fst.2Rb)~as.dist(d.rand)), lty = 2)
text(x = max(d.rand)*.86, y = max(fst.2Rb)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.rand)*.86, y = max(fst.2Rb)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[5]<-"rand.2Rb"
storer[5]<-r$statistic


#####
####
###
##now 2La
#d.prec x gen structure in 2La
s<-ape::mantel.test(d.prec,fst.2La)
r<-vegan::mantel(xdis = d.prec, ydis = fst.2La)
plot(d.prec,fst.2La, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = 1, alpha = 0.5),
     #ylab = "pairwise Fst in 2La inversion", xlab = "annual precip difference")
     ann=FALSE)
abline(lm(as.dist(fst.2La)~as.dist(d.prec)), lty = 2)
text(x = max(d.prec)*.86, y = max(fst.2La)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.prec)*.86, y = max(fst.2La)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[6]<-"prec.2La"
storer[6]<-r$statistic

#d.hum x gen structure in 2La
s<-ape::mantel.test(d.hum,fst.2La)
r<-vegan::mantel(xdis = d.hum, ydis = fst.2La)
plot(d.hum,fst.2La, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = 1, alpha = 0.5),
     #ylab = "pairwise Fst in 2La inversion", xlab = "humidity difference (10^5 kg of water/kg of air)")
     ann=FALSE)
abline(lm(as.dist(fst.2La)~as.dist(d.hum)), lty = 2)
text(x = max(d.hum)*.86, y = max(fst.2La)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.hum)*.86, y = max(fst.2La)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[7]<-"hum.2La"
storer[7]<-r$statistic

#d.temp x gen structure in 2La
s<-ape::mantel.test(d.temp,fst.2La)
r<-vegan::mantel(xdis = d.temp, ydis = fst.2La)
plot(d.temp,fst.2La, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = 1, alpha = 0.5),
     #ylab = "pairwise Fst in 2La inversion", xlab = "mean annual temp difference")
     ann=FALSE)
abline(lm(as.dist(fst.2La)~as.dist(d.temp)), lty = 2)
text(x = max(d.temp)*.86, y = max(fst.2La)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.temp)*.86, y = max(fst.2La)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[8]<-"temp.2La"
storer[8]<-r$statistic

#d.corr x gen structure in 2La
s<-ape::mantel.test(d.corr,fst.2La)
r<-vegan::mantel(xdis = d.corr, ydis = fst.2La)
plot(d.corr,fst.2La, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = 1, alpha = 0.5),
     #ylab = "pairwise Fst in 2La inversion", xlab = "spatially autocorrelated random variable")
     ann=FALSE)
abline(lm(as.dist(fst.2La)~as.dist(d.corr)), lty = 2)
text(x = max(d.corr)*.86, y = max(fst.2La)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.corr)*.86, y = max(fst.2La)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[9]<-"corr.2La"
storer[9]<-r$statistic

#d.rand x gen structure in 2La
s<-ape::mantel.test(d.rand,fst.2La)
r<-vegan::mantel(xdis = d.rand, ydis = fst.2La)
plot(d.rand,fst.2La, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = 1, alpha = 0.5),
     #ylab = "pairwise Fst in 2La inversion", xlab = "true random variable")
     ann=FALSE)
abline(lm(as.dist(fst.2La)~as.dist(d.rand)), lty = 2)
text(x = max(d.rand)*.86, y = max(fst.2La)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.rand)*.86, y = max(fst.2La)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[10]<-"rand.2La"
storer[10]<-r$statistic


#####
####
###
##now geog
#d.prec x gen structure in geog
s<-ape::mantel.test(d.prec,fst.geog)
r<-vegan::mantel(xdis = d.prec, ydis = fst.geog)
plot(d.prec,fst.geog, pch=20,cex=1.5, col = rgb(red = .0, green = .0, blue = .0, alpha = 0.5),
     #ylab = "pairwise Fst in geog", xlab = "annual precip difference")
     ann=FALSE)
abline(lm(as.dist(fst.geog)~as.dist(d.prec)), lty = 2)
text(x = max(d.prec)*.86, y = max(fst.geog)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.prec)*.86, y = max(fst.geog)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[11]<-"prec.geog"
storer[11]<-r$statistic

#d.hum x gen structure in geog
s<-ape::mantel.test(d.hum,fst.geog)
r<-vegan::mantel(xdis = d.hum, ydis = fst.geog)
plot(d.hum,fst.geog, pch=20,cex=1.5, col = rgb(red = .0, green = .0, blue = .0, alpha = 0.5),
     #ylab = "pairwise Fst in geog", xlab = "humidity difference (10^5 kg of water/kg of air)")
     ann=FALSE)
abline(lm(as.dist(fst.geog)~as.dist(d.hum)), lty = 2)
text(x = max(d.hum)*.86, y = max(fst.geog)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.hum)*.86, y = max(fst.geog)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
test[12]<-"hum.geog"
storer[12]<-r$statistic

#d.temp x gen structure in geog
s<-ape::mantel.test(d.temp,fst.geog)
r<-vegan::mantel(xdis = d.temp, ydis = fst.geog)
plot(d.temp,fst.geog, pch=20,cex=1.5, col = rgb(red = .0, green = .0, blue = .0, alpha = 0.5),
     #ylab = "pairwise Fst in geog", xlab = "mean annual temp difference")
     ann=FALSE)
abline(lm(as.dist(fst.geog)~as.dist(d.temp)), lty = 2)
text(x = max(d.temp)*.86, y = max(fst.geog)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.temp)*.86, y = max(fst.geog)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
test[13]<-"temp.geog"
storer[13]<-r$statistic

#d.corr x gen structure in geog
s<-ape::mantel.test(d.corr,fst.geog)
r<-vegan::mantel(xdis = d.corr, ydis = fst.geog)
plot(d.corr,fst.geog, pch=20,cex=1.5, col = rgb(red = .0, green = .0, blue = .0, alpha = 0.5),
     #ylab = "pairwise Fst in geog", xlab = "spatially autocorrelated random variable")
     ann=FALSE)
abline(lm(as.dist(fst.geog)~as.dist(d.corr)), lty = 2)
text(x = max(d.corr)*.86, y = max(fst.geog)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.corr)*.86, y = max(fst.geog)*.9, labels = bquote(p == .(round(s$p, digits = 3))))
test[14]<-"corr.geog"
storer[14]<-r$statistic

#d.rand x gen structure in geog
s<-ape::mantel.test(d.rand,fst.geog)
r<-vegan::mantel(xdis = d.rand, ydis = fst.geog)
plot(d.rand,fst.geog, pch=20,cex=1.5, col = rgb(red = .0, green = .0, blue = .0, alpha = 0.5),
     #ylab = "pairwise Fst in geog", xlab = "true random variable")
     ann=FALSE)
abline(lm(as.dist(fst.geog)~as.dist(d.rand)), lty = 2)
text(x = max(d.rand)*.86, y = max(fst.geog)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.rand)*.86, y = max(fst.geog)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
test[15]<-"rand.geog"
storer[15]<-r$statistic

#####
####
###
##
#now species
#d.prec x gen structure in spec
s<-ape::mantel.test(d.prec,fst.spec)
r<-vegan::mantel(xdis = d.prec, ydis = fst.spec)
plot(d.prec,fst.spec, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in spec", xlab = "annual precip difference")
     ann=FALSE)
abline(lm(as.dist(fst.spec)~as.dist(d.prec)), lty = 2)
text(x = max(d.prec)*.86, y = max(fst.spec)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.prec)*.86, y = max(fst.spec)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[16]<-"prec.species"
storer[16]<-r$statistic

#d.hum x gen structure in spec
s<-ape::mantel.test(d.hum,fst.spec)
r<-vegan::mantel(xdis = d.hum, ydis = fst.spec)
plot(d.hum,fst.spec, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in spec", xlab = "humidity difference (10^5 kg of water/kg of air)")
     ann=FALSE)
abline(lm(as.dist(fst.spec)~as.dist(d.hum)), lty = 2)
text(x = max(d.hum)*.86, y = max(fst.spec)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.hum)*.86, y = max(fst.spec)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[17]<-"hum.species"
storer[17]<-r$statistic

#d.temp x gen structure in spec
s<-ape::mantel.test(d.temp,fst.spec)
r<-vegan::mantel(xdis = d.temp, ydis = fst.spec)
plot(d.temp,fst.spec, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in spec", xlab = "mean annual temp difference")
     ann=FALSE)
abline(lm(as.dist(fst.spec)~as.dist(d.temp)), lty = 2)
text(x = max(d.temp)*.86, y = max(fst.spec)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.temp)*.86, y = max(fst.spec)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[18]<-"temp.species"
storer[18]<-r$statistic

#d.corr x gen structure in spec
s<-ape::mantel.test(d.corr,fst.spec)
r<-vegan::mantel(xdis = d.corr, ydis = fst.spec)
plot(d.corr,fst.spec, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in spec", xlab = "spatially autocorrelated random variable")
     ann=FALSE)
abline(lm(as.dist(fst.spec)~as.dist(d.corr)), lty = 2)
text(x = max(d.corr)*.86, y = max(fst.spec)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.corr)*.86, y = max(fst.spec)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[19]<-"corr.species"
storer[19]<-r$statistic

#d.rand x gen structure in spec
s<-ape::mantel.test(d.rand,fst.spec)
r<-vegan::mantel(xdis = d.rand, ydis = fst.spec)
plot(d.rand,fst.spec, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in spec", xlab = "true random variable")
     ann=FALSE)
abline(lm(as.dist(fst.spec)~as.dist(d.rand)), lty = 2)
text(x = max(d.rand)*.86, y = max(fst.spec)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.rand)*.86, y = max(fst.spec)*.9, labels = bquote(p == .(round(s$p, digits = 2))))

#store values
test[20]<-"rand.species"
storer[20]<-r$statistic

#shut down the figure
#dev.off()

calculate proportion of tested SNPs in each genomic region identified as significant for each GEA test (4 methods x 4 genomic regions x 5 environmental vairables = 80 data points)

#create dataframe to hold comparisons between significant percentage and degree of confounding genetic variation
f<-data.frame(test=rep(test, times=4),
              r=rep(storer, times=4))

#create vector to hold the significant proportion for each of the tests in each of the genomic regions
prop.sig<-c()
test<-c(rep("LFMM2", times=20),rep("BayeScEnv", times=20),rep("r2VIM", times=20),rep("rep. correlations", times=20))
#calc sig prop for each test
sigs.2Rb<-all.sigs[all.sigs$Chromosome == "2R" & all.sigs$Position > 20524058 & all.sigs$Position < 42165532,]
sigs.2La<-all.sigs[all.sigs$Chromosome == "2L" & all.sigs$Position > 19023925 & all.sigs$Position < 26758676,]
sigs.geog<-all.sigs[all.sigs$Chromosome == "3R" & all.sigs$Position < 25000000,]
sigs.species<-all.sigs[all.sigs$Chromosome == "2R" & all.sigs$Position > 35000000 & all.sigs$Position < 55000000,]

#lfmm test 2Rb, 2La, geog, species
prop.sig[1]<-sum(sigs.2Rb$lfmm.prec < .01)/nrow(sigs.2Rb)
prop.sig[2]<-sum(sigs.2Rb$lfmm.hum < .01)/nrow(sigs.2Rb)
prop.sig[3]<-sum(sigs.2Rb$lfmm.temp < .01)/nrow(sigs.2Rb)
prop.sig[4]<-sum(sigs.2Rb$lfmm.corr < .01)/nrow(sigs.2Rb)
prop.sig[5]<-sum(sigs.2Rb$lfmm.rand < .01)/nrow(sigs.2Rb)
prop.sig[6]<-sum(sigs.2La$lfmm.prec < .01)/nrow(sigs.2La)
prop.sig[7]<-sum(sigs.2La$lfmm.hum < .01)/nrow(sigs.2La)
prop.sig[8]<-sum(sigs.2La$lfmm.temp < .01)/nrow(sigs.2La)
prop.sig[9]<-sum(sigs.2La$lfmm.corr < .01)/nrow(sigs.2La)
prop.sig[10]<-sum(sigs.2La$lfmm.rand < .01)/nrow(sigs.2La)
prop.sig[11]<-sum(sigs.geog$lfmm.prec < .01)/nrow(sigs.geog)
prop.sig[12]<-sum(sigs.geog$lfmm.hum < .01)/nrow(sigs.geog)
prop.sig[13]<-sum(sigs.geog$lfmm.temp < .01)/nrow(sigs.geog)
prop.sig[14]<-sum(sigs.geog$lfmm.corr < .01)/nrow(sigs.geog)
prop.sig[15]<-sum(sigs.geog$lfmm.rand < .01)/nrow(sigs.geog)
prop.sig[16]<-sum(sigs.species$lfmm.prec < .01)/nrow(sigs.species)
prop.sig[17]<-sum(sigs.species$lfmm.hum < .01)/nrow(sigs.species)
prop.sig[18]<-sum(sigs.species$lfmm.temp < .01)/nrow(sigs.species)
prop.sig[19]<-sum(sigs.species$lfmm.corr < .01)/nrow(sigs.species)
prop.sig[20]<-sum(sigs.species$lfmm.rand < .01)/nrow(sigs.species)

#bayescenv test 2Rb, 2La, geog, species
prop.sig[21]<-sum(sigs.2Rb$baye.prec < .01)/nrow(sigs.2Rb)
prop.sig[22]<-sum(sigs.2Rb$baye.hum < .01)/nrow(sigs.2Rb)
prop.sig[23]<-sum(sigs.2Rb$baye.temp < .01)/nrow(sigs.2Rb)
prop.sig[24]<-sum(sigs.2Rb$baye.corr < .01)/nrow(sigs.2Rb)
prop.sig[25]<-sum(sigs.2Rb$baye.rand < .01)/nrow(sigs.2Rb)
prop.sig[26]<-sum(sigs.2La$baye.prec < .01)/nrow(sigs.2La)
prop.sig[27]<-sum(sigs.2La$baye.hum < .01)/nrow(sigs.2La)
prop.sig[28]<-sum(sigs.2La$baye.temp < .01)/nrow(sigs.2La)
prop.sig[29]<-sum(sigs.2La$baye.corr < .01)/nrow(sigs.2La)
prop.sig[30]<-sum(sigs.2La$baye.rand < .01)/nrow(sigs.2La)
prop.sig[31]<-sum(sigs.geog$baye.prec < .01)/nrow(sigs.geog)
prop.sig[32]<-sum(sigs.geog$baye.hum < .01)/nrow(sigs.geog)
prop.sig[33]<-sum(sigs.geog$baye.temp < .01)/nrow(sigs.geog)
prop.sig[34]<-sum(sigs.geog$baye.corr < .01)/nrow(sigs.geog)
prop.sig[35]<-sum(sigs.geog$baye.rand < .01)/nrow(sigs.geog)
prop.sig[36]<-sum(sigs.species$baye.prec < .01)/nrow(sigs.species)
prop.sig[37]<-sum(sigs.species$baye.hum < .01)/nrow(sigs.species)
prop.sig[38]<-sum(sigs.species$baye.temp < .01)/nrow(sigs.species)
prop.sig[39]<-sum(sigs.species$baye.corr < .01)/nrow(sigs.species)
prop.sig[40]<-sum(sigs.species$baye.rand < .01)/nrow(sigs.species)

#r2VIM test 2Rb, 2La, geog, species
prop.sig[41]<-sum(sigs.2Rb$vim.prec > 1)/nrow(sigs.2Rb)
prop.sig[42]<-sum(sigs.2Rb$vim.hum > 1)/nrow(sigs.2Rb)
prop.sig[43]<-sum(sigs.2Rb$vim.temp > 1)/nrow(sigs.2Rb)
prop.sig[44]<-sum(sigs.2Rb$vim.corr > 1)/nrow(sigs.2Rb)
prop.sig[45]<-sum(sigs.2Rb$vim.rand > 1)/nrow(sigs.2Rb)
prop.sig[46]<-sum(sigs.2La$vim.prec > 1)/nrow(sigs.2La)
prop.sig[47]<-sum(sigs.2La$vim.hum > 1)/nrow(sigs.2La)
prop.sig[48]<-sum(sigs.2La$vim.temp > 1)/nrow(sigs.2La)
prop.sig[49]<-sum(sigs.2La$vim.corr > 1)/nrow(sigs.2La)
prop.sig[50]<-sum(sigs.2La$vim.rand > 1)/nrow(sigs.2La)
prop.sig[51]<-sum(sigs.geog$vim.prec > 1)/nrow(sigs.geog)
prop.sig[52]<-sum(sigs.geog$vim.hum > 1)/nrow(sigs.geog)
prop.sig[53]<-sum(sigs.geog$vim.temp > 1)/nrow(sigs.geog)
prop.sig[54]<-sum(sigs.geog$vim.corr > 1)/nrow(sigs.geog)
prop.sig[55]<-sum(sigs.geog$vim.rand > 1)/nrow(sigs.geog)
prop.sig[56]<-sum(sigs.species$vim.prec > 1)/nrow(sigs.species)
prop.sig[57]<-sum(sigs.species$vim.hum > 1)/nrow(sigs.species)
prop.sig[58]<-sum(sigs.species$vim.temp > 1)/nrow(sigs.species)
prop.sig[59]<-sum(sigs.species$vim.corr > 1)/nrow(sigs.species)
prop.sig[60]<-sum(sigs.species$vim.rand > 1)/nrow(sigs.species)

#cors test 2Rb, 2La, geog, species
prop.sig[61]<-sum(sigs.2Rb$cors.prec < .0001)/nrow(sigs.2Rb)
prop.sig[62]<-sum(sigs.2Rb$cors.hum < .0001)/nrow(sigs.2Rb)
prop.sig[63]<-sum(sigs.2Rb$cors.temp < .0001)/nrow(sigs.2Rb)
prop.sig[64]<-sum(sigs.2Rb$cors.corr < .0001)/nrow(sigs.2Rb)
prop.sig[65]<-sum(sigs.2Rb$cors.rand < .0001)/nrow(sigs.2Rb)
prop.sig[66]<-sum(sigs.2La$cors.prec < .0001)/nrow(sigs.2La)
prop.sig[67]<-sum(sigs.2La$cors.hum < .0001)/nrow(sigs.2La)
prop.sig[68]<-sum(sigs.2La$cors.temp < .0001)/nrow(sigs.2La)
prop.sig[69]<-sum(sigs.2La$cors.corr < .0001)/nrow(sigs.2La)
prop.sig[70]<-sum(sigs.2La$cors.rand < .0001)/nrow(sigs.2La)
prop.sig[71]<-sum(sigs.geog$cors.prec < .0001)/nrow(sigs.geog)
prop.sig[72]<-sum(sigs.geog$cors.hum < .0001)/nrow(sigs.geog)
prop.sig[73]<-sum(sigs.geog$cors.temp < .0001)/nrow(sigs.geog)
prop.sig[74]<-sum(sigs.geog$cors.corr < .0001)/nrow(sigs.geog)
prop.sig[75]<-sum(sigs.geog$cors.rand < .0001)/nrow(sigs.geog)
prop.sig[76]<-sum(sigs.species$cors.prec < .0001)/nrow(sigs.species)
prop.sig[77]<-sum(sigs.species$cors.hum < .0001)/nrow(sigs.species)
prop.sig[78]<-sum(sigs.species$cors.temp < .0001)/nrow(sigs.species)
prop.sig[79]<-sum(sigs.species$cors.corr < .0001)/nrow(sigs.species)
prop.sig[80]<-sum(sigs.species$cors.rand < .0001)/nrow(sigs.species)

#add to dataframe
f$comps<-test
f$prop.sig<-prop.sig

#plot the resulting relationship
#4C
gen.regions.plot<-ggplot(f, aes(x=abs(r), y=prop.sig, color=comps)) + 
  geom_point(cex=3, alpha=.6)+
  geom_smooth(method=lm, se=FALSE)+
  theme_classic()+
  labs(x="correlation between genetic and environmental divergence \n for a given region of genomic relatedness",
       y="proportion of significant GEA SNPs")
#ggsave("~/Downloads/corr.snps.genomic.regions.pdf", height=3, width=5.5)

#test significance of each correlation
cor.test(f$r[1:20],f$prop.sig[1:20])
## 
##  Pearson's product-moment correlation
## 
## data:  f$r[1:20] and f$prop.sig[1:20]
## t = -1.1491, df = 18, p-value = 0.2655
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.6309606  0.2047751
## sample estimates:
##        cor 
## -0.2614363
cor.test(f$r[20:40],f$prop.sig[20:40])
## 
##  Pearson's product-moment correlation
## 
## data:  f$r[20:40] and f$prop.sig[20:40]
## t = -0.45492, df = 19, p-value = 0.6543
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5125222  0.3432671
## sample estimates:
##        cor 
## -0.1038014
cor.test(f$r[40:60],f$prop.sig[40:60])
## 
##  Pearson's product-moment correlation
## 
## data:  f$r[40:60] and f$prop.sig[40:60]
## t = 0.93515, df = 19, p-value = 0.3614
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2440175  0.5881901
## sample estimates:
##       cor 
## 0.2097658
cor.test(f$r[60:80],f$prop.sig[60:80])
## 
##  Pearson's product-moment correlation
## 
## data:  f$r[60:80] and f$prop.sig[60:80]
## t = 2.8018, df = 19, p-value = 0.01138
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1422216 0.7883768
## sample estimates:
##       cor 
## 0.5407113
h <- arrangeGrob(spat.plot, gen.autocorr.plot, gen.regions.plot, t.test.plot, nrow = 2)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
ggsave(h, filename="~/Desktop/anoph.phase2/corr.plots.pdf", width = 10, height = 6, units ="in")