library(geosphere)

## Warning: package 'geosphere' was built under R version 3.5.2

library(StAMPP)

## Warning: package 'StAMPP' was built under R version 3.5.2

## Loading required package: pegas

## Warning: package 'pegas' was built under R version 3.5.2

## Loading required package: ape

## Loading required package: adegenet

## Loading required package: ade4

## Warning: package 'ade4' was built under R version 3.5.2

## 
##    /// adegenet 2.1.3 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()

## 
## Attaching package: 'pegas'

## The following object is masked from 'package:ade4':
## 
##     amova

## The following object is masked from 'package:ape':
## 
##     mst

library(ggplot2)
library(adegenet)
library(vcfR)

## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.12.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****

## 
## Attaching package: 'vcfR'

## The following object is masked from 'package:pegas':
## 
##     getINFO

library(gridExtra)

#read in all significance values
all.sigs<-read.csv("~/Downloads/all.sigs.csv")
locs<-read.csv(file = "~/Downloads/phase1_all_vairables_last.csv")
#make a subset of the locality data that only includes the 14 unique localities once
xy.coords.only<- locs[,c(15,16)]
as.numeric(rownames(unique(xy.coords.only)))

##  [1]   1  47  96 151 197 212 228 288 389 472 563 666 679 710

phase1.uniques<-locs[as.numeric(rownames(unique(xy.coords.only))),]

#generate pairwise geographic distance matrix
unique.xy<-phase1.uniques[,c(15,16)]

#make geographic dist matrix convert to km distance
d.geo <- as.dist(distm(unique.xy, fun=distGeo))
d.geo<-d.geo/1000
d.geo<-as.matrix(d.geo)

#generate pairwise environmental distance matrix for all 5 env variables (3 real and 2 simulated)
d.prec<-as.matrix(dist(phase1.uniques$p_annual))
d.hum<-as.matrix(dist(phase1.uniques$h_mean))
d.temp<-as.matrix(dist(phase1.uniques$t_mean))
d.rand<-as.matrix(dist(phase1.uniques$random))
d.corr<-as.matrix(dist(phase1.uniques$autocorrelated))

#make plots showing the relationship between geographic distance and each environmental value
plot(d.geo,d.prec)

plot(d.geo,d.hum)

plot(d.geo,d.temp)

plot(d.geo,d.rand)

plot(d.geo,d.corr)

analyze effect of spatial autocorrelation on number of GEA outliers identified

#mantel test to quantify the strength of each of these relationships
mantel.prec<-vegan::mantel(xdis = d.geo, ydis = d.prec)
mantel.hum<-vegan::mantel(xdis = d.geo, ydis = d.hum)
mantel.temp<-vegan::mantel(xdis = d.geo, ydis = d.temp)
mantel.rand<-vegan::mantel(xdis = d.geo, ydis = d.rand)
mantel.corr<-vegan::mantel(xdis = d.geo, ydis = d.corr)

#open empty vector
out<-c()
#calc number of sig outliers for each df
out[1]<-sum(all.sigs$lfmm.prec < .01)
out[2]<-sum(all.sigs$lfmm.hum < .01)
out[3]<-sum(all.sigs$lfmm.temp < .01)
out[4]<-sum(all.sigs$lfmm.rand < .01)
out[5]<-sum(all.sigs$lfmm.corr < .01)

out[6]<-sum(all.sigs$baye.prec < .01)
out[7]<-sum(all.sigs$baye.hum < .01)
out[8]<-sum(all.sigs$baye.temp < .01)
out[9]<-sum(all.sigs$baye.rand < .01)
out[10]<-sum(all.sigs$baye.corr < .01)

out[11]<-sum(all.sigs$vim.prec > 1)
out[12]<-sum(all.sigs$vim.hum > 1)
out[13]<-sum(all.sigs$vim.temp > 1)
out[14]<-sum(all.sigs$vim.rand > 1)
out[15]<-sum(all.sigs$vim.corr > 1)
out[16]<-sum(all.sigs$cors.prec < .0001)
out[17]<-sum(all.sigs$cors.hum <.0001)
out[18]<-sum(all.sigs$cors.temp <.0001)
out[19]<-sum(all.sigs$cors.rand <.0001)
out[20]<-sum(all.sigs$cors.corr <.0001)


#make dataframe for plotting
spatial.autocorr<-c(mantel.prec$statistic,mantel.hum$statistic,mantel.temp$statistic,mantel.rand$statistic,mantel.corr$statistic)
spatial.autocorr<-rep(spatial.autocorr, times=4)
spatial.autocorr<-abs(spatial.autocorr)
cors<-data.frame(env.var=rep(c("prec","hum","temp","rand","corr"), times=4),
                 test=c(rep("LFMM2", times=5), rep("BayeScEnv", times=5), rep("r2VIM", times=5),rep("rep. correlations", times=5)),
                 spat.autocorr=spatial.autocorr,
                 num.sig.outliers=out)

#plot 4A
spat.plot<-ggplot(cors, aes(x=spat.autocorr, y=num.sig.outliers, color=test)) +
  geom_point(cex=3, alpha=.6)+
  geom_smooth(method=lm, se=FALSE)+
  theme_classic()+
  labs(x="correlation between geographic and environmental distance", y="significant GEA SNPs")
#ggsave("~/Downloads/geographic.genomic.correlations.pdf", height=3, width=5.5)

#test significance of each correlation
cor.test(cors$spat.autocorr[1:5],cors$num.sig.outliers[1:5])

## 
##  Pearson's product-moment correlation
## 
## data:  cors$spat.autocorr[1:5] and cors$num.sig.outliers[1:5]
## t = 0.59441, df = 3, p-value = 0.5941
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7814647  0.9381856
## sample estimates:
##       cor 
## 0.3246015

cor.test(cors$spat.autocorr[5:10],cors$num.sig.outliers[5:10])

## 
##  Pearson's product-moment correlation
## 
## data:  cors$spat.autocorr[5:10] and cors$num.sig.outliers[5:10]
## t = 0.11448, df = 4, p-value = 0.9144
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7911054  0.8302039
## sample estimates:
##        cor 
## 0.05714417

cor.test(cors$spat.autocorr[10:15],cors$num.sig.outliers[10:15])

## 
##  Pearson's product-moment correlation
## 
## data:  cors$spat.autocorr[10:15] and cors$num.sig.outliers[10:15]
## t = 1.6554, df = 4, p-value = 0.1732
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3604838  0.9549990
## sample estimates:
##       cor 
## 0.6376147

cor.test(cors$spat.autocorr[15:20],cors$num.sig.outliers[15:20])

## 
##  Pearson's product-moment correlation
## 
## data:  cors$spat.autocorr[15:20] and cors$num.sig.outliers[15:20]
## t = 0.2412, df = 4, p-value = 0.8213
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7662890  0.8488145
## sample estimates:
##       cor 
## 0.1197333

analyze effect of genomic autocorrelation of environmental vairables on number of GEA outliers identified

#read in vcfs
vcf.2L<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.2L.vcf.gz.recode.vcf")

## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 4486
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 4486
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 4486
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant: 4486
## All variants processed

vcf.2R<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.2R.vcf.gz.recode.vcf")

## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 5551
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 5551
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 5551
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant: 5551
## All variants processed

vcf.3L<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.3L.vcf.gz.recode.vcf")

## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 3693
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 3693
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 3693
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant: 3693
## All variants processed

vcf.3R<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.3R.vcf.gz.recode.vcf")

## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 4700
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 4700
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 4700
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant: 4700
## All variants processed

vcf.X<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.X.vcf.gz.recode.vcf")

## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 2123
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 2123
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 2123
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant: 2123
## All variants processed

#combine all the variants into a single vcf
vcf.2L@gt<-rbind(vcf.2L@gt,vcf.2R@gt,vcf.3L@gt,vcf.3R@gt,vcf.X@gt)
vcf.2L@fix<-rbind(vcf.2L@fix,vcf.2R@fix,vcf.3L@fix,vcf.3R@fix,vcf.X@fix)
vcf.2L

## ***** Object of Class vcfR *****
## 765 samples
## 5 CHROMs
## 20,553 variants
## Object size: 439.7 Mb
## 0 percent missing data
## *****        *****         *****

#convert to genlight
gen<-vcfR2genlight(vcf.2L)

## Warning in vcfR2genlight(vcf.2L): Found 445 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 445 loci will be omitted from the genlight object.

#assign pop to individuals
pop(gen)<-locs$latitude
#calculate pairwise Fst between pops across all variants
fsts<-stamppFst(gen)
#convert to distance matrix
fsts<-as.matrix(as.dist(fsts$Fsts))

#calculate mantel r statistic for each variable with genome-wide divergence between pops
fst.prec<-vegan::mantel(xdis = fsts, ydis = d.prec)
fst.hum<-vegan::mantel(xdis = fsts, ydis = d.hum)
fst.temp<-vegan::mantel(xdis = fsts, ydis = d.temp)
fst.rand<-vegan::mantel(xdis = fsts, ydis = d.rand)
fst.corr<-vegan::mantel(xdis = fsts, ydis = d.corr)

#make vector and add to dataframe
genetic.autocorr<-c(fst.prec$statistic,fst.hum$statistic,fst.temp$statistic,fst.rand$statistic,fst.corr$statistic)
genetic.autocorr<-rep(genetic.autocorr, times=4)
cors$gen.autocorr<-abs(genetic.autocorr)

#make plot 4B
gen.autocorr.plot<-ggplot(cors, aes(x=gen.autocorr, y=num.sig.outliers, color=test)) +
  geom_point(cex=3, alpha=.6)+
  geom_smooth(method=lm, se=FALSE)+
  theme_classic()+
  labs(x="correlation between genome-wide Fst and environmental divergence", y="significant GEA SNPs")
#ggsave("~/Downloads/env.genomic.correlations.pdf", height=3, width=5.5)

#test significance of each regression line
cor.test(cors$gen.autocorr[1:5],cors$num.sig.outliers[1:5])

## 
##  Pearson's product-moment correlation
## 
## data:  cors$gen.autocorr[1:5] and cors$num.sig.outliers[1:5]
## t = 0.14649, df = 3, p-value = 0.8928
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8620897  0.8996503
## sample estimates:
##        cor 
## 0.08427773

cor.test(cors$gen.autocorr[5:10],cors$num.sig.outliers[5:10])

## 
##  Pearson's product-moment correlation
## 
## data:  cors$gen.autocorr[5:10] and cors$num.sig.outliers[5:10]
## t = -0.12419, df = 4, p-value = 0.9072
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8317052  0.7892836
## sample estimates:
##         cor 
## -0.06197752

cor.test(cors$gen.autocorr[10:15],cors$num.sig.outliers[10:15])

## 
##  Pearson's product-moment correlation
## 
## data:  cors$gen.autocorr[10:15] and cors$num.sig.outliers[10:15]
## t = 2.0006, df = 4, p-value = 0.116
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2449253  0.9649460
## sample estimates:
##       cor 
## 0.7072093

cor.test(cors$gen.autocorr[15:20],cors$num.sig.outliers[15:20])

## 
##  Pearson's product-moment correlation
## 
## data:  cors$gen.autocorr[15:20] and cors$num.sig.outliers[15:20]
## t = 0.19015, df = 4, p-value = 0.8585
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7765634  0.8415662
## sample estimates:
##        cor 
## 0.09464747

test for significant difference in overall number of GEA outliers identified between real and randomly generated variables

#add the variable type to the dataframe
cors$var.type<-c("environmental","environmental","environmental","randomly generated","randomly generated",
                 "environmental","environmental","environmental","randomly generated","randomly generated",
                 "environmental","environmental","environmental","randomly generated","randomly generated",
                 "environmental","environmental","environmental","randomly generated","randomly generated")

#4D
t.test.plot<-ggplot(cors, aes(x=var.type, y=num.sig.outliers, col=var.type)) + 
  geom_boxplot()+
  geom_jitter(position=position_jitter(0), cex=2, alpha=.6)+
  scale_color_manual(values=c("black","black"))+
  theme_classic()+
labs(x="GEA independent variable", y="significant GEA SNPs")
#ggsave("~/Downloads/env.random.comparison.pdf", height=3, width=4.25)

#t test to assess whether we see a significant difference in total number of outliers identified between real and fake variables
t.test(cors$num.sig.outliers[cors$var.type == "environmental"], cors$num.sig.outliers[cors$var.type == "randomly generated"])

## 
##  Welch Two Sample t-test
## 
## data:  cors$num.sig.outliers[cors$var.type == "environmental"] and cors$num.sig.outliers[cors$var.type == "randomly generated"]
## t = 0.13809, df = 14.937, p-value = 0.892
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3520.505  4008.088
## sample estimates:
## mean of x mean of y 
##  2393.417  2149.625

visualize the genomic autocorrelation of each variable in each of the four discrete regions of genomic relatedness

#make pairwise correlation matrices for Fst within each of the four distinct regions
#compare pairwise correlation matrices for each env variable with each genomic region (4x5 comparisons)
#calculate the proportion of significant SNPs within each region for each method (3 methods x 4 regions)
#total: 60 comparisons showing correlation between genetic and environmental variation on the x axis, and %outlier SNPs on the x axis

#reset VCFs
vcf.2L<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.2L.vcf.gz.recode.vcf")

## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 4486
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 4486
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 4486
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant: 4486
## All variants processed

vcf.2R<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.2R.vcf.gz.recode.vcf")

## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 5551
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 5551
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 5551
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant: 5551
## All variants processed

vcf.3L<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.3L.vcf.gz.recode.vcf")

## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 3693
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 3693
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 3693
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant: 3693
## All variants processed

vcf.3R<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.3R.vcf.gz.recode.vcf")

## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 4700
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 4700
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 4700
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant: 4700
## All variants processed

vcf.X<-read.vcfR("~/Downloads/ag1000g.phase1.ar3.pass.biallelic.maf05.ld10000.X.vcf.gz.recode.vcf")

## Scanning file to determine attributes.
## File attributes:
##   meta lines: 91
##   header_line: 92
##   variant count: 2123
##   column count: 774
## 
Meta line 91 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 2123
##   Character matrix gt cols: 774
##   skip: 0
##   nrows: 2123
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant: 2123
## All variants processed

#reorder locs file to match the same order as the vcfs
locs<-locs[match(colnames(vcf.2L@gt)[-1],locs$ox_code),]
rownames(locs)<-1:765
#verify that all samples are present in the vcf in the same order as in the locality df
colnames(vcf.2L@gt)[-1] == locs$ox_code

##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [256] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [271] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [286] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [301] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [316] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [331] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [346] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [361] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [376] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [391] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [406] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [421] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [436] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [451] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [466] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [481] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [496] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [511] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [526] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [541] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [556] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [571] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [586] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [601] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [616] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [631] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [646] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [661] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [676] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [691] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [706] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [721] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [736] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [751] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

#make a subset of the locality data that only includes the 14 unique localities once
xy.coords.only<- locs[,c(15,16)]
as.numeric(rownames(unique(xy.coords.only)))

##  [1]   1   2  33 151 254 300 328 344 356 536 619 679 735 751

phase1.uniques<-locs[as.numeric(rownames(unique(xy.coords.only))),]
#make sure these are in the same order
phase1.uniques$t_mean == unique(locs$t_mean)

##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

#generate pairwise environmental distance matrix for all 5 env variables (3 real and 2 simulated)
d.prec<-as.matrix(dist(phase1.uniques$p_annual))
d.hum<-as.matrix(dist(phase1.uniques$h_mean))
d.temp<-as.matrix(dist(phase1.uniques$t_mean))
d.rand<-as.matrix(dist(phase1.uniques$random))
d.corr<-as.matrix(dist(phase1.uniques$autocorrelated))

#generate pairwise genetic distance matrix between the same 14 localities,
#using each of the 4 discrete regions of relatedness throughout the genome
#convert vcfR to genlight
gen.2La<-vcfR2genlight(vcf.2L)

## Warning in vcfR2genlight(vcf.2L): Found 122 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 122 loci will be omitted from the genlight object.

#subset 2La
gen.2La<-gen.2La[,gen.2La$position > 20524058 & gen.2La$position < 42165532]
gen.2La

##  /// GENLIGHT OBJECT /////////
## 
##  // 765 genotypes,  1,963 binary SNPs, size: 1.6 Mb
##  123 (0.01 %) missing data
## 
##  // Basic content
##    @gen: list of 765 SNPbin
## 
##  // Optional content
##    @ind.names:  765 individual labels
##    @loc.names:  1963 locus labels
##    @chromosome: factor storing chromosomes of the SNPs
##    @position: integer storing positions of the SNPs
##    @other: a list containing: elements without names

#assign pop to individuals
pop(gen.2La)<-locs$latitude
#calculate pairwise Fst between pops
fst.2La<-stamppFst(gen.2La)
#convert to distance matrix
fst.2La<-as.matrix(as.dist(fst.2La$Fsts))

#subset 2Rb
#convert vcfR to genlight
gen.2Rb<-vcfR2genlight(vcf.2R)

## Warning in vcfR2genlight(vcf.2R): Found 99 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 99 loci will be omitted from the genlight object.

#subset 2Rb out of 2R
gen.2Rb<-gen.2Rb[,gen.2Rb$position > 19023925 & gen.2Rb$position < 26758676]
gen.2Rb

##  /// GENLIGHT OBJECT /////////
## 
##  // 765 genotypes,  740 binary SNPs, size: 1.4 Mb
##  73 (0.01 %) missing data
## 
##  // Basic content
##    @gen: list of 765 SNPbin
## 
##  // Optional content
##    @ind.names:  765 individual labels
##    @loc.names:  740 locus labels
##    @chromosome: factor storing chromosomes of the SNPs
##    @position: integer storing positions of the SNPs
##    @other: a list containing: elements without names

#assign pop to individuals
pop(gen.2Rb)<-locs$latitude
#calculate pairwise Fst between pops
fst.2Rb<-stamppFst(gen.2Rb)
#convert to distance matrix
fst.2Rb<-as.matrix(as.dist(fst.2Rb$Fsts))

#subset geography
#3R 0-25Mb
#convert vcfR to genlight
gen.geog<-vcfR2genlight(vcf.3R)

## Warning in vcfR2genlight(vcf.3R): Found 115 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 115 loci will be omitted from the genlight object.

#subset geog out of 3R
gen.geog<-gen.geog[,gen.geog$position < 25000000]
gen.geog

##  /// GENLIGHT OBJECT /////////
## 
##  // 765 genotypes,  2,277 binary SNPs, size: 1.7 Mb
##  129 (0.01 %) missing data
## 
##  // Basic content
##    @gen: list of 765 SNPbin
## 
##  // Optional content
##    @ind.names:  765 individual labels
##    @loc.names:  2277 locus labels
##    @chromosome: factor storing chromosomes of the SNPs
##    @position: integer storing positions of the SNPs
##    @other: a list containing: elements without names

#assign pop to individuals
pop(gen.geog)<-locs$latitude
#calculate pairwise Fst between pops
fst.geog<-stamppFst(gen.geog)
#convert to distance matrix
fst.geog<-as.matrix(as.dist(fst.geog$Fsts))


#subset species
#2R 35-55Mb
#convert vcfR to genlight
gen.spec<-vcfR2genlight(vcf.2R)

## Warning in vcfR2genlight(vcf.2R): Found 99 loci with more than two alleles.
## Objects of class genlight only support loci with two alleles.
## 99 loci will be omitted from the genlight object.

#subset spec out of X
gen.spec<-gen.spec[,gen.spec$position > 35000000 & gen.spec$position < 55000000]
gen.spec

##  /// GENLIGHT OBJECT /////////
## 
##  // 765 genotypes,  1,707 binary SNPs, size: 1.6 Mb
##  173 (0.01 %) missing data
## 
##  // Basic content
##    @gen: list of 765 SNPbin
## 
##  // Optional content
##    @ind.names:  765 individual labels
##    @loc.names:  1707 locus labels
##    @chromosome: factor storing chromosomes of the SNPs
##    @position: integer storing positions of the SNPs
##    @other: a list containing: elements without names

#assign pop to individuals
pop(gen.spec)<-locs$latitude
#calculate pairwise Fst between pops
fst.spec<-stamppFst(gen.spec)
#convert to distance matrix
fst.spec<-as.matrix(as.dist(fst.spec$Fsts))

#####
####
###
##
#test for correlation between each of the 4 discrete genomic regions x 5 environmental variables
#open vectors to store test info and r value
test<-c()
storer<-c()
#plot figure
#pdf(file="~/Downloads/corrs.nolabs.pdf", width=10,height=8)
par(mar=c(2,2,2,2))
par(mfrow=c(4,5))

#start with 2Rb
#d.prec x gen structure in 2Rb
s<-ape::mantel.test(d.prec,fst.2Rb)
r<-vegan::mantel(xdis = d.prec, ydis = fst.2Rb)
plot(d.prec,fst.2Rb, pch=20,cex=1.5, col = rgb(red = 1, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in 2Rb inversion", xlab = "annual precip difference")
     ann=FALSE)
abline(lm(as.dist(fst.2Rb)~as.dist(d.prec)), lty = 2)
text(x = max(d.prec)*.86, y = max(fst.2Rb)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.prec)*.86, y = max(fst.2Rb)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[1]<-"prec.2Rb"
storer[1]<-r$statistic

#d.hum x gen structure in 2Rb
s<-ape::mantel.test(d.hum,fst.2Rb)
r<-vegan::mantel(xdis = d.hum, ydis = fst.2Rb)
plot(d.hum,fst.2Rb, pch=20,cex=1.5, col = rgb(red = 1, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in 2Rb inversion", xlab = "humidity difference (10^5 kg of water/kg of air)")
     ann=FALSE)
abline(lm(as.dist(fst.2Rb)~as.dist(d.hum)), lty = 2)
text(x = max(d.hum)*.86, y = max(fst.2Rb)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.hum)*.86, y = max(fst.2Rb)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[2]<-"hum.2Rb"
storer[2]<-r$statistic

#d.temp x gen structure in 2Rb
s<-ape::mantel.test(d.temp,fst.2Rb)
r<-vegan::mantel(xdis = d.temp, ydis = fst.2Rb)
plot(d.temp,fst.2Rb, pch=20,cex=1.5, col = rgb(red = 1, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in 2La inversion", xlab = "mean annual temp difference")
     ann=FALSE)
abline(lm(as.dist(fst.2Rb)~as.dist(d.temp)), lty = 2)
text(x = max(d.temp)*.86, y = max(fst.2Rb)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.temp)*.86, y = max(fst.2Rb)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[3]<-"temp.2Rb"
storer[3]<-r$statistic

#d.corr x gen structure in 2Rb
s<-ape::mantel.test(d.corr,fst.2Rb)
r<-vegan::mantel(xdis = d.corr, ydis = fst.2Rb)
plot(d.corr,fst.2Rb, pch=20,cex=1.5, col = rgb(red = 1, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in 2Rb inversion", xlab = "spatially autocorrelated random variable")
     ann=FALSE)
abline(lm(as.dist(fst.2Rb)~as.dist(d.corr)), lty = 2)
text(x = max(d.corr)*.86, y = max(fst.2Rb)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.corr)*.86, y = max(fst.2Rb)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[4]<-"corr.2Rb"
storer[4]<-r$statistic

#d.rand x gen structure in 2Rb
s<-ape::mantel.test(d.rand,fst.2Rb)
r<-vegan::mantel(xdis = d.rand, ydis = fst.2Rb)
plot(d.rand,fst.2Rb, pch=20,cex=1.5, col = rgb(red = 1, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in 2Rb inversion", xlab = "true random variable")
     ann=FALSE)
abline(lm(as.dist(fst.2Rb)~as.dist(d.rand)), lty = 2)
text(x = max(d.rand)*.86, y = max(fst.2Rb)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.rand)*.86, y = max(fst.2Rb)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[5]<-"rand.2Rb"
storer[5]<-r$statistic


#####
####
###
##now 2La
#d.prec x gen structure in 2La
s<-ape::mantel.test(d.prec,fst.2La)
r<-vegan::mantel(xdis = d.prec, ydis = fst.2La)
plot(d.prec,fst.2La, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = 1, alpha = 0.5),
     #ylab = "pairwise Fst in 2La inversion", xlab = "annual precip difference")
     ann=FALSE)
abline(lm(as.dist(fst.2La)~as.dist(d.prec)), lty = 2)
text(x = max(d.prec)*.86, y = max(fst.2La)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.prec)*.86, y = max(fst.2La)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[6]<-"prec.2La"
storer[6]<-r$statistic

#d.hum x gen structure in 2La
s<-ape::mantel.test(d.hum,fst.2La)
r<-vegan::mantel(xdis = d.hum, ydis = fst.2La)
plot(d.hum,fst.2La, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = 1, alpha = 0.5),
     #ylab = "pairwise Fst in 2La inversion", xlab = "humidity difference (10^5 kg of water/kg of air)")
     ann=FALSE)
abline(lm(as.dist(fst.2La)~as.dist(d.hum)), lty = 2)
text(x = max(d.hum)*.86, y = max(fst.2La)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.hum)*.86, y = max(fst.2La)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[7]<-"hum.2La"
storer[7]<-r$statistic

#d.temp x gen structure in 2La
s<-ape::mantel.test(d.temp,fst.2La)
r<-vegan::mantel(xdis = d.temp, ydis = fst.2La)
plot(d.temp,fst.2La, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = 1, alpha = 0.5),
     #ylab = "pairwise Fst in 2La inversion", xlab = "mean annual temp difference")
     ann=FALSE)
abline(lm(as.dist(fst.2La)~as.dist(d.temp)), lty = 2)
text(x = max(d.temp)*.86, y = max(fst.2La)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.temp)*.86, y = max(fst.2La)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[8]<-"temp.2La"
storer[8]<-r$statistic

#d.corr x gen structure in 2La
s<-ape::mantel.test(d.corr,fst.2La)
r<-vegan::mantel(xdis = d.corr, ydis = fst.2La)
plot(d.corr,fst.2La, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = 1, alpha = 0.5),
     #ylab = "pairwise Fst in 2La inversion", xlab = "spatially autocorrelated random variable")
     ann=FALSE)
abline(lm(as.dist(fst.2La)~as.dist(d.corr)), lty = 2)
text(x = max(d.corr)*.86, y = max(fst.2La)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.corr)*.86, y = max(fst.2La)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[9]<-"corr.2La"
storer[9]<-r$statistic

#d.rand x gen structure in 2La
s<-ape::mantel.test(d.rand,fst.2La)
r<-vegan::mantel(xdis = d.rand, ydis = fst.2La)
plot(d.rand,fst.2La, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = 1, alpha = 0.5),
     #ylab = "pairwise Fst in 2La inversion", xlab = "true random variable")
     ann=FALSE)
abline(lm(as.dist(fst.2La)~as.dist(d.rand)), lty = 2)
text(x = max(d.rand)*.86, y = max(fst.2La)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.rand)*.86, y = max(fst.2La)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[10]<-"rand.2La"
storer[10]<-r$statistic


#####
####
###
##now geog
#d.prec x gen structure in geog
s<-ape::mantel.test(d.prec,fst.geog)
r<-vegan::mantel(xdis = d.prec, ydis = fst.geog)
plot(d.prec,fst.geog, pch=20,cex=1.5, col = rgb(red = .0, green = .0, blue = .0, alpha = 0.5),
     #ylab = "pairwise Fst in geog", xlab = "annual precip difference")
     ann=FALSE)
abline(lm(as.dist(fst.geog)~as.dist(d.prec)), lty = 2)
text(x = max(d.prec)*.86, y = max(fst.geog)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.prec)*.86, y = max(fst.geog)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[11]<-"prec.geog"
storer[11]<-r$statistic

#d.hum x gen structure in geog
s<-ape::mantel.test(d.hum,fst.geog)
r<-vegan::mantel(xdis = d.hum, ydis = fst.geog)
plot(d.hum,fst.geog, pch=20,cex=1.5, col = rgb(red = .0, green = .0, blue = .0, alpha = 0.5),
     #ylab = "pairwise Fst in geog", xlab = "humidity difference (10^5 kg of water/kg of air)")
     ann=FALSE)
abline(lm(as.dist(fst.geog)~as.dist(d.hum)), lty = 2)
text(x = max(d.hum)*.86, y = max(fst.geog)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.hum)*.86, y = max(fst.geog)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
test[12]<-"hum.geog"
storer[12]<-r$statistic

#d.temp x gen structure in geog
s<-ape::mantel.test(d.temp,fst.geog)
r<-vegan::mantel(xdis = d.temp, ydis = fst.geog)
plot(d.temp,fst.geog, pch=20,cex=1.5, col = rgb(red = .0, green = .0, blue = .0, alpha = 0.5),
     #ylab = "pairwise Fst in geog", xlab = "mean annual temp difference")
     ann=FALSE)
abline(lm(as.dist(fst.geog)~as.dist(d.temp)), lty = 2)
text(x = max(d.temp)*.86, y = max(fst.geog)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.temp)*.86, y = max(fst.geog)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
test[13]<-"temp.geog"
storer[13]<-r$statistic

#d.corr x gen structure in geog
s<-ape::mantel.test(d.corr,fst.geog)
r<-vegan::mantel(xdis = d.corr, ydis = fst.geog)
plot(d.corr,fst.geog, pch=20,cex=1.5, col = rgb(red = .0, green = .0, blue = .0, alpha = 0.5),
     #ylab = "pairwise Fst in geog", xlab = "spatially autocorrelated random variable")
     ann=FALSE)
abline(lm(as.dist(fst.geog)~as.dist(d.corr)), lty = 2)
text(x = max(d.corr)*.86, y = max(fst.geog)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.corr)*.86, y = max(fst.geog)*.9, labels = bquote(p == .(round(s$p, digits = 3))))
test[14]<-"corr.geog"
storer[14]<-r$statistic

#d.rand x gen structure in geog
s<-ape::mantel.test(d.rand,fst.geog)
r<-vegan::mantel(xdis = d.rand, ydis = fst.geog)
plot(d.rand,fst.geog, pch=20,cex=1.5, col = rgb(red = .0, green = .0, blue = .0, alpha = 0.5),
     #ylab = "pairwise Fst in geog", xlab = "true random variable")
     ann=FALSE)
abline(lm(as.dist(fst.geog)~as.dist(d.rand)), lty = 2)
text(x = max(d.rand)*.86, y = max(fst.geog)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.rand)*.86, y = max(fst.geog)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
test[15]<-"rand.geog"
storer[15]<-r$statistic

#####
####
###
##
#now species
#d.prec x gen structure in spec
s<-ape::mantel.test(d.prec,fst.spec)
r<-vegan::mantel(xdis = d.prec, ydis = fst.spec)
plot(d.prec,fst.spec, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in spec", xlab = "annual precip difference")
     ann=FALSE)
abline(lm(as.dist(fst.spec)~as.dist(d.prec)), lty = 2)
text(x = max(d.prec)*.86, y = max(fst.spec)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.prec)*.86, y = max(fst.spec)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[16]<-"prec.species"
storer[16]<-r$statistic

#d.hum x gen structure in spec
s<-ape::mantel.test(d.hum,fst.spec)
r<-vegan::mantel(xdis = d.hum, ydis = fst.spec)
plot(d.hum,fst.spec, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in spec", xlab = "humidity difference (10^5 kg of water/kg of air)")
     ann=FALSE)
abline(lm(as.dist(fst.spec)~as.dist(d.hum)), lty = 2)
text(x = max(d.hum)*.86, y = max(fst.spec)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.hum)*.86, y = max(fst.spec)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[17]<-"hum.species"
storer[17]<-r$statistic

#d.temp x gen structure in spec
s<-ape::mantel.test(d.temp,fst.spec)
r<-vegan::mantel(xdis = d.temp, ydis = fst.spec)
plot(d.temp,fst.spec, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in spec", xlab = "mean annual temp difference")
     ann=FALSE)
abline(lm(as.dist(fst.spec)~as.dist(d.temp)), lty = 2)
text(x = max(d.temp)*.86, y = max(fst.spec)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.temp)*.86, y = max(fst.spec)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[18]<-"temp.species"
storer[18]<-r$statistic

#d.corr x gen structure in spec
s<-ape::mantel.test(d.corr,fst.spec)
r<-vegan::mantel(xdis = d.corr, ydis = fst.spec)
plot(d.corr,fst.spec, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in spec", xlab = "spatially autocorrelated random variable")
     ann=FALSE)
abline(lm(as.dist(fst.spec)~as.dist(d.corr)), lty = 2)
text(x = max(d.corr)*.86, y = max(fst.spec)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.corr)*.86, y = max(fst.spec)*.9, labels = bquote(p == .(round(s$p, digits = 2))))
#store values
test[19]<-"corr.species"
storer[19]<-r$statistic

#d.rand x gen structure in spec
s<-ape::mantel.test(d.rand,fst.spec)
r<-vegan::mantel(xdis = d.rand, ydis = fst.spec)
plot(d.rand,fst.spec, pch=20,cex=1.5, col = rgb(red = .6, green = .6, blue = .6, alpha = 0.5),
     #ylab = "pairwise Fst in spec", xlab = "true random variable")
     ann=FALSE)
abline(lm(as.dist(fst.spec)~as.dist(d.rand)), lty = 2)
text(x = max(d.rand)*.86, y = max(fst.spec)*.98, labels = bquote(r == .(round(r$statistic, digits = 2))))
text(x = max(d.rand)*.86, y = max(fst.spec)*.9, labels = bquote(p == .(round(s$p, digits = 2))))

#store values
test[20]<-"rand.species"
storer[20]<-r$statistic

#shut down the figure
#dev.off()

calculate proportion of tested SNPs in each genomic region identified as significant for each GEA test (4 methods x 4 genomic regions x 5 environmental vairables = 80 data points)

#create dataframe to hold comparisons between significant percentage and degree of confounding genetic variation
f<-data.frame(test=rep(test, times=4),
              r=rep(storer, times=4))

#create vector to hold the significant proportion for each of the tests in each of the genomic regions
prop.sig<-c()
test<-c(rep("LFMM2", times=20),rep("BayeScEnv", times=20),rep("r2VIM", times=20),rep("rep. correlations", times=20))
#calc sig prop for each test
sigs.2Rb<-all.sigs[all.sigs$Chromosome == "2R" & all.sigs$Position > 20524058 & all.sigs$Position < 42165532,]
sigs.2La<-all.sigs[all.sigs$Chromosome == "2L" & all.sigs$Position > 19023925 & all.sigs$Position < 26758676,]
sigs.geog<-all.sigs[all.sigs$Chromosome == "3R" & all.sigs$Position < 25000000,]
sigs.species<-all.sigs[all.sigs$Chromosome == "2R" & all.sigs$Position > 35000000 & all.sigs$Position < 55000000,]

#lfmm test 2Rb, 2La, geog, species
prop.sig[1]<-sum(sigs.2Rb$lfmm.prec < .01)/nrow(sigs.2Rb)
prop.sig[2]<-sum(sigs.2Rb$lfmm.hum < .01)/nrow(sigs.2Rb)
prop.sig[3]<-sum(sigs.2Rb$lfmm.temp < .01)/nrow(sigs.2Rb)
prop.sig[4]<-sum(sigs.2Rb$lfmm.corr < .01)/nrow(sigs.2Rb)
prop.sig[5]<-sum(sigs.2Rb$lfmm.rand < .01)/nrow(sigs.2Rb)
prop.sig[6]<-sum(sigs.2La$lfmm.prec < .01)/nrow(sigs.2La)
prop.sig[7]<-sum(sigs.2La$lfmm.hum < .01)/nrow(sigs.2La)
prop.sig[8]<-sum(sigs.2La$lfmm.temp < .01)/nrow(sigs.2La)
prop.sig[9]<-sum(sigs.2La$lfmm.corr < .01)/nrow(sigs.2La)
prop.sig[10]<-sum(sigs.2La$lfmm.rand < .01)/nrow(sigs.2La)
prop.sig[11]<-sum(sigs.geog$lfmm.prec < .01)/nrow(sigs.geog)
prop.sig[12]<-sum(sigs.geog$lfmm.hum < .01)/nrow(sigs.geog)
prop.sig[13]<-sum(sigs.geog$lfmm.temp < .01)/nrow(sigs.geog)
prop.sig[14]<-sum(sigs.geog$lfmm.corr < .01)/nrow(sigs.geog)
prop.sig[15]<-sum(sigs.geog$lfmm.rand < .01)/nrow(sigs.geog)
prop.sig[16]<-sum(sigs.species$lfmm.prec < .01)/nrow(sigs.species)
prop.sig[17]<-sum(sigs.species$lfmm.hum < .01)/nrow(sigs.species)
prop.sig[18]<-sum(sigs.species$lfmm.temp < .01)/nrow(sigs.species)
prop.sig[19]<-sum(sigs.species$lfmm.corr < .01)/nrow(sigs.species)
prop.sig[20]<-sum(sigs.species$lfmm.rand < .01)/nrow(sigs.species)

#bayescenv test 2Rb, 2La, geog, species
prop.sig[21]<-sum(sigs.2Rb$baye.prec < .01)/nrow(sigs.2Rb)
prop.sig[22]<-sum(sigs.2Rb$baye.hum < .01)/nrow(sigs.2Rb)
prop.sig[23]<-sum(sigs.2Rb$baye.temp < .01)/nrow(sigs.2Rb)
prop.sig[24]<-sum(sigs.2Rb$baye.corr < .01)/nrow(sigs.2Rb)
prop.sig[25]<-sum(sigs.2Rb$baye.rand < .01)/nrow(sigs.2Rb)
prop.sig[26]<-sum(sigs.2La$baye.prec < .01)/nrow(sigs.2La)
prop.sig[27]<-sum(sigs.2La$baye.hum < .01)/nrow(sigs.2La)
prop.sig[28]<-sum(sigs.2La$baye.temp < .01)/nrow(sigs.2La)
prop.sig[29]<-sum(sigs.2La$baye.corr < .01)/nrow(sigs.2La)
prop.sig[30]<-sum(sigs.2La$baye.rand < .01)/nrow(sigs.2La)
prop.sig[31]<-sum(sigs.geog$baye.prec < .01)/nrow(sigs.geog)
prop.sig[32]<-sum(sigs.geog$baye.hum < .01)/nrow(sigs.geog)
prop.sig[33]<-sum(sigs.geog$baye.temp < .01)/nrow(sigs.geog)
prop.sig[34]<-sum(sigs.geog$baye.corr < .01)/nrow(sigs.geog)
prop.sig[35]<-sum(sigs.geog$baye.rand < .01)/nrow(sigs.geog)
prop.sig[36]<-sum(sigs.species$baye.prec < .01)/nrow(sigs.species)
prop.sig[37]<-sum(sigs.species$baye.hum < .01)/nrow(sigs.species)
prop.sig[38]<-sum(sigs.species$baye.temp < .01)/nrow(sigs.species)
prop.sig[39]<-sum(sigs.species$baye.corr < .01)/nrow(sigs.species)
prop.sig[40]<-sum(sigs.species$baye.rand < .01)/nrow(sigs.species)

#r2VIM test 2Rb, 2La, geog, species
prop.sig[41]<-sum(sigs.2Rb$vim.prec > 1)/nrow(sigs.2Rb)
prop.sig[42]<-sum(sigs.2Rb$vim.hum > 1)/nrow(sigs.2Rb)
prop.sig[43]<-sum(sigs.2Rb$vim.temp > 1)/nrow(sigs.2Rb)
prop.sig[44]<-sum(sigs.2Rb$vim.corr > 1)/nrow(sigs.2Rb)
prop.sig[45]<-sum(sigs.2Rb$vim.rand > 1)/nrow(sigs.2Rb)
prop.sig[46]<-sum(sigs.2La$vim.prec > 1)/nrow(sigs.2La)
prop.sig[47]<-sum(sigs.2La$vim.hum > 1)/nrow(sigs.2La)
prop.sig[48]<-sum(sigs.2La$vim.temp > 1)/nrow(sigs.2La)
prop.sig[49]<-sum(sigs.2La$vim.corr > 1)/nrow(sigs.2La)
prop.sig[50]<-sum(sigs.2La$vim.rand > 1)/nrow(sigs.2La)
prop.sig[51]<-sum(sigs.geog$vim.prec > 1)/nrow(sigs.geog)
prop.sig[52]<-sum(sigs.geog$vim.hum > 1)/nrow(sigs.geog)
prop.sig[53]<-sum(sigs.geog$vim.temp > 1)/nrow(sigs.geog)
prop.sig[54]<-sum(sigs.geog$vim.corr > 1)/nrow(sigs.geog)
prop.sig[55]<-sum(sigs.geog$vim.rand > 1)/nrow(sigs.geog)
prop.sig[56]<-sum(sigs.species$vim.prec > 1)/nrow(sigs.species)
prop.sig[57]<-sum(sigs.species$vim.hum > 1)/nrow(sigs.species)
prop.sig[58]<-sum(sigs.species$vim.temp > 1)/nrow(sigs.species)
prop.sig[59]<-sum(sigs.species$vim.corr > 1)/nrow(sigs.species)
prop.sig[60]<-sum(sigs.species$vim.rand > 1)/nrow(sigs.species)

#cors test 2Rb, 2La, geog, species
prop.sig[61]<-sum(sigs.2Rb$cors.prec < .0001)/nrow(sigs.2Rb)
prop.sig[62]<-sum(sigs.2Rb$cors.hum < .0001)/nrow(sigs.2Rb)
prop.sig[63]<-sum(sigs.2Rb$cors.temp < .0001)/nrow(sigs.2Rb)
prop.sig[64]<-sum(sigs.2Rb$cors.corr < .0001)/nrow(sigs.2Rb)
prop.sig[65]<-sum(sigs.2Rb$cors.rand < .0001)/nrow(sigs.2Rb)
prop.sig[66]<-sum(sigs.2La$cors.prec < .0001)/nrow(sigs.2La)
prop.sig[67]<-sum(sigs.2La$cors.hum < .0001)/nrow(sigs.2La)
prop.sig[68]<-sum(sigs.2La$cors.temp < .0001)/nrow(sigs.2La)
prop.sig[69]<-sum(sigs.2La$cors.corr < .0001)/nrow(sigs.2La)
prop.sig[70]<-sum(sigs.2La$cors.rand < .0001)/nrow(sigs.2La)
prop.sig[71]<-sum(sigs.geog$cors.prec < .0001)/nrow(sigs.geog)
prop.sig[72]<-sum(sigs.geog$cors.hum < .0001)/nrow(sigs.geog)
prop.sig[73]<-sum(sigs.geog$cors.temp < .0001)/nrow(sigs.geog)
prop.sig[74]<-sum(sigs.geog$cors.corr < .0001)/nrow(sigs.geog)
prop.sig[75]<-sum(sigs.geog$cors.rand < .0001)/nrow(sigs.geog)
prop.sig[76]<-sum(sigs.species$cors.prec < .0001)/nrow(sigs.species)
prop.sig[77]<-sum(sigs.species$cors.hum < .0001)/nrow(sigs.species)
prop.sig[78]<-sum(sigs.species$cors.temp < .0001)/nrow(sigs.species)
prop.sig[79]<-sum(sigs.species$cors.corr < .0001)/nrow(sigs.species)
prop.sig[80]<-sum(sigs.species$cors.rand < .0001)/nrow(sigs.species)

#add to dataframe
f$comps<-test
f$prop.sig<-prop.sig

#plot the resulting relationship
#4C
gen.regions.plot<-ggplot(f, aes(x=abs(r), y=prop.sig, color=comps)) + 
  geom_point(cex=3, alpha=.6)+
  geom_smooth(method=lm, se=FALSE)+
  theme_classic()+
  labs(x="correlation between genetic and environmental divergence \n for a given region of genomic relatedness",
       y="proportion of significant GEA SNPs")
#ggsave("~/Downloads/corr.snps.genomic.regions.pdf", height=3, width=5.5)

#test significance of each correlation
cor.test(f$r[1:20],f$prop.sig[1:20])

## 
##  Pearson's product-moment correlation
## 
## data:  f$r[1:20] and f$prop.sig[1:20]
## t = -1.1491, df = 18, p-value = 0.2655
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.6309606  0.2047751
## sample estimates:
##        cor 
## -0.2614363

cor.test(f$r[20:40],f$prop.sig[20:40])

## 
##  Pearson's product-moment correlation
## 
## data:  f$r[20:40] and f$prop.sig[20:40]
## t = -0.45492, df = 19, p-value = 0.6543
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5125222  0.3432671
## sample estimates:
##        cor 
## -0.1038014

cor.test(f$r[40:60],f$prop.sig[40:60])

## 
##  Pearson's product-moment correlation
## 
## data:  f$r[40:60] and f$prop.sig[40:60]
## t = 0.93515, df = 19, p-value = 0.3614
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2440175  0.5881901
## sample estimates:
##       cor 
## 0.2097658

cor.test(f$r[60:80],f$prop.sig[60:80])

## 
##  Pearson's product-moment correlation
## 
## data:  f$r[60:80] and f$prop.sig[60:80]
## t = 2.8018, df = 19, p-value = 0.01138
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1422216 0.7883768
## sample estimates:
##       cor 
## 0.5407113

h <- arrangeGrob(spat.plot, gen.autocorr.plot, gen.regions.plot, t.test.plot, nrow = 2)

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

ggsave(h, filename="~/Desktop/anoph.phase2/corr.plots.pdf", width = 10, height = 6, units ="in")

Testing the effect of autocorrelation on GEAs

Devon DeRaad

4/10/2021

analyze effect of spatial autocorrelation on number of GEA outliers identified

analyze effect of genomic autocorrelation of environmental vairables on number of GEA outliers identified

test for significant difference in overall number of GEA outliers identified between real and randomly generated variables

visualize the genomic autocorrelation of each variable in each of the four discrete regions of genomic relatedness

calculate proportion of tested SNPs in each genomic region identified as significant for each GEA test (4 methods x 4 genomic regions x 5 environmental vairables = 80 data points)