library(ggplot2)
#plot admixture results
loc<-read.csv("~/Desktop/aph.data/aph.rad/retained.sampling.csv")
#setwd to admixture directory run on the cluster
setwd("~/Downloads/admixture/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
geom_line(linetype = "dashed")+
geom_point()+
ylab("cross-validation error")+
xlab("K")+
scale_x_continuous(breaks = c(1:10))+
theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(5,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#plot each run
for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#define function to get ggplot colors
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
#reorder samples to reflect clustering at K=6
runs[[5]]<-runs[[5]][c(33:46,1:19,21,23:32,22,20,57:73,81:95,74:80,47:56),]
runs[[6]]<-runs[[6]][c(33:46,1:19,21,23:32,22,20,57:73,81:95,74:80,47:56),]
#plot barplots for the two most relevant runs
par(mfrow=c(2,1))
for (i in 5:6){
barplot(t(as.matrix(runs[[i]])), col=gg_color_hue(i), ylab="Ancestry", border="black")
}

#FMNH 342048 is the only sample with > .1 mis-assignment at K=5
#runs[[5]]
#Read in vcf and subset to only woodhouse to do zoomed in admixture run
#vcfR <- read.vcfR("~/Desktop/aph.data/unlinked.filtered.recode.vcf")
#colnames(vcfR@gt)
#vcfR@fix[1:5,1:5]
##subset to only woodhouse samples
#vcfR@gt <- vcfR@gt[,c(1,21,58:96)]
#
##remove samples with no variable sites left
#source("~/Desktop/snpfiltR/min_mac.R")
#vcf<-min_mac(vcfR = vcfR, min.mac = 1)
#vcf
##write out for admixture analysis on the cluster
#write.vcf(vcf, file="~/Downloads/woodhouse.only.vcf.gz")
#####
#investigate results from only woodhouse's scrub-jay (sumichrasti aslo removed)
#setwd to admixture directory from run on the cluster
setwd("~/Downloads/woodhouse.admixture/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
geom_line(linetype = "dashed")+
geom_point()+
ylab("cross-validation error")+
xlab("K")+
scale_x_continuous(breaks = c(1:10))+
theme_classic()

#read in all ten runs and save each dataframe in a list
wood.runs<-list()
#read in log files
for (i in 1:10){
wood.runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
#plot
par(mfrow=c(5,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(wood.runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#plot each run
for (i in 6:10){
barplot(t(as.matrix(wood.runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#match sample order
wood.samps<-read.table("binary_fileset.fam")[,1]
samps[c(33:46,1:19,21,23:32,22,20,57:73,81:95,74:80,47:56)]
## [1] A_coerulescens_396251 A_coerulescens_396254 A_coerulescens_396256
## [4] A_coerulescens_396263 A_coerulescens_396264 A_coerulescens_396265
## [7] A_insularis_334025 A_insularis_334029 A_insularis_334031
## [10] A_insularis_334032 A_insularis_334033 A_insularis_334034
## [13] A_insularis_334037 A_insularis_334038 A_californica_333849
## [16] A_californica_333854 A_californica_333855 A_californica_333857
## [19] A_californica_333860 A_californica_333862 A_californica_333871
## [22] A_californica_333873 A_californica_333874 A_californica_333914
## [25] A_californica_333917 A_californica_333931 A_californica_333932
## [28] A_californica_333934 A_californica_334002 A_californica_334012
## [31] A_californica_334015 A_californica_334017 A_californica_334019
## [34] A_californica_342037 A_californica_342050 A_californica_342066
## [37] A_californica_342072 A_californica_343428 A_californica_343430
## [40] A_californica_343432 A_californica_343438 A_californica_343451
## [43] A_californica_393615 A_californica_393721 A_californica_342048
## [46] A_woodhouseii_334171 A_woodhouseii_334062 A_woodhouseii_334063
## [49] A_woodhouseii_334086 A_woodhouseii_334088 A_woodhouseii_334096
## [52] A_woodhouseii_334132 A_woodhouseii_334133 A_woodhouseii_334134
## [55] A_woodhouseii_334142 A_woodhouseii_334153 A_woodhouseii_334161
## [58] A_woodhouseii_334188 A_woodhouseii_334190 A_woodhouseii_334196
## [61] A_woodhouseii_334210 A_woodhouseii_334211 A_woodhouseii_334217
## [64] A_woodhouseii_343458 A_woodhouseii_343461 A_woodhouseii_343476
## [67] A_woodhouseii_343480 A_woodhouseii_343481 A_woodhouseii_343483
## [70] A_woodhouseii_343497 A_woodhouseii_393605 A_woodhouseii_393606
## [73] A_woodhouseii_393697 A_woodhouseii_393698 A_woodhouseii_393702
## [76] A_woodhouseii_393712 A_woodhouseii_393713 A_woodhouseii_395768
## [79] A_woodhouseii_334230 A_woodhouseii_334241 A_woodhouseii_334242
## [82] A_woodhouseii_334243 A_woodhouseii_334244 A_woodhouseii_334247
## [85] A_woodhouseii_334250 A_sumichrasti_343502 A_sumichrasti_343503
## [88] A_sumichrasti_343510 A_sumichrasti_343511 A_sumichrasti_343512
## [91] A_sumichrasti_343514 A_sumichrasti_343515 A_sumichrasti_393635
## [94] A_sumichrasti_393638 A_sumichrasti_393640
## 95 Levels: A_californica_333849 A_californica_333854 ... A_woodhouseii_395768
wood.samps[c(1:18,26:40,19:25)]
## [1] A_woodhouseii_334171 A_woodhouseii_334062 A_woodhouseii_334063
## [4] A_woodhouseii_334086 A_woodhouseii_334088 A_woodhouseii_334096
## [7] A_woodhouseii_334132 A_woodhouseii_334133 A_woodhouseii_334134
## [10] A_woodhouseii_334142 A_woodhouseii_334153 A_woodhouseii_334161
## [13] A_woodhouseii_334188 A_woodhouseii_334190 A_woodhouseii_334196
## [16] A_woodhouseii_334210 A_woodhouseii_334211 A_woodhouseii_334217
## [19] A_woodhouseii_343458 A_woodhouseii_343461 A_woodhouseii_343476
## [22] A_woodhouseii_343480 A_woodhouseii_343481 A_woodhouseii_343483
## [25] A_woodhouseii_343497 A_woodhouseii_393605 A_woodhouseii_393606
## [28] A_woodhouseii_393697 A_woodhouseii_393698 A_woodhouseii_393702
## [31] A_woodhouseii_393712 A_woodhouseii_393713 A_woodhouseii_395768
## [34] A_woodhouseii_334230 A_woodhouseii_334241 A_woodhouseii_334242
## [37] A_woodhouseii_334243 A_woodhouseii_334244 A_woodhouseii_334247
## [40] A_woodhouseii_334250
## 40 Levels: A_woodhouseii_334062 A_woodhouseii_334063 ... A_woodhouseii_395768
#reorder samples to reflect clustering at K=6
wood.runs[[2]]<-wood.runs[[2]][c(1:18,26:40,19:25),]
wood.runs[[3]]<-wood.runs[[3]][c(1:18,26:40,19:25),]
#save all plots together
#pdf("~/Desktop/aph.data/admixture/all.admix.plots.pdf", width = 8, height=6)
#set number of rows/columns
par(mfrow=c(4,1))
#set margins
par(mar = c(3, 3, 0, 0), oma = c(1, 1, 1, 1))
#set line width
#opar <- par(lwd = 1)
for (i in 5:6){
barplot(t(as.matrix(runs[[i]])), col=gg_color_hue(i), ylab="Ancestry", border="black")
}
for (i in 2:3){
barplot(t(as.matrix(wood.runs[[i]])), col=gg_color_hue(i), ylab="Ancestry", border="black")
}

#dev.off()