library(ggplot2)
#plot admixture results

loc<-read.csv("~/Desktop/aph.data/aph.rad/retained.sampling.csv")
#setwd to admixture directory run on the cluster
setwd("~/Downloads/admixture/")

#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")

#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]

#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}

par(mfrow=c(5,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#plot each run
for (i in 6:10){
  barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#define function to get ggplot colors
gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}

#reorder samples to reflect clustering at K=6
runs[[5]]<-runs[[5]][c(33:46,1:19,21,23:32,22,20,57:73,81:95,74:80,47:56),]
runs[[6]]<-runs[[6]][c(33:46,1:19,21,23:32,22,20,57:73,81:95,74:80,47:56),]

#plot barplots for the two most relevant runs
par(mfrow=c(2,1))
for (i in 5:6){
  barplot(t(as.matrix(runs[[i]])), col=gg_color_hue(i), ylab="Ancestry", border="black")
}

#FMNH 342048 is the only sample with > .1 mis-assignment at K=5
#runs[[5]]
#Read in vcf and subset to only woodhouse to do zoomed in admixture run
#vcfR <- read.vcfR("~/Desktop/aph.data/unlinked.filtered.recode.vcf")
#colnames(vcfR@gt)
#vcfR@fix[1:5,1:5]
##subset to only woodhouse samples
#vcfR@gt <- vcfR@gt[,c(1,21,58:96)]
#
##remove samples with no variable sites left
#source("~/Desktop/snpfiltR/min_mac.R")
#vcf<-min_mac(vcfR = vcfR, min.mac = 1)
#vcf
##write out for admixture analysis on the cluster
#write.vcf(vcf, file="~/Downloads/woodhouse.only.vcf.gz")
#####
#investigate results from only woodhouse's scrub-jay (sumichrasti aslo removed)
#setwd to admixture directory from run on the cluster
setwd("~/Downloads/woodhouse.admixture/")

#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")

#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#read in all ten runs and save each dataframe in a list
wood.runs<-list()
#read in log files
for (i in 1:10){
  wood.runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}

#plot
par(mfrow=c(5,1))
#plot each run
for (i in 1:5){
  barplot(t(as.matrix(wood.runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#plot each run
for (i in 6:10){
  barplot(t(as.matrix(wood.runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#match sample order
wood.samps<-read.table("binary_fileset.fam")[,1]
samps[c(33:46,1:19,21,23:32,22,20,57:73,81:95,74:80,47:56)]
##  [1] A_coerulescens_396251 A_coerulescens_396254 A_coerulescens_396256
##  [4] A_coerulescens_396263 A_coerulescens_396264 A_coerulescens_396265
##  [7] A_insularis_334025    A_insularis_334029    A_insularis_334031   
## [10] A_insularis_334032    A_insularis_334033    A_insularis_334034   
## [13] A_insularis_334037    A_insularis_334038    A_californica_333849 
## [16] A_californica_333854  A_californica_333855  A_californica_333857 
## [19] A_californica_333860  A_californica_333862  A_californica_333871 
## [22] A_californica_333873  A_californica_333874  A_californica_333914 
## [25] A_californica_333917  A_californica_333931  A_californica_333932 
## [28] A_californica_333934  A_californica_334002  A_californica_334012 
## [31] A_californica_334015  A_californica_334017  A_californica_334019 
## [34] A_californica_342037  A_californica_342050  A_californica_342066 
## [37] A_californica_342072  A_californica_343428  A_californica_343430 
## [40] A_californica_343432  A_californica_343438  A_californica_343451 
## [43] A_californica_393615  A_californica_393721  A_californica_342048 
## [46] A_woodhouseii_334171  A_woodhouseii_334062  A_woodhouseii_334063 
## [49] A_woodhouseii_334086  A_woodhouseii_334088  A_woodhouseii_334096 
## [52] A_woodhouseii_334132  A_woodhouseii_334133  A_woodhouseii_334134 
## [55] A_woodhouseii_334142  A_woodhouseii_334153  A_woodhouseii_334161 
## [58] A_woodhouseii_334188  A_woodhouseii_334190  A_woodhouseii_334196 
## [61] A_woodhouseii_334210  A_woodhouseii_334211  A_woodhouseii_334217 
## [64] A_woodhouseii_343458  A_woodhouseii_343461  A_woodhouseii_343476 
## [67] A_woodhouseii_343480  A_woodhouseii_343481  A_woodhouseii_343483 
## [70] A_woodhouseii_343497  A_woodhouseii_393605  A_woodhouseii_393606 
## [73] A_woodhouseii_393697  A_woodhouseii_393698  A_woodhouseii_393702 
## [76] A_woodhouseii_393712  A_woodhouseii_393713  A_woodhouseii_395768 
## [79] A_woodhouseii_334230  A_woodhouseii_334241  A_woodhouseii_334242 
## [82] A_woodhouseii_334243  A_woodhouseii_334244  A_woodhouseii_334247 
## [85] A_woodhouseii_334250  A_sumichrasti_343502  A_sumichrasti_343503 
## [88] A_sumichrasti_343510  A_sumichrasti_343511  A_sumichrasti_343512 
## [91] A_sumichrasti_343514  A_sumichrasti_343515  A_sumichrasti_393635 
## [94] A_sumichrasti_393638  A_sumichrasti_393640 
## 95 Levels: A_californica_333849 A_californica_333854 ... A_woodhouseii_395768
wood.samps[c(1:18,26:40,19:25)]
##  [1] A_woodhouseii_334171 A_woodhouseii_334062 A_woodhouseii_334063
##  [4] A_woodhouseii_334086 A_woodhouseii_334088 A_woodhouseii_334096
##  [7] A_woodhouseii_334132 A_woodhouseii_334133 A_woodhouseii_334134
## [10] A_woodhouseii_334142 A_woodhouseii_334153 A_woodhouseii_334161
## [13] A_woodhouseii_334188 A_woodhouseii_334190 A_woodhouseii_334196
## [16] A_woodhouseii_334210 A_woodhouseii_334211 A_woodhouseii_334217
## [19] A_woodhouseii_343458 A_woodhouseii_343461 A_woodhouseii_343476
## [22] A_woodhouseii_343480 A_woodhouseii_343481 A_woodhouseii_343483
## [25] A_woodhouseii_343497 A_woodhouseii_393605 A_woodhouseii_393606
## [28] A_woodhouseii_393697 A_woodhouseii_393698 A_woodhouseii_393702
## [31] A_woodhouseii_393712 A_woodhouseii_393713 A_woodhouseii_395768
## [34] A_woodhouseii_334230 A_woodhouseii_334241 A_woodhouseii_334242
## [37] A_woodhouseii_334243 A_woodhouseii_334244 A_woodhouseii_334247
## [40] A_woodhouseii_334250
## 40 Levels: A_woodhouseii_334062 A_woodhouseii_334063 ... A_woodhouseii_395768
#reorder samples to reflect clustering at K=6
wood.runs[[2]]<-wood.runs[[2]][c(1:18,26:40,19:25),]
wood.runs[[3]]<-wood.runs[[3]][c(1:18,26:40,19:25),]

#save all plots together
#pdf("~/Desktop/aph.data/admixture/all.admix.plots.pdf", width = 8, height=6)
#set number of rows/columns
par(mfrow=c(4,1))
#set margins
par(mar = c(3, 3, 0, 0), oma = c(1, 1, 1, 1))
#set line width
#opar <- par(lwd = 1)
for (i in 5:6){
  barplot(t(as.matrix(runs[[i]])), col=gg_color_hue(i), ylab="Ancestry", border="black")
}
for (i in 2:3){
  barplot(t(as.matrix(wood.runs[[i]])), col=gg_color_hue(i), ylab="Ancestry", border="black")
}

#dev.off()