Zosterops ADMIXTURE runs

library(vcfR)

   *****       ***   vcfR   ***       *****
   This is vcfR 1.14.0 
     browseVignettes('vcfR') # Documentation
     citation('vcfR') # Citation
   *****       *****      *****       *****
library(SNPfiltR)
This is SNPfiltR v.1.0.2

Detailed usage information is available at: devonderaad.github.io/SNPfiltR/ 

If you use SNPfiltR in your published work, please cite the following papers: 

DeRaad, D.A. (2022), SNPfiltR: an R package for interactive and reproducible SNP filtering. Molecular Ecology Resources, 22, 2443-2453. http://doi.org/10.1111/1755-0998.13618 

Knaus, Brian J., and Niklaus J. Grunwald. 2017. VCFR: a package to manipulate and visualize variant call format data in R. Molecular Ecology Resources, 17.1:44-53. http://doi.org/10.1111/1755-0998.12549
library(ggplot2)
library(RColorBrewer)

0.1 Filter input SNP files based on smaple inclusion and MAC to increase signal to noise ratio

#read in filtered vcf
vcfR <- read.vcfR("~/Desktop/cali.zosterops.rad/filtered.unlinked.snps.vcf.gz")
Scanning file to determine attributes.
File attributes:
  meta lines: 14
  header_line: 15
  variant count: 1554
  column count: 133

Meta line 14 read in.
All meta lines processed.
gt matrix initialized.
Character matrix gt created.
  Character matrix gt rows: 1554
  Character matrix gt cols: 133
  skip: 0
  nrows: 1554
  row_num: 0

Processed variant 1000
Processed variant: 1554
All variants processed
vcfR
***** Object of Class vcfR *****
124 samples
239 CHROMs
1,554 variants
Object size: 16.3 Mb
5.522 percent missing data
*****        *****         *****
#bring in sample info
#read in sample info csv
sample.info<-read.csv("~/Desktop/cali.zosterops.rad/zosterops.trimmed.RAD.sampling.csv")
sample.info<-sample.info[sample.info$ID %in% colnames(vcfR@gt),]
table(sample.info$Species)

erythropleurus       everetti      japonicus       nigrorum    palpebrosus 
             6              3             83              4              4 
       simplex 
            24 
#see sample order
colnames(vcfR@gt)
  [1] "FORMAT"        "ZJlo012"       "ZJlo015"       "ZJlo016"      
  [5] "ZJlo017"       "ZJlo021"       "ZJlo022"       "ZJlo024"      
  [9] "ZJlo031"       "ZJlo045"       "ZJlo046"       "ZJlo050"      
 [13] "ZJlo052"       "ZJlo055"       "ZJja001"       "ZJja002"      
 [17] "ZJja003"       "ZJja004"       "ZJja005"       "ZJja009"      
 [21] "ZJja010"       "ZJja012"       "ZJja013"       "ZJja014"      
 [25] "ZJja016"       "ZJja030"       "ZJal003"       "Zjal004"      
 [29] "ZJal005"       "ZJal010"       "ZJal011"       "ZJal012"      
 [33] "ZJal020"       "ZJst007"       "ZJst009"       "ZJst010"      
 [37] "ZJst012"       "ZJst014"       "ZJst015"       "ZJin001"      
 [41] "ZJin003"       "ZJsi002"       "ZJsi017"       "ZJsi032"      
 [45] "ZJsi023"       "ZJsi024"       "ZJsi027"       "ZJsi028"      
 [49] "ZJsi029"       "ZJsi031"       "ZPxx001"       "ZPxx002"      
 [53] "ZMxx002"       "ZMxx003"       "ZMxx004"       "ZMxx005"      
 [57] "ZMxx006"       "ZMOxx001"      "ZMOxx002"      "ZMOxx003"     
 [61] "ZMOxx005"      "ZMOha001"      "ZMOwh002"      "ZMOwh003"     
 [65] "ZMOwh004"      "ZMOwh005"      "ZMOwh006"      "ZMOvu002"     
 [69] "ZMOvu003"      "ZMOvu004"      "ZMOvu005"      "ZMOmo001"     
 [73] "ZMOpa001"      "ZMOpa002"      "ZNni001"       "ZERxx002"     
 [77] "ZERxx004"      "ZERxx005"      "Zsim23588"     "Zsim28142"    
 [81] "Zsim30897"     "Zpal23498"     "Zpal23522"     "Zmon20893"    
 [85] "Zmon20892"     "Zmon20902"     "Zmon20909"     "Zsim31166"    
 [89] "Zsim31159"     "Zmey17876"     "Zmey17877"     "Zmey17852"    
 [93] "Zmey17922"     "Zmey17920"     "Zmey17925"     "Zmey17923"    
 [97] "Zery28090"     "Zery28091"     "Zery28088"     "Zni10863"     
[101] "Zni14341"      "Zni19650"      "Zni17984"      "ZjaDOT-10981" 
[105] "ZjaDOT-5235"   "Z_LA_122866_2" "Z_LA_122577_2" "Z_LA_122188_2"
[109] "Zsim13809"     "Zsim13773"     "Zsim6797"      "Zsim10336"    
[113] "Zsim11362"     "Zsim11102"     "Zsim11220"     "Zmon20899"    
[117] "Zmon28375"     "Zev13949"      "Zev31650"      "Zev28451"     
[121] "Z_HI_BRY431"   "Z_HI_NAN290"   "Z_HI_WAI087"   "Z_HI_WAI078"  
[125] "Z_HI_SOL783"  
#remove singletons
vcf.2<-min_mac(vcfR, min.mac = 2)
37.84% of SNPs fell below a minor allele count of 2 and were removed from the VCF

#vcfR::write.vcf(vcf.2, file="~/Desktop/cali.zosterops.rad/nosingletons.vcf.gz")
vcf.2
***** Object of Class vcfR *****
124 samples
181 CHROMs
966 variants
Object size: 10.2 Mb
5.661 percent missing data
*****        *****         *****

0.2 Code to convert the vcf into appropriate file structure and run ADMIXTURE on the cluster

#!/bin/sh
#
#SBATCH --job-name=admixture.zost              # Job Name
#SBATCH --nodes=1             # 40 nodes
#SBATCH --ntasks-per-node=5               # 40 CPU allocation per Task
#SBATCH --partition=sixhour            # Name of the Slurm partition used
#SBATCH --chdir=/home/d669d153/work/zosterops.rad/admixture/all.samps    # Set working d$
#SBATCH --mem-per-cpu=1gb            # memory requested
#SBATCH --time=200
#use plink to convert vcf directly to bed format:
/home/d669d153/work/plink --vcf /home/d669d153/work/zosterops.rad/admixture/all.samps/filtered.unlinked.snps.vcf  --double-id --allow-extra-chr --make-bed --out binary_fileset
#fix chromosome names
cut -f2- binary_fileset.bim  > temp
awk 'BEGIN{FS=OFS="\t"}{print value 1 OFS $0}' temp > binary_fileset.bim
rm temp
#run admixture for a K of 1-10, using cross-validation, with 10 threads
for K in 1 2 3 4 5 6 7 8 9 10; 
do /home/d669d153/work/admixture_linux-1.3.0/admixture --cv -j5 -m EM binary_fileset.bed $K | tee log${K}.out;
done
#Which K iteration is optimal according to ADMIXTURE ?
grep -h CV log*.out > log.errors.txt

0.3 Repeat in a separate directory with singletons removed

#!/bin/sh
#
#SBATCH --job-name=admixture.zost              # Job Name
#SBATCH --nodes=1             # 40 nodes
#SBATCH --ntasks-per-node=5               # 40 CPU allocation per Task
#SBATCH --partition=sixhour            # Name of the Slurm partition used
#SBATCH --chdir=/home/d669d153/work/zosterops.rad/admixture/mac2    # Set working d$
#SBATCH --mem-per-cpu=1gb            # memory requested
#SBATCH --time=200
#use plink to convert vcf directly to bed format:
/home/d669d153/work/plink --vcf /home/d669d153/work/zosterops.rad/admixture/mac2/nosingletons.vcf  --double-id --allow-extra-chr --make-bed --out binary_fileset
#fix chromosome names
cut -f2- binary_fileset.bim  > temp
awk 'BEGIN{FS=OFS="\t"}{print value 1 OFS $0}' temp > binary_fileset.bim
rm temp
#run admixture for a K of 1-10, using cross-validation, with 10 threads
for K in 1 2 3 4 5 6 7 8 9 10; 
do /home/d669d153/work/admixture_linux-1.3.0/admixture --cv -j5 -m EM binary_fileset.bed $K | tee log${K}.out;
done
#Which K iteration is optimal according to ADMIXTURE ?
grep -h CV log*.out > log.errors.txt

0.4 check out the run with all samples included and no mac filter

#setwd to admixture directory run on the cluster
setwd("~/Desktop/cali.zosterops.rad/admixture/all.samps/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]
#reorder sampling df to match order of the plot
sample.info<-sample.info[match(samps, sample.info$ID),]
sample.info$ID == samps
  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(1,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#show sample order
samps
  [1] "ZJlo012"       "ZJlo015"       "ZJlo016"       "ZJlo017"      
  [5] "ZJlo021"       "ZJlo022"       "ZJlo024"       "ZJlo031"      
  [9] "ZJlo045"       "ZJlo046"       "ZJlo050"       "ZJlo052"      
 [13] "ZJlo055"       "ZJja001"       "ZJja002"       "ZJja003"      
 [17] "ZJja004"       "ZJja005"       "ZJja009"       "ZJja010"      
 [21] "ZJja012"       "ZJja013"       "ZJja014"       "ZJja016"      
 [25] "ZJja030"       "ZJal003"       "Zjal004"       "ZJal005"      
 [29] "ZJal010"       "ZJal011"       "ZJal012"       "ZJal020"      
 [33] "ZJst007"       "ZJst009"       "ZJst010"       "ZJst012"      
 [37] "ZJst014"       "ZJst015"       "ZJin001"       "ZJin003"      
 [41] "ZJsi002"       "ZJsi017"       "ZJsi032"       "ZJsi023"      
 [45] "ZJsi024"       "ZJsi027"       "ZJsi028"       "ZJsi029"      
 [49] "ZJsi031"       "ZPxx001"       "ZPxx002"       "ZMxx002"      
 [53] "ZMxx003"       "ZMxx004"       "ZMxx005"       "ZMxx006"      
 [57] "ZMOxx001"      "ZMOxx002"      "ZMOxx003"      "ZMOxx005"     
 [61] "ZMOha001"      "ZMOwh002"      "ZMOwh003"      "ZMOwh004"     
 [65] "ZMOwh005"      "ZMOwh006"      "ZMOvu002"      "ZMOvu003"     
 [69] "ZMOvu004"      "ZMOvu005"      "ZMOmo001"      "ZMOpa001"     
 [73] "ZMOpa002"      "ZNni001"       "ZERxx002"      "ZERxx004"     
 [77] "ZERxx005"      "Zsim23588"     "Zsim28142"     "Zsim30897"    
 [81] "Zpal23498"     "Zpal23522"     "Zmon20893"     "Zmon20892"    
 [85] "Zmon20902"     "Zmon20909"     "Zsim31166"     "Zsim31159"    
 [89] "Zmey17876"     "Zmey17877"     "Zmey17852"     "Zmey17922"    
 [93] "Zmey17920"     "Zmey17925"     "Zmey17923"     "Zery28090"    
 [97] "Zery28091"     "Zery28088"     "Zni10863"      "Zni14341"     
[101] "Zni19650"      "Zni17984"      "ZjaDOT-10981"  "ZjaDOT-5235"  
[105] "Z_LA_122866_2" "Z_LA_122577_2" "Z_LA_122188_2" "Zsim13809"    
[109] "Zsim13773"     "Zsim6797"      "Zsim10336"     "Zsim11362"    
[113] "Zsim11102"     "Zsim11220"     "Zmon20899"     "Zmon28375"    
[117] "Zev13949"      "Zev31650"      "Zev28451"      "Z_HI_BRY431"  
[121] "Z_HI_NAN290"   "Z_HI_WAI087"   "Z_HI_WAI078"   "Z_HI_SOL783"  
#rename rows in sampling column
rownames(sample.info)<-1:nrow(sample.info)
#index out each species to reorder
orderbyspecies<-c(as.numeric(rownames(sample.info[sample.info$Species == "everetti",])),
  as.numeric(rownames(sample.info[sample.info$Species == "nigrorum",])),
  as.numeric(rownames(sample.info[sample.info$Species == "palpebrosus",])),
  as.numeric(rownames(sample.info[sample.info$Species == "erythropleurus",])),
  as.numeric(rownames(sample.info[sample.info$Species == "simplex",])),
  as.numeric(rownames(sample.info[sample.info$Species == "japonicus",]))
  )

#reorder sampling df
samps.info<-sample.info[orderbyspecies,]

#make df of q values by sample
df<-cbind(samps.info[,c(1,6,8)],runs[[6]][orderbyspecies,])
rownames(df)<-c(1:nrow(df))

#reorder runs
for(i in 1:10){runs[[i]]<-t(as.matrix(runs[[i]]))[,orderbyspecies]}

#plot reordered barplot of optimal K value
barplot(runs[[6]], col=c("#882255","#332288","#DDCC77","#88CCEE","#CC6677","grey"), ylab="Ancestry", border="black")

#reorder one final time to clean it up slightly more
barplot(runs[[6]][,c(1:69,120:124,70:86,91:95,110:116,87:90,96:109,117:119)], col=c("#882255","#332288","#DDCC77","#88CCEE","#CC6677","grey"), ylab="Ancestry", border="black")

#save barplot
#pdf("~/Desktop/cali.zosterops.rad/admixture.all.pdf", width = 9, height=3.5)
#barplot(runs[[6]][,c(1:69,120:124,70:86,91:95,110:116,87:90,96:109,117:119)], col=c("#882255","#332288","#DDCC77","#88CCEE","#CC6677","grey"), ylab="Ancestry", border="black", )
#dev.off()

0.5 With MAC filter

#setwd to admixture directory run on the cluster
setwd("~/Desktop/cali.zosterops.rad/admixture/mac2/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]

#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(1,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#reorder runs
for(i in 1:10){runs[[i]]<-t(as.matrix(runs[[i]]))[,orderbyspecies]}

#reorder one final time to clean it up slightly more
barplot(runs[[6]][,c(1:69,120:124,70:86,91:95,110:116,87:90,96:109,117:119)], col=c("#882255","#332288","#DDCC77","#88CCEE","#CC6677","grey"), ylab="Ancestry", border="black")

### MAC filter appears to make no difference, so we will just run with the full unlinked SNP dataset

0.6 Run hierarchical subsets

Because everetti and nigrorum are not split when they are run as part of the entire dataset, we will isolate them out of the vcf and re-run them separately to show that they are cleanly two separate things that are just not being split due to hierarchical structure

#see the samples in the vcf
colnames(vcfR@gt)
  [1] "FORMAT"        "ZJlo012"       "ZJlo015"       "ZJlo016"      
  [5] "ZJlo017"       "ZJlo021"       "ZJlo022"       "ZJlo024"      
  [9] "ZJlo031"       "ZJlo045"       "ZJlo046"       "ZJlo050"      
 [13] "ZJlo052"       "ZJlo055"       "ZJja001"       "ZJja002"      
 [17] "ZJja003"       "ZJja004"       "ZJja005"       "ZJja009"      
 [21] "ZJja010"       "ZJja012"       "ZJja013"       "ZJja014"      
 [25] "ZJja016"       "ZJja030"       "ZJal003"       "Zjal004"      
 [29] "ZJal005"       "ZJal010"       "ZJal011"       "ZJal012"      
 [33] "ZJal020"       "ZJst007"       "ZJst009"       "ZJst010"      
 [37] "ZJst012"       "ZJst014"       "ZJst015"       "ZJin001"      
 [41] "ZJin003"       "ZJsi002"       "ZJsi017"       "ZJsi032"      
 [45] "ZJsi023"       "ZJsi024"       "ZJsi027"       "ZJsi028"      
 [49] "ZJsi029"       "ZJsi031"       "ZPxx001"       "ZPxx002"      
 [53] "ZMxx002"       "ZMxx003"       "ZMxx004"       "ZMxx005"      
 [57] "ZMxx006"       "ZMOxx001"      "ZMOxx002"      "ZMOxx003"     
 [61] "ZMOxx005"      "ZMOha001"      "ZMOwh002"      "ZMOwh003"     
 [65] "ZMOwh004"      "ZMOwh005"      "ZMOwh006"      "ZMOvu002"     
 [69] "ZMOvu003"      "ZMOvu004"      "ZMOvu005"      "ZMOmo001"     
 [73] "ZMOpa001"      "ZMOpa002"      "ZNni001"       "ZERxx002"     
 [77] "ZERxx004"      "ZERxx005"      "Zsim23588"     "Zsim28142"    
 [81] "Zsim30897"     "Zpal23498"     "Zpal23522"     "Zmon20893"    
 [85] "Zmon20892"     "Zmon20902"     "Zmon20909"     "Zsim31166"    
 [89] "Zsim31159"     "Zmey17876"     "Zmey17877"     "Zmey17852"    
 [93] "Zmey17922"     "Zmey17920"     "Zmey17925"     "Zmey17923"    
 [97] "Zery28090"     "Zery28091"     "Zery28088"     "Zni10863"     
[101] "Zni14341"      "Zni19650"      "Zni17984"      "ZjaDOT-10981" 
[105] "ZjaDOT-5235"   "Z_LA_122866_2" "Z_LA_122577_2" "Z_LA_122188_2"
[109] "Zsim13809"     "Zsim13773"     "Zsim6797"      "Zsim10336"    
[113] "Zsim11362"     "Zsim11102"     "Zsim11220"     "Zmon20899"    
[117] "Zmon28375"     "Zev13949"      "Zev31650"      "Zev28451"     
[121] "Z_HI_BRY431"   "Z_HI_NAN290"   "Z_HI_WAI087"   "Z_HI_WAI078"  
[125] "Z_HI_SOL783"  
#isolate only everetti and nigrorum from the vcf
vcf.sub<-vcfR[,c(1,100,102,103,75,118:120)]
#check that worked properly
colnames(vcf.sub@gt)
[1] "FORMAT"   "Zni10863" "Zni19650" "Zni17984" "ZNni001"  "Zev13949" "Zev31650"
[8] "Zev28451"
#remove invariant sites
vcf.sub<-min_mac(vcf.sub, min.mac = 1)
74.13% of SNPs fell below a minor allele count of 1 and were removed from the VCF

#check how many SNPs remain
vcf.sub
***** Object of Class vcfR *****
7 samples
122 CHROMs
402 variants
Object size: 0.4 Mb
4.016 percent missing data
*****        *****         *****
#vcfR::write.vcf(vcf.sub, file="~/Desktop/cali.zosterops.rad/ev.ni.vcf.gz")

0.7 Run this subset in a unique directory on the cluster

#!/bin/sh
#
#SBATCH --job-name=admixture.zost              # Job Name
#SBATCH --nodes=1             # 40 nodes
#SBATCH --ntasks-per-node=5               # 40 CPU allocation per Task
#SBATCH --partition=sixhour            # Name of the Slurm partition used
#SBATCH --chdir=/home/d669d153/work/zosterops.rad/admixture/ev.ni    # Set working d$
#SBATCH --mem-per-cpu=1gb            # memory requested
#SBATCH --time=200
#use plink to convert vcf directly to bed format:
/home/d669d153/work/plink --vcf /home/d669d153/work/zosterops.rad/admixture/mac2/ev.ni.vcf  --double-id --allow-extra-chr --make-bed --out binary_fileset
#fix chromosome names
cut -f2- binary_fileset.bim  > temp
awk 'BEGIN{FS=OFS="\t"}{print value 1 OFS $0}' temp > binary_fileset.bim
rm temp
#run admixture for a K of 1-10, using cross-validation, with 10 threads
for K in 1 2 3 4 5 6 7 8 9 10; 
do /home/d669d153/work/admixture_linux-1.3.0/admixture --cv -j5 -m EM binary_fileset.bed $K | tee log${K}.out;
done
#Which K iteration is optimal according to ADMIXTURE ?
grep -h CV log*.out > log.errors.txt

0.8 Check out the subset results

#setwd to admixture directory run on the cluster
setwd("~/Desktop/cali.zosterops.rad/admixture/ev.ni/")
#read in log error values to determine optimal K
log<-read.table("log.errors.txt")[,c(3:4)]
#use double backslash to interpret the opening parentheses literally in the regular expression
log$V3<-gsub("\\(K=", "", log$V3)
log$V3<-gsub("):", "", log$V3)
#interpret K values as numerical
log$V3<-as.numeric(log$V3)
#rename columns
colnames(log)<-c("Kvalue","cross.validation.error")
#make plot showing the cross validation error across K values 1:10
ggplot(data=log, aes(x=Kvalue, y=cross.validation.error, group=1)) +
  geom_line(linetype = "dashed")+
  geom_point()+
  ylab("cross-validation error")+
  xlab("K")+
  scale_x_continuous(breaks = c(1:10))+
  theme_classic()
Warning: Removed 4 rows containing missing values (`geom_line()`).
Warning: Removed 5 rows containing missing values (`geom_point()`).

#read in input file in order to get list of input samples in order
samps<-read.table("binary_fileset.fam")[,1]

#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}
par(mfrow=c(1,1))
#plot each run
for (i in 1:5){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

for (i in 6:10){
barplot(t(as.matrix(runs[[i]])), col=rainbow(i), ylab="Ancestry", border="black")
}

#plot with appropriate color code
barplot(t(as.matrix(runs[[2]])), col=c("#117733","#DDCC77"), ylab="Ancestry", border="black")

#save barplot
#pdf("~/Desktop/cali.zosterops.rad/ev.ni.admix.pdf", width = 5.5, height=3.5)
#barplot(t(as.matrix(runs[[2]])), col=c("#117733","#DDCC77"), ylab="Ancestry", border="black")
#dev.off()