grosbeak triangle plot and associations

0.1 bring in data

Code
library(triangulaR)
library(vcfR)
library(ggdist)
library(ggExtra)
#read in data
v<-read.vcfR("~/Desktop/grosbeak.rad/grosbeak.filtered.snps.vcf.gz")

0.2 make triangle plot

Code
v
***** Object of Class vcfR *****
138 samples
1027 CHROMs
51,595 variants
Object size: 532.6 Mb
2.987 percent missing data
*****        *****         *****
Code
colnames(v@gt)
  [1] "FORMAT"                 "P_hybrid_44696"         "P_hybrid_44703"        
  [4] "P_hybrid_44707"         "P_hybrid_44708"         "P_hybrid_44709"        
  [7] "P_hybrid_44712"         "P_hybrid_44762"         "P_hybrid_44771"        
 [10] "P_hybrid_44781"         "P_hybrid_45171"         "P_hybrid_45173"        
 [13] "P_hybrid_45174"         "P_ludovicianus_11998"   "P_ludovicianus_21721"  
 [16] "P_ludovicianus_25286"   "P_ludovicianus_26595"   "P_ludovicianus_33988"  
 [19] "P_ludovicianus_34776"   "P_ludovicianus_34779"   "P_ludovicianus_34782"  
 [22] "P_ludovicianus_34830"   "P_ludovicianus_44704"   "P_ludovicianus_44705"  
 [25] "P_ludovicianus_44706"   "P_ludovicianus_44710"   "P_ludovicianus_44711"  
 [28] "P_ludovicianus_44713"   "P_ludovicianus_44714"   "P_ludovicianus_44715"  
 [31] "P_ludovicianus_44716"   "P_ludovicianus_44719"   "P_ludovicianus_44720"  
 [34] "P_ludovicianus_44722"   "P_ludovicianus_44723"   "P_ludovicianus_44726"  
 [37] "P_ludovicianus_44727"   "P_ludovicianus_44729"   "P_ludovicianus_44730"  
 [40] "P_ludovicianus_44731"   "P_ludovicianus_44732"   "P_ludovicianus_44733"  
 [43] "P_ludovicianus_44734"   "P_ludovicianus_44737"   "P_ludovicianus_44738"  
 [46] "P_ludovicianus_44739"   "P_ludovicianus_44740"   "P_ludovicianus_44741"  
 [49] "P_ludovicianus_44742"   "P_ludovicianus_44743"   "P_ludovicianus_44744"  
 [52] "P_ludovicianus_44745"   "P_ludovicianus_44746"   "P_ludovicianus_44747"  
 [55] "P_ludovicianus_44748"   "P_ludovicianus_44749"   "P_ludovicianus_44753"  
 [58] "P_ludovicianus_44754"   "P_ludovicianus_44761"   "P_ludovicianus_44775"  
 [61] "P_melanocephalus_34890" "P_melanocephalus_43110" "P_melanocephalus_43276"
 [64] "P_melanocephalus_44346" "P_melanocephalus_44347" "P_melanocephalus_44471"
 [67] "P_melanocephalus_44651" "P_melanocephalus_44660" "P_melanocephalus_44666"
 [70] "P_melanocephalus_44676" "P_melanocephalus_44677" "P_melanocephalus_44678"
 [73] "P_melanocephalus_44679" "P_melanocephalus_44680" "P_melanocephalus_44681"
 [76] "P_melanocephalus_44683" "P_melanocephalus_44684" "P_melanocephalus_44685"
 [79] "P_melanocephalus_44686" "P_melanocephalus_44687" "P_melanocephalus_44688"
 [82] "P_melanocephalus_44689" "P_melanocephalus_44692" "P_melanocephalus_44693"
 [85] "P_melanocephalus_44694" "P_melanocephalus_44695" "P_melanocephalus_44697"
 [88] "P_melanocephalus_44699" "P_melanocephalus_44752" "P_melanocephalus_44760"
 [91] "P_melanocephalus_44763" "P_melanocephalus_44764" "P_melanocephalus_44765"
 [94] "P_melanocephalus_44766" "P_melanocephalus_44769" "P_melanocephalus_44770"
 [97] "P_melanocephalus_44772" "P_melanocephalus_44773" "P_melanocephalus_44774"
[100] "P_melanocephalus_44776" "P_melanocephalus_44777" "P_melanocephalus_44778"
[103] "P_melanocephalus_44779" "P_melanocephalus_44780" "P_melanocephalus_44782"
[106] "P_melanocephalus_44787" "P_melanocephalus_44788" "P_melanocephalus_44789"
[109] "P_melanocephalus_44790" "P_melanocephalus_44791" "P_melanocephalus_44792"
[112] "P_melanocephalus_44793" "P_melanocephalus_44794" "P_melanocephalus_44795"
[115] "P_melanocephalus_44798" "P_melanocephalus_44799" "P_melanocephalus_44801"
[118] "P_melanocephalus_44802" "P_melanocephalus_44803" "P_melanocephalus_44804"
[121] "P_melanocephalus_44805" "P_melanocephalus_44806" "P_melanocephalus_44808"
[124] "P_melanocephalus_44809" "P_melanocephalus_44810" "P_melanocephalus_44837"
[127] "P_melanocephalus_44840" "P_melanocephalus_44843" "P_melanocephalus_44845"
[130] "P_melanocephalus_44846" "P_melanocephalus_44847" "P_melanocephalus_44848"
[133] "P_melanocephalus_44853" "P_melanocephalus_44854" "P_melanocephalus_45175"
[136] "P_melanocephalus_45200" "P_melanocephalus_45232" "P_melanocephalus_45709"
[139] "P_melanocephalus_45926"
Code
#read in sampling dataframe
samps<-read.csv("~/Desktop/grosbeak.data.csv")
samps<-samps[samps$passed.genomic.filtering == "TRUE",]
#make popmap
pm<-data.frame(id=samps$sample_id,pop=c(rep("P1", times=9),rep("P0",times=8),rep("hyb",times=121)))

# Create a new vcfR object composed only of sites above the given allele frequency difference threshold
fixed.vcf <- alleleFreqDiff(vcfR = v, pm = pm, p1 = "P1", p2 = "P0", difference = 1)
[1] "266 sites passed allele frequency difference threshold"
Code
# Calculate hybrid index and heterozygosity for each sample. Values are returned in a data.frame
hi.het.test <- hybridIndex(vcfR = fixed.vcf, pm = pm, p1 = "P1", p2 = "P0")
[1] "calculating hybrid indices and heterozygosities based on 266 sites"
Code
# View triangle plot
triangulaR::triangle.plot(hi.het.test)
Warning in geom_segment(aes(x = 0.5, xend = 1, y = 1, yend = 0), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.
Warning in geom_segment(aes(x = 0, xend = 0.5, y = 0, yend = 1), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.
Warning in geom_segment(aes(x = 0, xend = 1, y = 0, yend = 0), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.

Code
#plot color-coded by mtDNA haplotype
samps$sample_id == colnames(v@gt)[-1] #check if sample info table order matches the vcf
  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [85] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
 [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE
Code
samps<-samps[match(colnames(v@gt)[-1],samps$sample_id),] #use 'match' to match orders
samps$sample_id == colnames(v@gt)[-1] #check if this worked
  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[136] TRUE TRUE TRUE
Code
colvec<-gsub("1","#fff319",gsub("0","#ef3b2c",samps$mtDNA))
colvec[is.na(colvec)]<-"grey"

hi.het.test$colvec<-colvec
hi.het.test$mtDNA<-samps$mtDNA

ggplot(hi.het.test, aes(x=hybrid.index, y=heterozygosity, color=as.factor(mtDNA))) +
      geom_segment(aes(x = 0.5, xend = 1, y = 1, yend = 0), color = "black") +
      geom_segment(aes(x = 0, xend = 0.5, y = 0, yend = 1), color = "black") +
      geom_segment(aes(x = 0, xend = 1, y = 0, yend = 0), color = "black") +
      stat_function(fun = function(hi) 2*hi*(1-hi), xlim = c(0,1), color = "black", linetype = "dashed") +
      geom_point(cex = 3, alpha = 1)+
      guides(shape = guide_legend(override.aes = list(size = 5), order=2, label.theme= element_text(face="italic")))+
      xlab(paste("Hybrid Index"))+
      ylab(paste("Interclass Heterozygosity"))+
      labs(title = "") +
      scale_color_manual(values = c("#ef3b2c","#fff319"),na.value = "grey") +
      ylim(c(-0.05,1.05)) +
      xlim(c(-0.05,1.05)) +
      theme_classic()
Warning in geom_segment(aes(x = 0.5, xend = 1, y = 1, yend = 0), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.
Warning in geom_segment(aes(x = 0, xend = 0.5, y = 0, yend = 1), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.
Warning in geom_segment(aes(x = 0, xend = 1, y = 0, yend = 0), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.

Code
hi.het.test$site<-samps$site

ggplot(hi.het.test, aes(x=hybrid.index, y=heterozygosity, color=as.factor(site))) +
      geom_segment(aes(x = 0.5, xend = 1, y = 1, yend = 0), color = "black") +
      geom_segment(aes(x = 0, xend = 0.5, y = 0, yend = 1), color = "black") +
      geom_segment(aes(x = 0, xend = 1, y = 0, yend = 0), color = "black") +
      stat_function(fun = function(hi) 2*hi*(1-hi), xlim = c(0,1), color = "black", linetype = "dashed") +
      geom_point(cex = 3, alpha = 1)+
      guides(shape = guide_legend(override.aes = list(size = 5), order=2, label.theme= element_text(face="italic")))+
      xlab(paste("Hybrid Index"))+
      ylab(paste("Interclass Heterozygosity"))+
      labs(title = "") +
      #scale_color_manual(values = c("#ef3b2c","#fff319"),na.value = "grey") +
      ylim(c(-0.05,1.05)) +
      xlim(c(-0.05,1.05)) +
      theme_classic()
Warning in geom_segment(aes(x = 0.5, xend = 1, y = 1, yend = 0), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.
Warning in geom_segment(aes(x = 0, xend = 0.5, y = 0, yend = 1), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.
Warning in geom_segment(aes(x = 0, xend = 1, y = 0, yend = 0), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
  a single row.

Code
#save plot
g<-ggplot(hi.het.test, aes(x=hybrid.index, y=heterozygosity, color=as.factor(mtDNA))) +
      geom_segment(aes(x = 0.5, xend = 1, y = 1, yend = 0), color = "black") +
      geom_segment(aes(x = 0, xend = 0.5, y = 0, yend = 1), color = "black") +
      geom_segment(aes(x = 0, xend = 1, y = 0, yend = 0), color = "black") +
      stat_function(fun = function(hi) 2*hi*(1-hi), xlim = c(0,1), color = "black", linetype = "dashed") +
      geom_point(cex = 3, alpha = 1)+
      guides(shape = guide_legend(override.aes = list(size = 5), order=2, label.theme= element_text(face="italic")))+
      xlab(paste("Hybrid Index"))+
      ylab(paste("Interclass Heterozygosity"))+
      labs(title = "") +
      scale_color_manual(values = c("#ef3b2c","#fff319"),na.value = "grey") +
      ylim(c(-0.05,1.05)) +
      xlim(c(-0.05,1.05)) +
      theme_classic()
#ggsave("~/Desktop/grosbeak.rad/triangle.plot.pdf", g, width = 6,height = 4,units = "in")

0.3 plot associations

Code
#setwd to the admixture directory you brought in from the cluster
setwd("~/Desktop/grosbeak.rad/admixture.mac")

#read in input file
sampling<-read.table("binary_fileset.fam")[,1]
#get list of input samples in order they appear
sampling
  [1] "P_hybrid_44696"         "P_hybrid_44703"         "P_hybrid_44707"        
  [4] "P_hybrid_44708"         "P_hybrid_44709"         "P_hybrid_44712"        
  [7] "P_hybrid_44762"         "P_hybrid_44771"         "P_hybrid_44781"        
 [10] "P_hybrid_45171"         "P_hybrid_45173"         "P_hybrid_45174"        
 [13] "P_ludovicianus_11998"   "P_ludovicianus_21721"   "P_ludovicianus_25286"  
 [16] "P_ludovicianus_26595"   "P_ludovicianus_33988"   "P_ludovicianus_34776"  
 [19] "P_ludovicianus_34779"   "P_ludovicianus_34782"   "P_ludovicianus_34830"  
 [22] "P_ludovicianus_44704"   "P_ludovicianus_44705"   "P_ludovicianus_44706"  
 [25] "P_ludovicianus_44710"   "P_ludovicianus_44711"   "P_ludovicianus_44713"  
 [28] "P_ludovicianus_44714"   "P_ludovicianus_44715"   "P_ludovicianus_44716"  
 [31] "P_ludovicianus_44719"   "P_ludovicianus_44720"   "P_ludovicianus_44722"  
 [34] "P_ludovicianus_44723"   "P_ludovicianus_44726"   "P_ludovicianus_44727"  
 [37] "P_ludovicianus_44729"   "P_ludovicianus_44730"   "P_ludovicianus_44731"  
 [40] "P_ludovicianus_44732"   "P_ludovicianus_44733"   "P_ludovicianus_44734"  
 [43] "P_ludovicianus_44737"   "P_ludovicianus_44738"   "P_ludovicianus_44739"  
 [46] "P_ludovicianus_44740"   "P_ludovicianus_44741"   "P_ludovicianus_44742"  
 [49] "P_ludovicianus_44743"   "P_ludovicianus_44744"   "P_ludovicianus_44745"  
 [52] "P_ludovicianus_44746"   "P_ludovicianus_44747"   "P_ludovicianus_44748"  
 [55] "P_ludovicianus_44749"   "P_ludovicianus_44753"   "P_ludovicianus_44754"  
 [58] "P_ludovicianus_44761"   "P_ludovicianus_44775"   "P_melanocephalus_34890"
 [61] "P_melanocephalus_43110" "P_melanocephalus_43276" "P_melanocephalus_44346"
 [64] "P_melanocephalus_44347" "P_melanocephalus_44471" "P_melanocephalus_44651"
 [67] "P_melanocephalus_44660" "P_melanocephalus_44666" "P_melanocephalus_44676"
 [70] "P_melanocephalus_44677" "P_melanocephalus_44678" "P_melanocephalus_44679"
 [73] "P_melanocephalus_44680" "P_melanocephalus_44681" "P_melanocephalus_44683"
 [76] "P_melanocephalus_44684" "P_melanocephalus_44685" "P_melanocephalus_44686"
 [79] "P_melanocephalus_44687" "P_melanocephalus_44688" "P_melanocephalus_44689"
 [82] "P_melanocephalus_44692" "P_melanocephalus_44693" "P_melanocephalus_44694"
 [85] "P_melanocephalus_44695" "P_melanocephalus_44697" "P_melanocephalus_44699"
 [88] "P_melanocephalus_44752" "P_melanocephalus_44760" "P_melanocephalus_44763"
 [91] "P_melanocephalus_44764" "P_melanocephalus_44765" "P_melanocephalus_44766"
 [94] "P_melanocephalus_44769" "P_melanocephalus_44770" "P_melanocephalus_44772"
 [97] "P_melanocephalus_44773" "P_melanocephalus_44774" "P_melanocephalus_44776"
[100] "P_melanocephalus_44777" "P_melanocephalus_44778" "P_melanocephalus_44779"
[103] "P_melanocephalus_44780" "P_melanocephalus_44782" "P_melanocephalus_44787"
[106] "P_melanocephalus_44788" "P_melanocephalus_44789" "P_melanocephalus_44790"
[109] "P_melanocephalus_44791" "P_melanocephalus_44792" "P_melanocephalus_44793"
[112] "P_melanocephalus_44794" "P_melanocephalus_44795" "P_melanocephalus_44798"
[115] "P_melanocephalus_44799" "P_melanocephalus_44801" "P_melanocephalus_44802"
[118] "P_melanocephalus_44803" "P_melanocephalus_44804" "P_melanocephalus_44805"
[121] "P_melanocephalus_44806" "P_melanocephalus_44808" "P_melanocephalus_44809"
[124] "P_melanocephalus_44810" "P_melanocephalus_44837" "P_melanocephalus_44840"
[127] "P_melanocephalus_44843" "P_melanocephalus_44845" "P_melanocephalus_44846"
[130] "P_melanocephalus_44847" "P_melanocephalus_44848" "P_melanocephalus_44853"
[133] "P_melanocephalus_44854" "P_melanocephalus_45175" "P_melanocephalus_45200"
[136] "P_melanocephalus_45232" "P_melanocephalus_45709" "P_melanocephalus_45926"
Code
#read in all ten runs and save each dataframe in a list
runs<-list()
#read in log files
for (i in 1:10){
  runs[[i]]<-read.table(paste0("binary_fileset.", i, ".Q"))
}

#isolate run 2 (best according to CV)
run2<-runs[[2]]
#add sample info in the correct order (same as input vcf)
run2$sample<-colnames(v@gt)[-1]

#
samps$sample_id == run2$sample #check if sample info table order matches the vcf
  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[136] TRUE TRUE TRUE
Code
samps<-samps[match(run2$sample,samps$sample_id),] #use 'match' to match orders
samps$sample_id == run2$sample #check if this worked
  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[136] TRUE TRUE TRUE
Code
#add q-values to the sampling data.frame now that orders match
samps$mac.lud.q<-run2$V1
samps$mac.mel.q<-run2$V2

#plot
ggplot(samps, aes(x=mac.mel.q, y=male.total/12, color=as.factor(mtDNA))) +
      geom_point(cex = 3, alpha = 1)+
      guides(shape = guide_legend(override.aes = list(size = 5), order=2, label.theme= element_text(face="italic")))+
      xlab(paste("melanocephalus ancestry"))+
      ylab(paste("melanocephalus phenotype"))+
      labs(title = "") +
      scale_color_manual(values = c("#ef3b2c","#fff319"),na.value = "grey") +
      ylim(c(0,12)) +
      scale_y_continuous(breaks=seq(0, 1, 1/4)) +
      theme_classic()
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning: Removed 25 rows containing missing values or values outside the scale range
(`geom_point()`).
Code
samps$male.prop<-samps$male.total/12
#do linear regression
phengen.lm <- lm(mac.mel.q ~ male.prop, samps)
#print out the summary
summary(phengen.lm)

Call:
lm(formula = mac.mel.q ~ male.prop, data = samps)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.87505  0.02263  0.03650  0.03700  0.40788 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.03415    0.02151  -1.588    0.115    
male.prop    0.99714    0.02706  36.851   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1266 on 111 degrees of freedom
  (25 observations deleted due to missingness)
Multiple R-squared:  0.9244,    Adjusted R-squared:  0.9238 
F-statistic:  1358 on 1 and 111 DF,  p-value: < 2.2e-16
Code
#plot with best fit linear regression line laid over
p<-ggplot(samps, aes(x=mac.mel.q, y=male.prop, color=as.factor(mtDNA))) +
      geom_point(cex = 3, alpha = 1)+
      guides(shape = guide_legend(override.aes = list(size = 5), order=2, label.theme= element_text(face="italic")))+
      xlab(paste("melanocephalus genomic ancestry"))+
      ylab(paste("melanocephalus phenotype"))+
      labs(title = "") +
      scale_color_manual(values = c("#ef3b2c","#fff319"),na.value = "grey") +
      #ylim(c(0,12)) +
      scale_y_continuous(breaks=seq(0, 1, 1/4)) +
      geom_abline(slope = coef(phengen.lm)[["male.prop"]], intercept = coef(phengen.lm)[["(Intercept)"]]) +
      theme_classic()+
      theme(legend.position="none")
      
ggMarginal(p, type = "histogram") 
Warning: Removed 25 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 25 rows containing missing values or values outside the scale range
(`geom_point()`).

Code
#save
pp<-ggMarginal(p, type = "histogram") 
Warning: Removed 25 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 25 rows containing missing values or values outside the scale range
(`geom_point()`).
Code
#ggsave("~/Desktop/grosbeak.rad/geno.pheno.plot.pdf", pp, width = 4,height = 4,units = "in")

0.4

Code
#plot
ggplot(samps, aes(x=mtDNA, y=mac.mel.q, color=as.factor(mtDNA))) +
      geom_point(cex = 3, alpha = 1) +
      guides(shape = guide_legend(override.aes = list(size = 5), order=2, label.theme= element_text(face="italic")))+
      xlab(paste("melanocephalus ancestry"))+
      ylab(paste("melanocephalus phenotype"))+
      labs(title = "") +
      scale_color_manual(values = c("#ef3b2c","#fff319"),na.value = "grey") +
      ylim(c(0,12)) +
      scale_y_continuous(breaks=seq(0, 1, 1/4)) +
      theme_classic()
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning: Removed 20 rows containing missing values or values outside the scale range
(`geom_point()`).

Code
sub<-samps[!is.na(samps$mtDNA),]
ggplot(sub, aes(x = as.factor(mtDNA), y = mac.mel.q, color=as.factor(mtDNA))) + 
  ggdist::stat_halfeye(adjust = 10, width = .5, .width = 0, justification = -.3, point_colour = NA) + 
  geom_boxplot(width = .25, outlier.shape = NA) +
  geom_point(size = 1.5,alpha = .75,position = position_jitter(seed = 1, width = .02))+
  scale_color_manual(values = c("#ef3b2c","#fff319"), aesthetics = c("fill", "color")) +
  theme_classic()+
  coord_cartesian(xlim = c(1.3, 2.2), clip = "off")

Code
#save
gplot<-ggplot(sub, aes(x = as.factor(mtDNA), y = mac.mel.q, color=as.factor(mtDNA))) + 
  ggdist::stat_halfeye(adjust = 15, width = .75, .width = 0, justification = -.3, point_colour = NA) + 
  geom_boxplot(width = .25, outlier.shape = NA) +
  geom_point(size = 1.5,alpha = .75,position = position_jitter(seed = 1, width = .02))+
  scale_color_manual(values = c("#ef3b2c","#fff319"), aesthetics = c("fill", "color")) +
  theme_classic()+
  coord_cartesian(xlim = c(1.35, 2.3), clip = "off")+
  theme(legend.position="none")

#ggsave("~/Desktop/grosbeak.rad/mt.boxplot.plot.pdf", gplot, width = 4.2,height = 3.6,units = "in")

#get info for a table showing relative frequency of mitonuclear discordance
k<-c()
for (i in 1:nrow(sub)){
  if(sub$mtDNA[i] == 0 & sub$mac.lud.q[i] > .5){k[i]<-"ludo.ludo"}
  else if(sub$mtDNA[i] == 0 & sub$mac.lud.q[i] < .5){k[i]<-"ludo.mel"}
  else if(sub$mtDNA[i] == 1 & sub$mac.mel.q[i] > .5){k[i]<-"mel.mel"}
  else if(sub$mtDNA[i] == 1 & sub$mac.mel.q[i] < .5){k[i]<-"mel.ludo"}
}
table(k)
k
ludo.ludo  ludo.mel   mel.mel 
       44         2        72 
Code
mean(sub$mac.mel.q[sub$mtDNA == 0])
[1] 0.04359798
Code
mean(sub$mac.mel.q[sub$mtDNA == 1])
[1] 0.9695234
Code
#check if discordant genotypes are still rare in the center of the hybrid zone
table(k[sub$site == 7 | sub$site == 8])

ludo.ludo  ludo.mel   mel.mel 
        2         2        15 
Code
table(k[sub$site > 5 & sub$site < 10])

ludo.ludo  ludo.mel   mel.mel 
       11         2        23 
Code
chisq <- chisq.test(rbind(c(44,0),c(2,72)))
chisq

    Pearson's Chi-squared test with Yates' continuity correction

data:  rbind(c(44, 0), c(2, 72))
X-squared = 105.77, df = 1, p-value < 2.2e-16