Code
library(triangulaR)
library(vcfR)
library(ggdist)
library(ggExtra)
#read in data
<-read.vcfR("~/Desktop/grosbeak.rad/grosbeak.filtered.snps.vcf.gz") v
library(triangulaR)
library(vcfR)
library(ggdist)
library(ggExtra)
#read in data
<-read.vcfR("~/Desktop/grosbeak.rad/grosbeak.filtered.snps.vcf.gz") v
v
***** Object of Class vcfR *****
138 samples
1027 CHROMs
51,595 variants
Object size: 532.6 Mb
2.987 percent missing data
***** ***** *****
colnames(v@gt)
[1] "FORMAT" "P_hybrid_44696" "P_hybrid_44703"
[4] "P_hybrid_44707" "P_hybrid_44708" "P_hybrid_44709"
[7] "P_hybrid_44712" "P_hybrid_44762" "P_hybrid_44771"
[10] "P_hybrid_44781" "P_hybrid_45171" "P_hybrid_45173"
[13] "P_hybrid_45174" "P_ludovicianus_11998" "P_ludovicianus_21721"
[16] "P_ludovicianus_25286" "P_ludovicianus_26595" "P_ludovicianus_33988"
[19] "P_ludovicianus_34776" "P_ludovicianus_34779" "P_ludovicianus_34782"
[22] "P_ludovicianus_34830" "P_ludovicianus_44704" "P_ludovicianus_44705"
[25] "P_ludovicianus_44706" "P_ludovicianus_44710" "P_ludovicianus_44711"
[28] "P_ludovicianus_44713" "P_ludovicianus_44714" "P_ludovicianus_44715"
[31] "P_ludovicianus_44716" "P_ludovicianus_44719" "P_ludovicianus_44720"
[34] "P_ludovicianus_44722" "P_ludovicianus_44723" "P_ludovicianus_44726"
[37] "P_ludovicianus_44727" "P_ludovicianus_44729" "P_ludovicianus_44730"
[40] "P_ludovicianus_44731" "P_ludovicianus_44732" "P_ludovicianus_44733"
[43] "P_ludovicianus_44734" "P_ludovicianus_44737" "P_ludovicianus_44738"
[46] "P_ludovicianus_44739" "P_ludovicianus_44740" "P_ludovicianus_44741"
[49] "P_ludovicianus_44742" "P_ludovicianus_44743" "P_ludovicianus_44744"
[52] "P_ludovicianus_44745" "P_ludovicianus_44746" "P_ludovicianus_44747"
[55] "P_ludovicianus_44748" "P_ludovicianus_44749" "P_ludovicianus_44753"
[58] "P_ludovicianus_44754" "P_ludovicianus_44761" "P_ludovicianus_44775"
[61] "P_melanocephalus_34890" "P_melanocephalus_43110" "P_melanocephalus_43276"
[64] "P_melanocephalus_44346" "P_melanocephalus_44347" "P_melanocephalus_44471"
[67] "P_melanocephalus_44651" "P_melanocephalus_44660" "P_melanocephalus_44666"
[70] "P_melanocephalus_44676" "P_melanocephalus_44677" "P_melanocephalus_44678"
[73] "P_melanocephalus_44679" "P_melanocephalus_44680" "P_melanocephalus_44681"
[76] "P_melanocephalus_44683" "P_melanocephalus_44684" "P_melanocephalus_44685"
[79] "P_melanocephalus_44686" "P_melanocephalus_44687" "P_melanocephalus_44688"
[82] "P_melanocephalus_44689" "P_melanocephalus_44692" "P_melanocephalus_44693"
[85] "P_melanocephalus_44694" "P_melanocephalus_44695" "P_melanocephalus_44697"
[88] "P_melanocephalus_44699" "P_melanocephalus_44752" "P_melanocephalus_44760"
[91] "P_melanocephalus_44763" "P_melanocephalus_44764" "P_melanocephalus_44765"
[94] "P_melanocephalus_44766" "P_melanocephalus_44769" "P_melanocephalus_44770"
[97] "P_melanocephalus_44772" "P_melanocephalus_44773" "P_melanocephalus_44774"
[100] "P_melanocephalus_44776" "P_melanocephalus_44777" "P_melanocephalus_44778"
[103] "P_melanocephalus_44779" "P_melanocephalus_44780" "P_melanocephalus_44782"
[106] "P_melanocephalus_44787" "P_melanocephalus_44788" "P_melanocephalus_44789"
[109] "P_melanocephalus_44790" "P_melanocephalus_44791" "P_melanocephalus_44792"
[112] "P_melanocephalus_44793" "P_melanocephalus_44794" "P_melanocephalus_44795"
[115] "P_melanocephalus_44798" "P_melanocephalus_44799" "P_melanocephalus_44801"
[118] "P_melanocephalus_44802" "P_melanocephalus_44803" "P_melanocephalus_44804"
[121] "P_melanocephalus_44805" "P_melanocephalus_44806" "P_melanocephalus_44808"
[124] "P_melanocephalus_44809" "P_melanocephalus_44810" "P_melanocephalus_44837"
[127] "P_melanocephalus_44840" "P_melanocephalus_44843" "P_melanocephalus_44845"
[130] "P_melanocephalus_44846" "P_melanocephalus_44847" "P_melanocephalus_44848"
[133] "P_melanocephalus_44853" "P_melanocephalus_44854" "P_melanocephalus_45175"
[136] "P_melanocephalus_45200" "P_melanocephalus_45232" "P_melanocephalus_45709"
[139] "P_melanocephalus_45926"
#read in sampling dataframe
<-read.csv("~/Desktop/grosbeak.data.csv")
samps<-samps[samps$passed.genomic.filtering == "TRUE",]
samps#make popmap
<-data.frame(id=samps$sample_id,pop=c(rep("P1", times=9),rep("P0",times=8),rep("hyb",times=121)))
pm
# Create a new vcfR object composed only of sites above the given allele frequency difference threshold
<- alleleFreqDiff(vcfR = v, pm = pm, p1 = "P1", p2 = "P0", difference = 1) fixed.vcf
[1] "266 sites passed allele frequency difference threshold"
# Calculate hybrid index and heterozygosity for each sample. Values are returned in a data.frame
<- hybridIndex(vcfR = fixed.vcf, pm = pm, p1 = "P1", p2 = "P0") hi.het.test
[1] "calculating hybrid indices and heterozygosities based on 266 sites"
# View triangle plot
::triangle.plot(hi.het.test) triangulaR
Warning in geom_segment(aes(x = 0.5, xend = 1, y = 1, yend = 0), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
Warning in geom_segment(aes(x = 0, xend = 0.5, y = 0, yend = 1), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
Warning in geom_segment(aes(x = 0, xend = 1, y = 0, yend = 0), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
#plot color-coded by mtDNA haplotype
$sample_id == colnames(v@gt)[-1] #check if sample info table order matches the vcf samps
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[85] FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
[97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[133] FALSE FALSE FALSE FALSE FALSE FALSE
<-samps[match(colnames(v@gt)[-1],samps$sample_id),] #use 'match' to match orders
samps$sample_id == colnames(v@gt)[-1] #check if this worked samps
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[136] TRUE TRUE TRUE
<-gsub("1","#fff319",gsub("0","#ef3b2c",samps$mtDNA))
colvecis.na(colvec)]<-"grey"
colvec[
$colvec<-colvec
hi.het.test$mtDNA<-samps$mtDNA
hi.het.test
ggplot(hi.het.test, aes(x=hybrid.index, y=heterozygosity, color=as.factor(mtDNA))) +
geom_segment(aes(x = 0.5, xend = 1, y = 1, yend = 0), color = "black") +
geom_segment(aes(x = 0, xend = 0.5, y = 0, yend = 1), color = "black") +
geom_segment(aes(x = 0, xend = 1, y = 0, yend = 0), color = "black") +
stat_function(fun = function(hi) 2*hi*(1-hi), xlim = c(0,1), color = "black", linetype = "dashed") +
geom_point(cex = 3, alpha = 1)+
guides(shape = guide_legend(override.aes = list(size = 5), order=2, label.theme= element_text(face="italic")))+
xlab(paste("Hybrid Index"))+
ylab(paste("Interclass Heterozygosity"))+
labs(title = "") +
scale_color_manual(values = c("#ef3b2c","#fff319"),na.value = "grey") +
ylim(c(-0.05,1.05)) +
xlim(c(-0.05,1.05)) +
theme_classic()
Warning in geom_segment(aes(x = 0.5, xend = 1, y = 1, yend = 0), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
Warning in geom_segment(aes(x = 0, xend = 0.5, y = 0, yend = 1), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
Warning in geom_segment(aes(x = 0, xend = 1, y = 0, yend = 0), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
$site<-samps$site
hi.het.test
ggplot(hi.het.test, aes(x=hybrid.index, y=heterozygosity, color=as.factor(site))) +
geom_segment(aes(x = 0.5, xend = 1, y = 1, yend = 0), color = "black") +
geom_segment(aes(x = 0, xend = 0.5, y = 0, yend = 1), color = "black") +
geom_segment(aes(x = 0, xend = 1, y = 0, yend = 0), color = "black") +
stat_function(fun = function(hi) 2*hi*(1-hi), xlim = c(0,1), color = "black", linetype = "dashed") +
geom_point(cex = 3, alpha = 1)+
guides(shape = guide_legend(override.aes = list(size = 5), order=2, label.theme= element_text(face="italic")))+
xlab(paste("Hybrid Index"))+
ylab(paste("Interclass Heterozygosity"))+
labs(title = "") +
#scale_color_manual(values = c("#ef3b2c","#fff319"),na.value = "grey") +
ylim(c(-0.05,1.05)) +
xlim(c(-0.05,1.05)) +
theme_classic()
Warning in geom_segment(aes(x = 0.5, xend = 1, y = 1, yend = 0), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
Warning in geom_segment(aes(x = 0, xend = 0.5, y = 0, yend = 1), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
Warning in geom_segment(aes(x = 0, xend = 1, y = 0, yend = 0), color = "black"): All aesthetics have length 1, but the data has 138 rows.
ℹ Please consider using `annotate()` or provide this layer with data containing
a single row.
#save plot
<-ggplot(hi.het.test, aes(x=hybrid.index, y=heterozygosity, color=as.factor(mtDNA))) +
ggeom_segment(aes(x = 0.5, xend = 1, y = 1, yend = 0), color = "black") +
geom_segment(aes(x = 0, xend = 0.5, y = 0, yend = 1), color = "black") +
geom_segment(aes(x = 0, xend = 1, y = 0, yend = 0), color = "black") +
stat_function(fun = function(hi) 2*hi*(1-hi), xlim = c(0,1), color = "black", linetype = "dashed") +
geom_point(cex = 3, alpha = 1)+
guides(shape = guide_legend(override.aes = list(size = 5), order=2, label.theme= element_text(face="italic")))+
xlab(paste("Hybrid Index"))+
ylab(paste("Interclass Heterozygosity"))+
labs(title = "") +
scale_color_manual(values = c("#ef3b2c","#fff319"),na.value = "grey") +
ylim(c(-0.05,1.05)) +
xlim(c(-0.05,1.05)) +
theme_classic()
#ggsave("~/Desktop/grosbeak.rad/triangle.plot.pdf", g, width = 6,height = 4,units = "in")
#setwd to the admixture directory you brought in from the cluster
setwd("~/Desktop/grosbeak.rad/admixture.mac")
#read in input file
<-read.table("binary_fileset.fam")[,1]
sampling#get list of input samples in order they appear
sampling
[1] "P_hybrid_44696" "P_hybrid_44703" "P_hybrid_44707"
[4] "P_hybrid_44708" "P_hybrid_44709" "P_hybrid_44712"
[7] "P_hybrid_44762" "P_hybrid_44771" "P_hybrid_44781"
[10] "P_hybrid_45171" "P_hybrid_45173" "P_hybrid_45174"
[13] "P_ludovicianus_11998" "P_ludovicianus_21721" "P_ludovicianus_25286"
[16] "P_ludovicianus_26595" "P_ludovicianus_33988" "P_ludovicianus_34776"
[19] "P_ludovicianus_34779" "P_ludovicianus_34782" "P_ludovicianus_34830"
[22] "P_ludovicianus_44704" "P_ludovicianus_44705" "P_ludovicianus_44706"
[25] "P_ludovicianus_44710" "P_ludovicianus_44711" "P_ludovicianus_44713"
[28] "P_ludovicianus_44714" "P_ludovicianus_44715" "P_ludovicianus_44716"
[31] "P_ludovicianus_44719" "P_ludovicianus_44720" "P_ludovicianus_44722"
[34] "P_ludovicianus_44723" "P_ludovicianus_44726" "P_ludovicianus_44727"
[37] "P_ludovicianus_44729" "P_ludovicianus_44730" "P_ludovicianus_44731"
[40] "P_ludovicianus_44732" "P_ludovicianus_44733" "P_ludovicianus_44734"
[43] "P_ludovicianus_44737" "P_ludovicianus_44738" "P_ludovicianus_44739"
[46] "P_ludovicianus_44740" "P_ludovicianus_44741" "P_ludovicianus_44742"
[49] "P_ludovicianus_44743" "P_ludovicianus_44744" "P_ludovicianus_44745"
[52] "P_ludovicianus_44746" "P_ludovicianus_44747" "P_ludovicianus_44748"
[55] "P_ludovicianus_44749" "P_ludovicianus_44753" "P_ludovicianus_44754"
[58] "P_ludovicianus_44761" "P_ludovicianus_44775" "P_melanocephalus_34890"
[61] "P_melanocephalus_43110" "P_melanocephalus_43276" "P_melanocephalus_44346"
[64] "P_melanocephalus_44347" "P_melanocephalus_44471" "P_melanocephalus_44651"
[67] "P_melanocephalus_44660" "P_melanocephalus_44666" "P_melanocephalus_44676"
[70] "P_melanocephalus_44677" "P_melanocephalus_44678" "P_melanocephalus_44679"
[73] "P_melanocephalus_44680" "P_melanocephalus_44681" "P_melanocephalus_44683"
[76] "P_melanocephalus_44684" "P_melanocephalus_44685" "P_melanocephalus_44686"
[79] "P_melanocephalus_44687" "P_melanocephalus_44688" "P_melanocephalus_44689"
[82] "P_melanocephalus_44692" "P_melanocephalus_44693" "P_melanocephalus_44694"
[85] "P_melanocephalus_44695" "P_melanocephalus_44697" "P_melanocephalus_44699"
[88] "P_melanocephalus_44752" "P_melanocephalus_44760" "P_melanocephalus_44763"
[91] "P_melanocephalus_44764" "P_melanocephalus_44765" "P_melanocephalus_44766"
[94] "P_melanocephalus_44769" "P_melanocephalus_44770" "P_melanocephalus_44772"
[97] "P_melanocephalus_44773" "P_melanocephalus_44774" "P_melanocephalus_44776"
[100] "P_melanocephalus_44777" "P_melanocephalus_44778" "P_melanocephalus_44779"
[103] "P_melanocephalus_44780" "P_melanocephalus_44782" "P_melanocephalus_44787"
[106] "P_melanocephalus_44788" "P_melanocephalus_44789" "P_melanocephalus_44790"
[109] "P_melanocephalus_44791" "P_melanocephalus_44792" "P_melanocephalus_44793"
[112] "P_melanocephalus_44794" "P_melanocephalus_44795" "P_melanocephalus_44798"
[115] "P_melanocephalus_44799" "P_melanocephalus_44801" "P_melanocephalus_44802"
[118] "P_melanocephalus_44803" "P_melanocephalus_44804" "P_melanocephalus_44805"
[121] "P_melanocephalus_44806" "P_melanocephalus_44808" "P_melanocephalus_44809"
[124] "P_melanocephalus_44810" "P_melanocephalus_44837" "P_melanocephalus_44840"
[127] "P_melanocephalus_44843" "P_melanocephalus_44845" "P_melanocephalus_44846"
[130] "P_melanocephalus_44847" "P_melanocephalus_44848" "P_melanocephalus_44853"
[133] "P_melanocephalus_44854" "P_melanocephalus_45175" "P_melanocephalus_45200"
[136] "P_melanocephalus_45232" "P_melanocephalus_45709" "P_melanocephalus_45926"
#read in all ten runs and save each dataframe in a list
<-list()
runs#read in log files
for (i in 1:10){
<-read.table(paste0("binary_fileset.", i, ".Q"))
runs[[i]]
}
#isolate run 2 (best according to CV)
<-runs[[2]]
run2#add sample info in the correct order (same as input vcf)
$sample<-colnames(v@gt)[-1]
run2
#
$sample_id == run2$sample #check if sample info table order matches the vcf samps
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[136] TRUE TRUE TRUE
<-samps[match(run2$sample,samps$sample_id),] #use 'match' to match orders
samps$sample_id == run2$sample #check if this worked samps
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[136] TRUE TRUE TRUE
#add q-values to the sampling data.frame now that orders match
$mac.lud.q<-run2$V1
samps$mac.mel.q<-run2$V2
samps
#plot
ggplot(samps, aes(x=mac.mel.q, y=male.total/12, color=as.factor(mtDNA))) +
geom_point(cex = 3, alpha = 1)+
guides(shape = guide_legend(override.aes = list(size = 5), order=2, label.theme= element_text(face="italic")))+
xlab(paste("melanocephalus ancestry"))+
ylab(paste("melanocephalus phenotype"))+
labs(title = "") +
scale_color_manual(values = c("#ef3b2c","#fff319"),na.value = "grey") +
ylim(c(0,12)) +
scale_y_continuous(breaks=seq(0, 1, 1/4)) +
theme_classic()
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning: Removed 25 rows containing missing values or values outside the scale range
(`geom_point()`).
$male.prop<-samps$male.total/12
samps#do linear regression
<- lm(mac.mel.q ~ male.prop, samps)
phengen.lm #print out the summary
summary(phengen.lm)
Call:
lm(formula = mac.mel.q ~ male.prop, data = samps)
Residuals:
Min 1Q Median 3Q Max
-0.87505 0.02263 0.03650 0.03700 0.40788
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.03415 0.02151 -1.588 0.115
male.prop 0.99714 0.02706 36.851 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.1266 on 111 degrees of freedom
(25 observations deleted due to missingness)
Multiple R-squared: 0.9244, Adjusted R-squared: 0.9238
F-statistic: 1358 on 1 and 111 DF, p-value: < 2.2e-16
#plot with best fit linear regression line laid over
<-ggplot(samps, aes(x=mac.mel.q, y=male.prop, color=as.factor(mtDNA))) +
pgeom_point(cex = 3, alpha = 1)+
guides(shape = guide_legend(override.aes = list(size = 5), order=2, label.theme= element_text(face="italic")))+
xlab(paste("melanocephalus genomic ancestry"))+
ylab(paste("melanocephalus phenotype"))+
labs(title = "") +
scale_color_manual(values = c("#ef3b2c","#fff319"),na.value = "grey") +
#ylim(c(0,12)) +
scale_y_continuous(breaks=seq(0, 1, 1/4)) +
geom_abline(slope = coef(phengen.lm)[["male.prop"]], intercept = coef(phengen.lm)[["(Intercept)"]]) +
theme_classic()+
theme(legend.position="none")
ggMarginal(p, type = "histogram")
Warning: Removed 25 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 25 rows containing missing values or values outside the scale range
(`geom_point()`).
#save
<-ggMarginal(p, type = "histogram") pp
Warning: Removed 25 rows containing missing values or values outside the scale range
(`geom_point()`).
Removed 25 rows containing missing values or values outside the scale range
(`geom_point()`).
#ggsave("~/Desktop/grosbeak.rad/geno.pheno.plot.pdf", pp, width = 4,height = 4,units = "in")
#plot
ggplot(samps, aes(x=mtDNA, y=mac.mel.q, color=as.factor(mtDNA))) +
geom_point(cex = 3, alpha = 1) +
guides(shape = guide_legend(override.aes = list(size = 5), order=2, label.theme= element_text(face="italic")))+
xlab(paste("melanocephalus ancestry"))+
ylab(paste("melanocephalus phenotype"))+
labs(title = "") +
scale_color_manual(values = c("#ef3b2c","#fff319"),na.value = "grey") +
ylim(c(0,12)) +
scale_y_continuous(breaks=seq(0, 1, 1/4)) +
theme_classic()
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning: Removed 20 rows containing missing values or values outside the scale range
(`geom_point()`).
<-samps[!is.na(samps$mtDNA),]
subggplot(sub, aes(x = as.factor(mtDNA), y = mac.mel.q, color=as.factor(mtDNA))) +
::stat_halfeye(adjust = 10, width = .5, .width = 0, justification = -.3, point_colour = NA) +
ggdistgeom_boxplot(width = .25, outlier.shape = NA) +
geom_point(size = 1.5,alpha = .75,position = position_jitter(seed = 1, width = .02))+
scale_color_manual(values = c("#ef3b2c","#fff319"), aesthetics = c("fill", "color")) +
theme_classic()+
coord_cartesian(xlim = c(1.3, 2.2), clip = "off")
#save
<-ggplot(sub, aes(x = as.factor(mtDNA), y = mac.mel.q, color=as.factor(mtDNA))) +
gplot::stat_halfeye(adjust = 15, width = .75, .width = 0, justification = -.3, point_colour = NA) +
ggdistgeom_boxplot(width = .25, outlier.shape = NA) +
geom_point(size = 1.5,alpha = .75,position = position_jitter(seed = 1, width = .02))+
scale_color_manual(values = c("#ef3b2c","#fff319"), aesthetics = c("fill", "color")) +
theme_classic()+
coord_cartesian(xlim = c(1.35, 2.3), clip = "off")+
theme(legend.position="none")
#ggsave("~/Desktop/grosbeak.rad/mt.boxplot.plot.pdf", gplot, width = 4.2,height = 3.6,units = "in")
#get info for a table showing relative frequency of mitonuclear discordance
<-c()
kfor (i in 1:nrow(sub)){
if(sub$mtDNA[i] == 0 & sub$mac.lud.q[i] > .5){k[i]<-"ludo.ludo"}
else if(sub$mtDNA[i] == 0 & sub$mac.lud.q[i] < .5){k[i]<-"ludo.mel"}
else if(sub$mtDNA[i] == 1 & sub$mac.mel.q[i] > .5){k[i]<-"mel.mel"}
else if(sub$mtDNA[i] == 1 & sub$mac.mel.q[i] < .5){k[i]<-"mel.ludo"}
}table(k)
k
ludo.ludo ludo.mel mel.mel
44 2 72
mean(sub$mac.mel.q[sub$mtDNA == 0])
[1] 0.04359798
mean(sub$mac.mel.q[sub$mtDNA == 1])
[1] 0.9695234
#check if discordant genotypes are still rare in the center of the hybrid zone
table(k[sub$site == 7 | sub$site == 8])
ludo.ludo ludo.mel mel.mel
2 2 15
table(k[sub$site > 5 & sub$site < 10])
ludo.ludo ludo.mel mel.mel
11 2 23
<- chisq.test(rbind(c(44,0),c(2,72)))
chisq chisq
Pearson's Chi-squared test with Yates' continuity correction
data: rbind(c(44, 0), c(2, 72))
X-squared = 105.77, df = 1, p-value < 2.2e-16