library(ggplot2)library(ggExtra)#load dataset from publicly available repositorysamps<-read.csv("https://raw.githubusercontent.com/DevonDeRaad/grosbeak.rad.hybridtransect/refs/heads/main/data/grosbeak.sample.info.csv")#plot overall phenotype against ancestry for males#isolate samples with "male total phenotype score" (113 out of 138)male.tot<-samps[!is.na(samps$male.total),]#plotplot(male.tot$mac.mel.q,male.tot$male.total)
0.2 Check to make sure that date collected is not strongly influencing testis size
Code
#subset to only malesmale<-samps[samps$sex =="male",]#with testis measurementsmale<-samps[!is.na(samps$Rtestiswidth),]#plotplot(male$day.year, male$total.testis.area)#calculate statscor(male$day.year,male$total.testis.area)
[1] 0.1060319
Code
lm<-lm(total.testis.area ~ day.year, data = male)summary(lm)
Call:
lm(formula = total.testis.area ~ day.year, data = male)
Residuals:
Min 1Q Median 3Q Max
-120.490 -30.779 -3.303 35.510 131.594
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 68.9564 100.4290 0.687 0.494
day.year 0.7290 0.6489 1.123 0.264
Residual standard error: 49.11 on 111 degrees of freedom
Multiple R-squared: 0.01124, Adjusted R-squared: 0.002335
F-statistic: 1.262 on 1 and 111 DF, p-value: 0.2637
Code
#plot with best fit linear regression line laid overp<-ggplot(male, aes(x=day.year, y=total.testis.area)) +geom_point(cex =3, alpha = .5)+guides(shape =guide_legend(override.aes =list(size =5), order=2, label.theme=element_text(face="italic")))+xlab(paste("Day of the year (0 - 365)"))+ylab(paste("Testis area"))+labs(title ="") +scale_color_manual(values =c("#ef3b2c","#fff319"),na.value ="grey") +#ylim(c(0,12)) +#scale_y_continuous(breaks=seq(0, 1, 1/4)) +geom_abline(slope =coef(lm)[["day.year"]], intercept =coef(lm)[["(Intercept)"]]) +theme_classic()+theme(legend.position="none")ggMarginal(p, type ="histogram")
0.3 Now test for ancestry / testis area association
Code
#calculate 'minor parent ancestry'x<-c()for (i in1:nrow(male)){ x[i]<-min(male$mac.lud.q[i],male$mac.mel.q[i])}hist(x, breaks=20)
Code
male$minor.ancestry<-x#plot the association between 'minor parent ancestry' (essentially admixture proportion) and testis sizeplot(male$minor.ancestry, male$total.testis.area)
Code
#try binning for hybrid vs notmean(male$total.testis.area[male$minor.ancestry <.02])
---title: "grosbeak ancestry associations"format: html: code-fold: show code-tools: truetoc: truetoc-title: Document Contentsnumber-sections: trueembed-resources: true---### Investigate genotype/phenotype associations```{r}library(ggplot2)library(ggExtra)#load dataset from publicly available repositorysamps<-read.csv("https://raw.githubusercontent.com/DevonDeRaad/grosbeak.rad.hybridtransect/refs/heads/main/data/grosbeak.sample.info.csv")#plot overall phenotype against ancestry for males#isolate samples with "male total phenotype score" (113 out of 138)male.tot<-samps[!is.na(samps$male.total),]#plotplot(male.tot$mac.mel.q,male.tot$male.total)plot(male.tot$mac.mel.q,male.tot$tarsus)plot(male.tot$mac.mel.q,male.tot$wing)plot(male.tot$mac.mel.q,male.tot$nape)plot(male.tot$mac.mel.q,male.tot$back)plot(male.tot$mac.mel.q,male.tot$rump)plot(male.tot$mac.mel.q,male.tot$breast.underwing.coverts)plot(male.tot$mac.mel.q,male.tot$flanks)```### Check to make sure that date collected is not strongly influencing testis size```{r}#subset to only malesmale<-samps[samps$sex =="male",]#with testis measurementsmale<-samps[!is.na(samps$Rtestiswidth),]#plotplot(male$day.year, male$total.testis.area)#calculate statscor(male$day.year,male$total.testis.area)lm<-lm(total.testis.area ~ day.year, data = male)summary(lm)#plot with best fit linear regression line laid overp<-ggplot(male, aes(x=day.year, y=total.testis.area)) +geom_point(cex =3, alpha = .5)+guides(shape =guide_legend(override.aes =list(size =5), order=2, label.theme=element_text(face="italic")))+xlab(paste("Day of the year (0 - 365)"))+ylab(paste("Testis area"))+labs(title ="") +scale_color_manual(values =c("#ef3b2c","#fff319"),na.value ="grey") +#ylim(c(0,12)) +#scale_y_continuous(breaks=seq(0, 1, 1/4)) +geom_abline(slope =coef(lm)[["day.year"]], intercept =coef(lm)[["(Intercept)"]]) +theme_classic()+theme(legend.position="none")ggMarginal(p, type ="histogram") #savepp<-ggMarginal(p, type ="histogram") #ggsave("~/Desktop/grosbeak.rad/testis.date.plot.pdf", pp, width = 6,height = 4,units = "in")```### Now test for ancestry / testis area association```{r}#calculate 'minor parent ancestry'x<-c()for (i in1:nrow(male)){ x[i]<-min(male$mac.lud.q[i],male$mac.mel.q[i])}hist(x, breaks=20)male$minor.ancestry<-x#plot the association between 'minor parent ancestry' (essentially admixture proportion) and testis sizeplot(male$minor.ancestry, male$total.testis.area)#try binning for hybrid vs notmean(male$total.testis.area[male$minor.ancestry <.02])mean(male$total.testis.area[male$minor.ancestry >.02])#try different cutoffmean(male$total.testis.area[male$minor.ancestry <.2])mean(male$total.testis.area[male$minor.ancestry >.2])#try separating by class and speciesmean(male$total.testis.area[male$mac.lud.q < .05])mean(male$total.testis.area[male$mac.lud.q > .05& male$mac.lud.q < .5])mean(male$total.testis.area[male$mac.lud.q > .5& male$mac.lud.q < .95])mean(male$total.testis.area[male$mac.lud.q > .95])#make a column assigning samples as pure parental or hybridmale$class<-"x"male$class[male$mac.lud.q < .05]<-"melanocephalus"male$class[male$mac.lud.q > .05& male$mac.lud.q < .5]<-"melanocephalus hybrid"male$class[male$mac.lud.q > .5& male$mac.lud.q < .95]<-"ludovicianus hybrid"male$class[male$mac.lud.q > .95]<-"ludovicianus"#plot the boxplotsggplot(male, aes(class, total.testis.area)) +geom_boxplot() +theme_classic() +theme(axis.text.x =element_text(angle =45, hjust =1))#boxplot(total.testis.area ~ class, data = male)#no apparent differences based on ancestry class#plot individual testis measurementsplot(male$minor.ancestry, male$Ltestislength)plot(male$minor.ancestry, male$Ltestiswidth)plot(male$minor.ancestry, male$Rtestislength)plot(male$minor.ancestry, male$Rtestiswidth)#calculate statscor(male$minor.ancestry,male$total.testis.area)lm<-lm(total.testis.area ~ minor.ancestry, data = male)summary(lm)```### Plot final figure```{r}#plot with best fit linear regression line laid overp<-ggplot(male, aes(x=minor.ancestry, y=total.testis.area)) +geom_point(cex =3, alpha = .5)+guides(shape =guide_legend(override.aes =list(size =5), order=2, label.theme=element_text(face="italic")))+xlab(paste("Minor parent ancestry proportion"))+ylab(paste("Testis area"))+labs(title ="") +scale_color_manual(values =c("#ef3b2c","#fff319"),na.value ="grey") +#ylim(c(0,12)) +#scale_y_continuous(breaks=seq(0, 1, 1/4)) +geom_abline(slope =coef(lm)[["minor.ancestry"]], intercept =coef(lm)[["(Intercept)"]]) +theme_classic()+theme(legend.position="none")ggMarginal(p, type ="histogram")#savepp<-ggMarginal(p, type ="histogram") #ggsave("~/Desktop/grosbeak.rad/testis.ancestry.plot.pdf", pp, width = 4,height = 4,units = "in")```