Focusing the GWAS Lens on days to flower using latent variable phenotypes derived from global multi-environment trials
The Plant Genome. (2022) e20269.
Introduction
This vignette contains the code and analysis done for the paper:
- Sandesh Neupane, Derek M. Wright, Raul O. Martinez, Jakob Butler, Jim L. Weller, Kirstin E. Bett. Focusing the GWAS Lens on days to flower using latent variable phenotypes derived from global multi-environment trials. The Plant Genome. (2022) e20269. doi.org/10.1002/tpg2.20269
- https://github.com/derekmichaelwright/AGILE_LDP_GWAS_Phenology
which is follow-up to:
- Derek M. Wright, Sandesh Neupane, Taryn Heidecker, Teketel A. Haile, Crystal Chan, Clarice J. Coyne, Rebecca J. McGee, Sripada Udupa, Fatima Henkrar, Eleonora Barilli, Diego Rubiales, Tania Gioia, Giuseppina Logozzo, Stefania Marzario, Reena Mehra, Ashutosh Sarker, Rajeev Dhakal, Babul Anwar, Debashish Sarker, Albert Vandenberg & Kirstin E. Bett. Understanding photothermal interactions can help expand production range and increase genetic diversity of lentil (Lens culinaris Medik.). Plants, People, Planet. (2021) 3(2): 171-181. doi.org/10.1002/ppp3.10158
- https://github.com/derekmichaelwright/AGILE_LDP_Phenology
This work done as part of the AGILE project at the University of Saskatchewan.
GWAS
# Genotypes
<- read.csv("myG_LDP.csv", header = F)
myG # Phenotypes
<- read.csv("myY.csv")
myY # CoVariates
<- myY[,c("Name","b","c")] myCV
# Load library
library(GAPIT3) # devtools::install_github("jiabowang/GAPIT3",force=TRUE)
#
setwd("Results")
<- GAPIT(
myGAPIT Y = myY,
G = myG,
model = c("MLM","MLMM","FarmCPU","Blink"),
PCA.total = 4
)# GWAS with b covariate (Results in `Results_b/`)
setwd("Results_b")
<- GAPIT(
myGAPIT Y = myY %>% select(-PTModel_b),
G = myG,
CV = myCV[,c("Name","PTModel_b")]
model = c("MLM","MLMM","FarmCPU","Blink"),
PCA.total = 0
)# GWAS with c covariate (Results in `Results_c/`)
setwd("Results_c")
<- GAPIT(
myGAPIT Y = myY %>% select(-PTModel_c),
G = myG,
CV = myCV[,c("Name","PTModel_c")]
model = c("MLM","MLMM","FarmCPU","Blink"),
PCA.total = 0
)
Prepare Post GWAS Data
# Load libraries
library(tidyverse)
library(ggbeeswarm)
library(ggpubr)
library(ggtext)
library(GGally)
library(plotly)
library(htmlwidgets)
# Genotype and Phenotype
<- read.csv("myG_LDP.csv", header = T)
myG <- read.csv("myY.csv")
myY # Genotype metadata - Cluster groups for the LDP (Wright et al., 2020)
<- read.csv("myLDP.csv") %>%
myLDP mutate(Cluster = factor(Cluster))
# List of flowering time genes
<- read.csv("Lentil_FT_Genes.csv", fileEncoding = "UTF-8") %>%
myFTGenes rename(Chromosome=Chr) %>%
mutate(Expt = "FT genes", Model = "MLM", Position = as.numeric(Start))
# Nucleotide symbols
#myN <- read.csv("IUPAC_Nucleotide_Code.csv")
# ggplot theme
<- theme_bw() +
theme_AGL theme(strip.background = element_rect(colour = "black", fill = NA, size = 0.5),
panel.background = element_rect(colour = "black", fill = NA, size = 0.5),
panel.border = element_rect(colour = "black", size = 0.5),
panel.grid = element_line(color = alpha("black", 0.1), size = 0.5),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank())
#
<- -log10(0.05/nrow(myG))
threshold <- -log10(0.000005)
threshold2 <- c("MLM", "MLMM", "FarmCPU", "Blink")
myModels <- c("darkgreen","darkred","darkblue","darkslategray4")
myColors_Model <- c("azure4","darkgreen","darkorange3","darkblue")
myColors_Macro <- c("darkred", "darkorange3", "darkgoldenrod2", "deeppink3",
myColors_Cluster "steelblue", "darkorchid4", "cornsilk4", "darkgreen")
<- c(
myMs "Lcu.2RBY.Chr2p42556949", #"Lcu.2RBY.Chr2p42543877",
"Lcu.2RBY.Chr5p1069654", #"Lcu.2RBY.Chr5p1063138",
"Lcu.2RBY.Chr6p2528817",
"Lcu.2RBY.Chr6p307256203")#"Lcu.2RBY.Chr6p306914970") #
<- read.csv("Results/GAPIT.MLM.Ro17_DTF.GWAS.Results.csv") %>%
myGM filter(SNP %in% myMs)
#
<- c(
myLabels "PC1", "PC2", "PC3",
"*a*", "*b*", "*c*",
"Ro16_DTF", "Ro17_DTF", "Su16_DTF", "Su17_DTF", "Su18_DTF", "Us18_DTF",
"In16_DTF", "In17_DTF", "Ba16_DTF", "Ba17_DTF", "Ne16_DTF", "Ne17_DTF",
"Mo16_DTF", "Mo17_DTF", "Sp16_DTF", "Sp17_DTF", "It16_DTF", "It17_DTF",
"Su17_*Tf*", "Ba17_*Tf*", "It17_*Tf*",
"Su17_*Tb*", "Ba17_*Tb*", "It17_*Tb*",
"Su17_*Pf*", "Ba17_*Pf*", "It17_*Pf*",
"Su17_*Pc*", "Ba17_*Pc*", "It17_*Pc*")
<- c(
myTraits "PCA_PC1", "PCA_PC2", "PCA_PC3",
"PTModel_a", "PTModel_b", "PTModel_c",
"Ro16_DTF", "Ro17_DTF", "Su16_DTF", "Su17_DTF", "Su18_DTF", "Us18_DTF",
"In16_DTF", "In17_DTF", "Ba16_DTF", "Ba17_DTF", "Ne16_DTF", "Ne17_DTF",
"Mo16_DTF", "Mo17_DTF", "Sp16_DTF", "Sp17_DTF", "It16_DTF", "It17_DTF",
"Su17_Tf", "Ba17_Tf", "It17_Tf",
"Su17_Tb", "Ba17_Tb", "It17_Tb",
"Su17_Pf", "Ba17_Pf", "It17_Pf",
"Su17_Pc", "Ba17_Pc", "It17_Pc")
Supplemental Table 1
Supplemental Table 1: GWAS results for SNPs significantly associated with the traits of interest used in this study. Traits include days from sowing to flower (DTF), the first three principal components from a principal component analysis (PCA) of the DTF data (PC1, PC2, PC3), the a, b and c coefficients from a photothermal model (PTModel), the nominal base temperature (Tb), nominal base photoperiod (Pc), thermal sum required for flowering (Tf) and the photoperiod sum required for flowering (Pf). Rosthern, Canada 2016 and 2017 (Ro16 and Ro17), Sutherland, Canada 2016, 2017 and 2018 (Su16, Su17 and Su18), Central Ferry, USA 2018 (Us18), Bhopal, India 2016 and 2017 (In16 and In17), Jessore, Bangladesh 2016 and 2017 (Ba16 and Ba17), Bardiya, Nepal 2016 and 2017 (Ne16 and Ne17), Marchouch, Morocco 2016 and 2017 (Mo16 and Mo17), Cordoba, Spain (Sp16 and Sp17), Metaponto, Italy 2016 and 2017 (It16 and It17). For further details see Wright et al. (2020). Traits run with the b or c coefficients as a covariate are indicated with the “-b” and “-c” suffix in the trait column.
# Create functions
<- function (x) { x[, colSums(is.na(x)) < nrow(x)] }
dropNAcol <- function(folder = NULL, file = NULL,
GWAS_PeakTable g.range = 3000000, rowread = 2000) {
# Prep data
<- substr(file, gregexpr("GAPIT.", file)[[1]][1]+6,
trait gregexpr(".GWAS.Results.csv", file)[[1]][1]-1 )
<- substr(trait, 1, gregexpr("\\.", trait)[[1]][1]-1 )
model <- substr(trait, gregexpr("\\.", trait)[[1]][1]+1, nchar(trait) )
trait <- substr(trait, 1, gregexpr("_", trait)[[1]][1]-1)
expt <- substr(trait, gregexpr("_", trait)[[1]][1]+1, nchar(trait) )
trait #
<- NULL
output #
<- read.csv(paste0(folder, file), nrows = rowread) %>%
rr mutate(`-log10(p)` = -log10(P.value))
#
if(sum(colnames(rr)=="nobs")>0) { rr <- select(rr, -nobs) }
#
if(!is.null(threshold2)) {
<- rr %>% filter(-log10(P.value) >= threshold2)
rx else{ rx <- rr %>% filter(-log10(P.value) > threshold) }
} #
if(nrow(rx) == 0) {
1,] <- NA
rx[1,"Chromosome"] <- 1
rx[<- bind_rows(output, rx)
output else {
} while(nrow(rx) > 0) {
<- rx %>% group_by(Chromosome) %>%
rp top_n(., n = 1, `-log10(p)`) %>% ungroup()
<- bind_rows(output, rp)
output for(i in 1:nrow(rp)) {
<- rp$Position[i] - g.range
g.range1 <- rp$Position[i] + g.range
g.range2 if(g.range1 < 0) { g.range1 <- 0 }
<- rx[rx$Chromosome != rp$Chromosome[i] |
rx $Position < g.range1 |
rx$Position > g.range2,]
rx
}if(!is.null(threshold2)) {
<- rx %>% filter(-log10(P.value) > threshold2)
rx else{ rx <- rx %>% filter(-log10(P.value) > threshold) }
}
}
}<- output %>%
output mutate(Model = model, Trait = trait, Expt = expt,
FT_Genes = NA, FT_Pos = NA, FT_End = NA,
FT_Distance = NA, FT_Distances = NA)
#
if(sum(!is.na(output$Position)) > 0) {
for(i in 1:nrow(output)) {
<- output$Position[i] - g.range
g.range1 <- output$Position[i] + g.range
g.range2 <- myFTGenes[myFTGenes$Chromosome == output$Chromosome[i] &
ftg $Position > g.range1 &
myFTGenes$Position < g.range2,]
myFTGenes<- ftg %>%
ftg mutate(Distance = abs(Position - output$Position[i])) %>%
arrange(Distance)
$FT_Genes[i] <- paste(ftg$Name, collapse = " ; ")
output$FT_Pos[i] <- paste(ftg$Position, collapse = " ; ")
output$FT_End[i] <- paste(ftg$End, collapse = " ; ")
output$FT_Distances[i] <- paste(ftg$Distance, collapse = " ; ")
output$FT_Distance[i] <- ftg$Distance[1]
output
}
}%>%
output select(SNP, Chromosome, Position, Model, Expt, Trait,
`-log10(p)`, effect, MAF=maf,
P.value,
Rsquare.of.Model.without.SNP, Rsquare.of.Model.with.SNP,
FDR_Adjusted_P.values, %>%
FT_Genes, FT_Pos, FT_End, FT_Distance, FT_Distances) filter(!(`-log10(p)` < threshold & Model != "MLM"))
}# Table for significant markers
<- NULL
myP # No covariate
<- grep(".GWAS.Results", list.files("Results"))
fnames <- list.files("Results")[fnames]
fnames for(i in fnames) {
<- bind_rows(myP, GWAS_PeakTable(folder = "Results/", file = i) %>% dropNAcol())
myP
}# covariate = b
<- grep(".GWAS.Results", list.files("Results_b"))
fnames <- list.files("Results_b")[fnames]
fnames for(i in fnames) {
<- bind_rows(myP, GWAS_PeakTable(folder = "Results_b/", file = i) %>%
myP mutate(Trait = paste0(Trait,"-b")) %>% dropNAcol())
}# covariate = c
<- grep(".GWAS.Results", list.files("Results_c"))
fnames <- list.files("Results_c")[fnames]
fnames for(i in fnames) {
<- bind_rows(myP, GWAS_PeakTable(folder = "Results_c/", file = i) %>%
myP mutate(Trait = paste0(Trait,"-c")) %>% dropNAcol())
}#
<- myP %>% filter(!is.na(SNP)) %>%
myP arrange(Chromosome, Position, P.value, Trait) %>%
mutate(Chromosome = factor(Chromosome, levels = 1:7),
Model = factor(Model, levels = myModels))
# Save
write.csv(myP, "Supplemental_Table_01.csv", row.names = F)
Figure 1
Figure 1: Genome-wide association results for days from sowing to flower (DTF) in Sutherland, Canada 2017 (Su17), Jessore, Bangladesh 2017 (Ba17), Bardiya, Nepal 2017 (Ne17) and Metaponto, Italy 2017 (It17) for a lentil diversity panel. (a) Histograms of DTF. (b) Manhattan and QQ plots for GWAS results using MLM, MLMM, FarmCPU and Blink models. Vertical lines represent specific base pair locations to facilitate comparisons across traits.
# Prep data
<- c("Su17_DTF", "Ba17_DTF", "Ne17_DTF", "It17_DTF")
expttraits <- expttraits
expts for(i in 1:length(expts)) {
<- substr(expts[i], 1, gregexpr("_", expts)[[i]][1]-1)
expts[i]
}#
<- myY %>%
yy gather(ExptTrait, Value, 2:ncol(.)) %>%
filter(ExptTrait %in% expttraits) %>%
mutate(Model = factor("Days To Flower"),
ExptTrait = factor(ExptTrait, levels = expttraits),
Expt = plyr::mapvalues(ExptTrait, expttraits, expts),
Expt = factor(Expt, levels = expts))
#
<- NULL
xx for(i in expttraits) {
<- grep(paste0(i,".GWAS.Results"), list.files("Results/"))
fnames <- list.files("Results/")[fnames]
fnames for(j in fnames) {
<- substr(j, gregexpr("\\.", j)[[1]][1]+1, nchar(j))
mod <- substr(mod, 1, gregexpr("\\.", mod)[[1]][1]-1 )
mod <- read.csv(paste0("Results/", j)) %>%
xj mutate(Model = mod, ExptTrait = i,
`-log10(p)` = -log10(P.value),
`-log10(p)_Exp` = -log10((rank(P.value, ties.method="first")-.5)/nrow(.)))
<- bind_rows(xx, xj)
xx
}
}#
<- xx %>%
xx mutate(Model = factor(Model, levels = myModels),
ExptTrait = factor(ExptTrait, levels = expttraits),
Expt = plyr::mapvalues(ExptTrait, expttraits, expts),
Expt = factor(Expt, levels = expts)) %>%
arrange(desc(Model))
#
<- xx %>% mutate(`-log10(p)` = ifelse(`-log10(p)` > 10, 10, `-log10(p)`))
xx #
<- xx %>% filter(`-log10(p)` < threshold2)
x1 <- xx %>% filter(`-log10(p)` > threshold2, `-log10(p)` < threshold)
x2 <- xx %>% filter(`-log10(p)` > threshold)
x3 # Plot phenotypes
<- ggplot(yy, aes(x = Value)) +
mp1 geom_histogram(fill = "darkgreen", binwidth = 2) +
facet_grid(Expt ~ "DTF") +
+
theme_AGL xlim(c(35,160)) +
labs(x = "Days after planting", y = "Count")
#
<- c(0, 3, threshold2, threshold, 8, 10)
mybreaks <- c(0, 3, round(threshold2,1), round(threshold,1), 8, ">10")
mylabels # Plot GWAS
<- ggplot(x1, aes(x = Position / 100000000, y = `-log10(p)`,
mp2 shape = Model, fill = Model)) +
geom_vline(data = myGM, alpha = 0.5, color = "red",
aes(xintercept = Position / 100000000)) +
geom_hline(yintercept = threshold, color = "red", alpha = 0.7) +
geom_hline(yintercept = threshold2, color = "blue", alpha = 0.7) +
geom_point(size = 0.3, color = alpha("white", 0)) +
geom_point(data = x2, size = 0.75, color = alpha("white", 0)) +
geom_point(data = x3, size = 1.25) +
facet_grid(Expt ~ Chromosome, scales = "free", space = "free_x") +
scale_x_continuous(breaks = 0:7) +
scale_y_continuous(breaks = mybreaks, labels = mylabels) +
scale_fill_manual(values = myColors_Model) +
scale_shape_manual(values = 22:25) +
+
theme_AGL theme(legend.position = "none",
strip.text.y = element_blank() ,
strip.background.y = element_blank(),
axis.title.y = ggtext::element_markdown()) +
guides(shape = guide_legend(override.aes = list(size = 3))) +
labs(y = "-log<sub>10</sub>(*p*)", x = "100 Mbp")
# Plot QQ
<- ggplot(x1, aes(x = `-log10(p)_Exp`, y = `-log10(p)`,
mp3 shape = Model, fill = Model)) +
geom_point(size = 0.5, color = alpha("white", 0)) +
geom_point(data = x2, size = 0.75, color = alpha("white", 0)) +
geom_point(data = x3, size = 1.25, color = alpha("white", 0)) +
geom_hline(yintercept = threshold, color = "red", alpha = 0.7) +
geom_hline(yintercept = threshold2, color = "blue", alpha = 0.7) +
geom_abline() +
scale_x_continuous(breaks = mybreaks, labels = mylabels) +
scale_y_continuous(breaks = mybreaks, labels = mylabels) +
scale_fill_manual(values = myColors_Model) +
scale_shape_manual(values = 22:25) +
facet_grid(Expt ~ "QQ", scales = "free_y") +
+
theme_AGL theme(legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
labs(y = NULL, x = "Expected")
# Append
<- get_legend(mp2, position = "bottom")
mpl <- ggarrange(mp1, mp2, mp3, ncol = 3, align = "h", widths = c(2,7, 1.5),
mp legend.grob = mpl, legend = "bottom", common.legend = T)
# Save
ggsave("Figure_01.jpg", mp, width = 12, height = 7, dpi = 600, bg = "white")
Figure 2
Figure 2: Summary of genome-wide association results using MLM, MLMM, FarmCPU and Blink models on single environment and multi-environment traits related to days from sowing to flower (DTF) in a lentil diversity panel. Larger points represent a significant association (-log10(p) > 6.7) with a trait of interest under one of the GWAS models, while smaller points represent a suggestive association (-log10(p) > 5.3). PC1, PC2, PC3, represent the first three principal components of an analysis of 18 site-years of DTF data (Wright et al., 2020) in Rosthern, Canada 2016 and 2017 (Ro16 and Ro17), Sutherland, Canada 2016, 2017 and 2018 (Su16, Su17 and Su18), Central Ferry, USA 2018 (Us18), Bhopal, India 2016 and 2017 (In16 and In17), Jessore, Bangladesh 2016 and 2017 (Ba16 and Ba17), Bardiya, Nepal 2016 and 2017 (Ne16 and Ne17), Marchouch, Morocco 2016 and 2017 (Mo16 and Mo17), Cordoba, Spain (Sp16 and Sp17), Metaponto, Italy 2016 and 2017 (It16 and It17). a, b and c are coefficients from a photothermal model (Wright et al., 2020) and used to calculate the nominal base temperature (Tb), nominal base photoperiod (Pc), thermal sum required for flowering (Tf) and the photoperiod sum required for flowering (Pf). Colors are representative of macroenvironments: Temperate (green), South Asian (orange) Mediterranean (blue) and multi-environment traits (grey). Vertical lines represent specific base pair locations to facilitate comparisons across traits.
# Create function
<- function(traits, labels = traits,
gg_gwas_summary width = 12, height = 8,
hlines, title = NULL, filename) {
# Prep data
<- c("<b style='color:black'>{ExptTrait}</b>",
me "<b style='color:darkgreen'>{ExptTrait}</b>",
"<b style='color:darkorange3'>{ExptTrait}</b>",
"<b style='color:darkblue'>{ExptTrait}</b>")
#
<- c("Ro16","Ro17","Su16","Su17","Su18","Us18")
e1 <- c("Mo16","Mo17","Sp16","Sp17","It16","It17")
e2 <- c("In16","In17","Ba16","Ba17","Ne16","Ne17")
e3 <- c("Multi Environment","Temperate", "South Asia", "Mediterranean")
myMacroEnvs #
<- myP %>%
xx filter(paste(Expt, Trait, sep = "_") %in% traits) %>%
mutate(ExptTrait = paste(Expt, Trait, sep = "_"),
ExptTrait = plyr::mapvalues(ExptTrait, traits, labels),
Model = factor(Model, levels = myModels),
MacroEnv = NA,
MacroEnv = ifelse(Expt %in% e1, "Temperate", MacroEnv),
MacroEnv = ifelse(Expt %in% e2, "Mediterranean", MacroEnv),
MacroEnv = ifelse(Expt %in% e3, "South Asia", MacroEnv),
MacroEnv = ifelse(is.na(MacroEnv), "Multi Environment", MacroEnv),
cat_col = NA,
cat_col = ifelse(MacroEnv == "Multi Environment", glue::glue(me[1]), cat_col),
cat_col = ifelse(MacroEnv == "Temperate", glue::glue(me[2]), cat_col),
cat_col = ifelse(MacroEnv == "South Asia", glue::glue(me[3]), cat_col),
cat_col = ifelse(MacroEnv == "Mediterranean", glue::glue(me[4]), cat_col),
cat_col = factor(cat_col, levels = rev(cat_col[match(labels, ExptTrait)])),
ExptTrait = factor(ExptTrait, levels = rev(labels)),
MacroEnv = factor(MacroEnv, levels = myMacroEnvs)) %>%
arrange(ExptTrait)
<- xx %>% filter(`-log10(p)` > threshold)
x1 <- xx %>% filter(`-log10(p)` < threshold)
x2 # Plot
<- ggplot(x1, aes(x = Position / 100000000, y = cat_col,
mp shape = Model, fill = MacroEnv)) +
geom_vline(data = myGM, alpha = 0.5, color = "red",
aes(xintercept = Position / 100000000)) +
geom_point(data = x2, size = 1, color = "black", alpha = 0.5,
aes(key1 = SNP, key2 = FT_Genes, key3 = FT_Distances)) +
geom_point(size = 2, color = "black", alpha = 0.5,
aes(key1 = SNP, key2 = FT_Genes, key3 = FT_Distances)) +
geom_hline(yintercept = hlines, alpha = 0.7) +
facet_grid(. ~ Chromosome, drop = F, scales = "free_x", space = "free_x") +
scale_fill_manual(values = myColors_Macro, guide = "none") +
scale_shape_manual(values = c(22:25), breaks = myModels) +
scale_y_discrete(drop = F) +
scale_x_continuous(breaks = 0:6, minor_breaks = 0:6) +
+
theme_AGL theme(legend.position = "bottom",
axis.text.y = element_markdown(),
strip.text.y = element_markdown(),
plot.title = ggtext::element_markdown()) +
guides(shape = guide_legend(override.aes = list(size = 4, fill = "black"))) +
labs(title = title, y = NULL, x = "100 Mbp")
# Save
ggsave(paste0(filename, ".jpg"), mp, width = width, height = height, dpi = 600)
# Save HTML
<- ggplotly(mp) %>% layout(showlegend = FALSE)
mp saveWidget(as_widget(mp),
paste0(filename,"_plotly.html"),
knitrOptions = list(fig.width = width, fig.height = height),
selfcontained = T)
}# Plot GWAS results
gg_gwas_summary(traits = myTraits, labels = myLabels,
hlines = c(3.5,6.5,9.5,12.5,18.5,24.5,30.5,33.5),
width = 12, height = 9, filename = "Figure_02")
Additional Figure 1
# Plot GWAS results with b covariate
gg_gwas_summary(title = "Covariate = *b*",
traits = paste0(myTraits, "-b"),
labels = myLabels,
hlines = c(3.5,6.5,9.5,12.5,18.5,24.5,30.5,32.5),
width = 12, height = 9, filename = "Additional/Additional_Figure_01")
Additional Figure 2
# Plot GWAS results with c covariate
gg_gwas_summary(title = "Covariatie = *c*",
traits = paste0(myTraits, "-c"),
labels = myLabels,
hlines = c(3.5,6.5,9.5,12.5,18.5,24.5,30.5,32.5),
width = 12, height = 9, filename = "Additional/Additional_Figure_02")
Figure 3
Figure 3: Genome-wide association results for the first three principal components (PC1, PC2 and PC3) the b and c coefficients from the photothermal model developed by Wright et al., (2020), and the nominal base temperature (Tb) in Sutherland, Canada 2017 (Su17) for a lentil diversity panel. (a) Histograms of each trait, scaled from 0 to 1. (b) Manhattan and QQ plots for GWAS results using MLM, MLMM, FarmCPU and Blink models. Vertical lines represent specific base pair locations to facilitate comparisons across traits.
# Prep data
<- c("PCA_PC1", "PTModel_b", "PCA_PC2", "PTModel_c", "PCA_PC3", "Su17_Tb")
expttraits <- c("PC1", "*b*", "PC2", "*c*", "PC3", "Su17_*Tb*")
expts #
<- myY
yy for(i in expttraits) { yy[,i] <- scales::rescale(yy[,i], c(0, 1)) }
<- yy %>%
yy gather(ExptTrait, Value, 2:ncol(.)) %>%
filter(ExptTrait %in% expttraits) %>%
mutate(ExptTrait = plyr::mapvalues(ExptTrait, expttraits, expts),
ExptTrait = factor(ExptTrait, levels = expts))
#
<- NULL
xx for(i in expttraits) {
<- grep(paste0(i,".GWAS.Results"), list.files("Results/"))
fnames <- list.files("Results/")[fnames]
fnames for(j in fnames) {
<- substr(j, gregexpr("\\.", j)[[1]][1]+1, nchar(j))
mod <- substr(mod, 1, gregexpr("\\.", mod)[[1]][1]-1 )
mod <- read.csv(paste0("Results/", j)) %>%
xj mutate(Model = mod, ExptTrait = i,
`-log10(p)` = -log10(P.value),
`-log10(p)_Exp` = -log10((rank(P.value, ties.method="first")-.5)/nrow(.)))
<- bind_rows(xx, xj)
xx
}
}#
<- xx %>%
xx mutate(Model = factor(Model, levels = myModels),
ExptTrait = plyr::mapvalues(ExptTrait, expttraits, expts),
ExptTrait = factor(ExptTrait, levels = expts)) %>%
arrange(desc(Model))
#
<- xx %>% mutate(`-log10(p)` = ifelse(`-log10(p)` > 10, 10, `-log10(p)`))
xx #
<- xx %>% filter(`-log10(p)` < threshold2)
x1 <- xx %>% filter(`-log10(p)` > threshold2, `-log10(p)` < threshold)
x2 <- xx %>% filter(`-log10(p)` > threshold)
x3 # Plot phenotypes
<- ggplot(yy, aes(x = Value)) +
mp1 geom_histogram(fill = "darkgreen") +
facet_grid(ExptTrait ~ "Phenotype", scales = "free") +
+
theme_AGL theme(strip.text.y = ggtext::element_markdown()) +
labs(x = "Scaled Value", y = "Count")
#
<- c(0, 3, threshold2, threshold, 8, 10)
mybreaks <- c(0, 3, round(threshold2,1), round(threshold,1), 8, ">10")
mylabels # Plot GWAS
<- ggplot(x1, aes(x = Position / 100000000, y = `-log10(p)`,
mp2 shape = Model, fill = Model)) +
geom_vline(data = myGM, alpha = 0.5, color = "red",
aes(xintercept = Position / 100000000)) +
geom_hline(yintercept = threshold, color = "red", alpha = 0.7) +
geom_hline(yintercept = threshold2, color = "blue", alpha = 0.7) +
geom_point(size = 0.3, color = alpha("white", 0)) +
geom_point(data = x2, size = 0.75, color = alpha("white", 0)) +
geom_point(data = x3, size = 1.25) +
facet_grid(ExptTrait ~ Chromosome, scales = "free", space = "free_x") +
scale_x_continuous(breaks = 0:7) +
scale_y_continuous(breaks = mybreaks, labels = mylabels) +
scale_fill_manual(values = myColors_Model) +
scale_shape_manual(values = 22:25) +
+
theme_AGL theme(legend.position = "none",
strip.text.y = element_blank() ,
strip.background.y = element_blank(),
axis.title.y = ggtext::element_markdown()) +
guides(shape = guide_legend(override.aes = list(size = 4))) +
labs(y = "-log<sub>10</sub>(*p*)", x = "100 Mbp")
# Plot QQ
<- ggplot(x1, aes(x = `-log10(p)_Exp`, y = `-log10(p)`,
mp3 shape = Model, fill = Model)) +
geom_point(size = 0.5, color = alpha("white", 0)) +
geom_point(data = x2, size = 0.75, color = alpha("white", 0)) +
geom_point(data = x3, size = 1.25, color = alpha("white", 0)) +
geom_hline(yintercept = threshold, color = "red", alpha = 0.7) +
geom_hline(yintercept = threshold2, color = "blue", alpha = 0.7) +
geom_abline() +
scale_x_continuous(breaks = mybreaks, labels = mylabels) +
scale_y_continuous(breaks = mybreaks, labels = mylabels) +
scale_fill_manual(values = myColors_Model) +
scale_shape_manual(values = 22:25) +
facet_grid(ExptTrait ~ "QQ", scales = "free_y") +
+
theme_AGL theme(legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
strip.text.y = ggtext::element_markdown()) +
labs(y = NULL, x = "Expected")
# Append
<- get_legend(mp2, position = "bottom")
mpl <- ggarrange(mp1, mp2, mp3, ncol = 3, widths = c(2,7,1.5), align = "h",
mp legend.grob = mpl, legend = "bottom", common.legend = T)
# Save
ggsave("Figure_03.jpg", mp, width = 12, height = 10, dpi = 600, bg = "white")
Figure 3 Subplots
# Create function
<- function(expttraits = c("PC1", "*b*")) {
fig3_Subplot # Prep data
<- xx %>% filter(ExptTrait %in% expttraits, `-log10(p)` < threshold2)
x1 <- xx %>% filter(ExptTrait %in% expttraits, `-log10(p)` > threshold2, `-log10(p)` < threshold)
x2 <- xx %>% filter(ExptTrait %in% expttraits, `-log10(p)` > threshold)
x3 # Plot phenotypes
<- ggplot(yy %>% filter(ExptTrait %in% expttraits), aes(x = Value)) +
mp1 geom_histogram(fill = "darkgreen") +
facet_grid(ExptTrait ~ "Phenotype", scales = "free") +
+
theme_AGL theme(strip.text.y = ggtext::element_markdown()) +
labs(x = "Scaled Value", y = "Count")
#
<- c(0, 3, threshold2, threshold, 8, 10)
mybreaks <- c(0, 3, round(threshold2,1), round(threshold,1), 8, ">10")
mylabels # Plot GWAS
<- ggplot(x1, aes(x = Position / 100000000, y = `-log10(p)`,
mp2 shape = Model, fill = Model)) +
geom_vline(data = myGM, alpha = 0.5, color = "red",
aes(xintercept = Position / 100000000)) +
geom_hline(yintercept = threshold, color = "red", alpha = 0.7) +
geom_hline(yintercept = threshold2, color = "blue", alpha = 0.7) +
geom_point(size = 0.3, color = alpha("white", 0)) +
geom_point(data = x2, size = 0.75, color = alpha("white", 0)) +
geom_point(data = x3, size = 1.25) +
facet_grid(ExptTrait ~ Chromosome, scales = "free", space = "free_x") +
scale_x_continuous(breaks = 1:7) +
scale_y_continuous(breaks = mybreaks, labels = mylabels) +
scale_fill_manual(values = myColors_Model) +
scale_shape_manual(values = 22:25) +
+
theme_AGL theme(legend.position = "none",
strip.text.y = element_blank() ,
strip.background.y = element_blank(),
axis.title.y = ggtext::element_markdown()) +
guides(shape = guide_legend(override.aes = list(size = 3))) +
labs(y = "-log<sub>10</sub>(*p*)", x = "100 Mbp")
# Plot QQ
<- ggplot(x1, aes(x = `-log10(p)_Exp`, y = `-log10(p)`, shape = Model,
mp3 fill = Model)) +
geom_point(size = 0.5, color = alpha("white", 0)) +
geom_point(data = x2, size = 0.75, color = alpha("white", 0)) +
geom_point(data = x3, size = 1.25, color = alpha("white", 0)) +
geom_hline(yintercept = threshold, color = "red", alpha = 0.7) +
geom_hline(yintercept = threshold2, color = "blue", alpha = 0.7) +
geom_abline() +
scale_x_continuous(breaks = mybreaks, labels = mylabels) +
scale_y_continuous(breaks = mybreaks, labels = mylabels) +
scale_fill_manual(values = myColors_Model) +
scale_shape_manual(values = 22:25) +
facet_grid(ExptTrait ~ "QQ", scales = "free_y") +
+
theme_AGL theme(legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
strip.text.y = ggtext::element_markdown()) +
labs(y = NULL, x = "Expected")
# Append
<- get_legend(mp2, position = "bottom")
mpl <- ggarrange(mp1, mp2, mp3, ncol = 3, widths = c(2,7,1.5), align = "h",
mp legend.grob = mpl, legend = "bottom", common.legend = T)
mp
}# Plot temperature related
ggsave("Additional/Figure_03_1.jpg",
fig3_Subplot(expttraits = c("PC1", "*b*")),
width = 12, height = 4, dpi = 600, bg = "white")
# Plot photoperiod related
ggsave("Additional/Figure_03_2.jpg",
fig3_Subplot(expttraits = c("PC2", "*c*")),
width = 12, height = 4, dpi = 600, bg = "white")
# Plot PC3
ggsave("Additional/Figure_03_3.jpg",
fig3_Subplot(expttraits = c("PC3", "Su17_*Tb*")),
width = 12, height = 4, dpi = 600, bg = "white")
Figure 4
Figure 4: Genome-wide association results for days from sowing to flower (DTF) with and without covariates for a lentil diversity panel. Manhattan and QQ plots for DTF in (a) Rosthern, Canada 2017 (Ro17) and (b) Cordoba, Spain 2017 (Sp17), using MLM, MLMM, FarmCPU and Blink models. The middle panel shows GWAS results without a covariate, while the top and bottom panel show GWAS results using the c and b (temperature and photoperiod) coefficients from the photothermal model developed by Wright et al. 2020, respectively. Vertical lines represent specific base pair locations to facilitate comparisons across traits.
# Prep data
<- myTraits
expttraits <- expttraits
expts for(i in 1:length(expts)) {
<- substr(expts[i], 1, gregexpr("_", expts)[[i]][1]-1)
expts[i]
}# expttraits <- c("Sp17_DTF", "Ro17_DTF")
for(i in expttraits) {
<- NULL; xb <- NULL; xc <- NULL
x1 #
<- grep(paste0(i,".GWAS.Results"), list.files("Results/"))
fnames <- list.files("Results/")[fnames]
fnames for(j in fnames) {
<- substr(j, gregexpr("\\.", j)[[1]][1]+1, nchar(j))
mod <- substr(mod, 1, gregexpr("\\.", mod)[[1]][1]-1 )
mod <- read.csv(paste0("Results/", j)) %>%
xj mutate(Model = mod, Facet = i,
`-log10(p)` = -log10(P.value),
`-log10(p)_Exp` = -log10((rank(P.value, ties.method="first")-.5)/nrow(.)))
<- bind_rows(x1, xj)
x1
}#
<- grep(paste0(i,".GWAS.Results"), list.files("Results_b/"))
fnames <- list.files("Results_b/")[fnames]
fnames for(j in fnames) {
<- substr(j, gregexpr("\\.", j)[[1]][1]+1, nchar(j))
mod <- substr(mod, 1, gregexpr("\\.", mod)[[1]][1]-1 )
mod <- read.csv(paste0("Results_b/", j)) %>%
xj mutate(Model = mod, Facet = "CV = *b*",
`-log10(p)` = -log10(P.value),
`-log10(p)_Exp` = -log10((rank(P.value, ties.method="first")-.5)/nrow(.)))
<- bind_rows(xb, xj)
xb
}#
<- grep(paste0(i,".GWAS.Results"), list.files("Results_c/"))
fnames <- list.files("Results_c/")[fnames]
fnames for(j in fnames) {
<- substr(j, gregexpr("\\.", j)[[1]][1]+1, nchar(j))
mod <- substr(mod, 1, gregexpr("\\.", mod)[[1]][1]-1 )
mod <- read.csv(paste0("Results_c/", j)) %>%
xj mutate(Model = mod, Facet = "CV = *c*",
`-log10(p)` = -log10(P.value),
`-log10(p)_Exp` = -log10((rank(P.value, ties.method="first")-.5)/nrow(.)))
<- bind_rows(xc, xj)
xc
}#
<- bind_rows(x1, xb, xc) %>%
xx mutate(Model = factor(Model, levels = myModels),
Facet = factor(Facet, levels = c("CV = *c*", i, "CV = *b*"))) %>%
arrange(Facet, desc(Model))
#
<- xx %>% mutate(`-log10(p)` = ifelse(`-log10(p)` > 10, 10, `-log10(p)`))
xx #
<- xx %>% filter(`-log10(p)` < threshold2)
x1 <- xx %>% filter(`-log10(p)` > threshold2, `-log10(p)` < threshold)
x2 <- xx %>% filter(`-log10(p)` > threshold)
x3 #
<- c(0, 3, threshold2, threshold, 8, 10)
mybreaks <- c(0, 3, round(threshold2,1), round(threshold,1), 8, ">10")
mylabels # Plot GWAS
<- ggplot(x1, aes(x = Position / 100000000, y = `-log10(p)`,
mp1 shape = Model, fill = Model)) +
geom_vline(data = myGM, alpha = 0.5, color = "red",
aes(xintercept = Position / 100000000)) +
geom_hline(yintercept = threshold, color = "red", alpha = 0.7) +
geom_hline(yintercept = threshold2, color = "blue", alpha = 0.7) +
geom_point(size = 0.3, color = alpha("white", 0)) +
geom_point(data = x2, size = 0.75, color = alpha("white", 0)) +
geom_point(data = x3, size = 1.25) +
facet_grid(Facet ~ Chromosome, scales = "free", space = "free_x") +
scale_x_continuous(breaks = 0:7) +
scale_y_continuous(breaks = mybreaks, labels = mylabels) +
scale_fill_manual(values = myColors_Model) +
scale_shape_manual(values = 22:25) +
+
theme_AGL theme(strip.text.y = element_blank() ,
strip.background.y = element_blank(),
axis.title.y = ggtext::element_markdown()) +
guides(shape = guide_legend(override.aes = list(size = 4))) +
labs(y = "-log<sub>10</sub>(*p*)", x = "100 Mbp")
# Plot QQ
<- ggplot(x1, aes(x = `-log10(p)_Exp`, y = `-log10(p)`,
mp2 shape = Model, fill = Model)) +
geom_point(size = 0.5, color = alpha("white", 0)) +
geom_point(data = x2, size = 0.75, color = alpha("white", 0)) +
geom_point(data = x3, size = 1.25, color = alpha("white", 0)) +
geom_hline(yintercept = threshold, color = "red", alpha = 0.7) +
geom_hline(yintercept = threshold2, color = "blue", alpha = 0.7) +
geom_abline() +
scale_x_continuous(breaks = mybreaks, labels = mylabels) +
scale_y_continuous(breaks = mybreaks, labels = mylabels) +
scale_fill_manual(values = myColors_Model) +
scale_shape_manual(values = 22:25) +
facet_grid(Facet ~ "QQ", scales = "free_y") +
+
theme_AGL theme(legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
strip.text.y = ggtext::element_markdown()) +
labs(y = NULL, x = "Expected")
# Append
<- ggarrange(mp1, mp2, ncol = 2, widths = c(7,1.5), align = "h",
mp legend = "bottom", common.legend = T)
# Save
ggsave(paste0("Additional/CV/CV_", i, ".jpg"), mp,
width = 12, height = 6, dpi = 600, bg = "white")
}
# Prep (a)
<- magick::image_read("Additional/CV/CV_Ro17_DTF.jpg") %>%
im1 ::image_annotate("(a)", size = 70, weight = 600, location = "+30+10")
magick# Prep (b)
<- magick::image_read("Additional/CV/CV_Sp17_DTF.jpg") %>%
im2 ::image_annotate("(b)", size = 70, weight = 600, location = "+30+10")
magick# Append
<- magick::image_append(c(im1, im2), stack = T)
im # Save
::image_write(im, "Figure_04.jpg") magick
Figure 5
Figure 5: Allelic effects of selected markers on days from sowing to flower (DTF) across contrasting locations in a lentil diversity panel. Sutherland, Canada 2017 (Su17), Jessore, Bangladesh 2017 (Ba17), Bardiya, Nepal 2017 (Ne17) and Metaponto, Italy 2017 (It17). Colors are based on a hierarchical clustering of principal components done by Wright et al. (2020) using 18 site-years of DTF data across the three major lentil growing macroenvironments. (a), (b) and (c) correspond to SNPs at the vertical lines on Figures 1, 2, 3 & 4.
# Prep data chr 2 + 5 markers
<- c("Su17_DTF", "Ba17_DTF", "Ne17_DTF", "It17_DTF")
expttraits <- substr(expttraits, 1, 4)
expts <- myMs[c(1,2)]
markers <- myG %>% filter(rs %in% markers) %>%
gg column_to_rownames("rs") %>%
select(11:ncol(.)) %>%
t() %>% as.data.frame()
for(i in 1:nrow(gg)) {
$Markers[i] <- paste(gg[i,1:length(markers)], collapse = " - ")
gg
}<- gg %>% rownames_to_column("Name")
gg #
<- myY %>% select(Name, expttraits) %>%
yy gather(ExptTrait, Value, 2:ncol(.)) %>%
separate(ExptTrait, c("Expt","Trait"), remove = F)
#
<- left_join(yy, gg, by = "Name") %>%
xx left_join(myLDP, by = "Name") %>%
mutate(Expt = factor(Expt, levels = expts))
#
<- xx %>%
x1 filter(!grepl("M|R|W|S|Y|K|N", Markers), ExptTrait %in% expttraits) %>%
arrange(Markers) %>%
mutate(Markers = factor(Markers, levels = c("C - A","C - C","T - A","T - C")))
# Plot chr 2 + 5 markers
<- ggplot(x1, aes(x = Markers, y = Value)) +
mp1 geom_quasirandom(aes(color = Cluster, key1 = Name, key2 = Origin)) +
facet_wrap(Expt ~ ., ncol = 6, scales = "free_y") +
scale_color_manual(values = myColors_Cluster) +
guides(color = guide_legend(nrow = 1, override.aes = list(size = 3))) +
+
theme_AGL theme(legend.position = "bottom",
panel.grid.major.x = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5)) +
labs(x = "Lcu.2RBY.Chr2p42556949 - ( C | T )\nLcu.2RBY.Chr5p1069654 - ( A | C )",
y = "DTF")
# Prep data FTb
<- myMs[3]
markers <- myG %>% filter(rs %in% markers) %>%
gg column_to_rownames("rs") %>%
select(11:ncol(.)) %>%
t() %>% as.data.frame()
for(i in 1:nrow(gg)) {
$Markers[i] <- paste(gg[i,1:length(markers)], collapse = " - ")
gg
}<- gg %>% rownames_to_column("Name")
gg #
<- myY %>% select(Name, expttraits) %>%
yy gather(ExptTrait, Value, 2:ncol(.)) %>%
separate(ExptTrait, c("Expt","Trait"), remove = F)
#
<- left_join(yy, gg, by = "Name") %>%
xx left_join(myLDP, by = "Name") %>%
mutate(Expt = factor(Expt, levels = expts))
#
<- xx %>%
x1 filter(!grepl("M|R|W|S|Y|K|N", Markers)) %>%
arrange(Markers)
# Plot FTb
<- ggplot(x1 %>% filter(ExptTrait %in% expttraits), aes(x = Markers, y = Value)) +
mp2 geom_quasirandom(aes(color = Cluster, key1 = Name, key3 = Origin)) +
facet_wrap(Expt ~ ., ncol = 6, scales = "free_y") +
scale_color_manual(values = myColors_Cluster) +
guides(color = guide_legend(nrow = 1, override.aes = list(size = 3))) +
+
theme_AGL theme(legend.position = "bottom",
panel.grid.major.x = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5)) +
labs(x = "Lcu.2RBY.Chr6p2528817",
y = "DTF")
# Prep data FTa
<- myMs[4]
markers <- myG %>% filter(rs %in% markers) %>%
gg column_to_rownames("rs") %>%
select(11:ncol(.)) %>%
t() %>% as.data.frame()
for(i in 1:nrow(gg)) {
$Markers[i] <- paste(gg[i,1:length(markers)], collapse = " - ")
gg
}<- gg %>% rownames_to_column("Name")
gg #
<- myY %>% select(Name, expttraits) %>%
yy gather(ExptTrait, Value, 2:ncol(.)) %>%
separate(ExptTrait, c("Expt","Trait"), remove = F)
#
<- left_join(yy, gg, by = "Name") %>%
xx left_join(myLDP, by = "Name") %>%
mutate(Expt = factor(Expt, levels = expts))
#
<- xx %>%
x1 filter(!grepl("M|R|W|S|Y|K|N", Markers)) %>%
arrange(Markers)
# Plot FTa
<- ggplot(x1 %>% filter(ExptTrait %in% expttraits), aes(x = Markers, y = Value)) +
mp3 geom_quasirandom(aes(color = Cluster, key1 = Name, key3 = Origin)) +
facet_wrap(Expt ~ ., ncol = 6, scales = "free_y") +
scale_color_manual(values = myColors_Cluster) +
guides(color = guide_legend(nrow = 1, override.aes = list(size = 3))) +
+
theme_AGL theme(legend.position = "bottom",
panel.grid.major.x = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5)) +
labs(x = "Lcu.2RBY.Chr6p307256203",
y = "DTF")
# Append
<- ggarrange(mp1, mp2, mp3, ncol = 1, labels = c("(a)","(b)","(c)"),
mp heights = c(1.2,1,1), legend = "bottom", common.legend = T)
# Save
ggsave("Figure_05.jpg", mp, width = 8, height = 8, dpi = 600, bg = "white")
ggsave("Additional/Figure_05_a.jpg", mp1, width = 8, height = 4, dpi = 600)
ggsave("Additional/Figure_05_b.jpg", mp2, width = 8, height = 4, dpi = 600)
ggsave("Additional/Figure_05_c.jpg", mp3, width = 8, height = 4, dpi = 600)
# Plot chr 2 + 5 markers
<- ggplotly(mp1)
mp1 saveWidget(as_widget(mp1),
paste0("Additional/Figure_05_a_plotly.html"),
selfcontained = T)
# Plot FTb
<- ggplotly(mp2)
mp2 saveWidget(as_widget(mp2),
paste0("Additional/Figure_05_b_plotly.html"),
selfcontained = T)
# Plot FTa
<- ggplotly(mp3)
mp3 saveWidget(as_widget(mp3),
paste0("Additional/Figure_05_c_plotly.html"),
selfcontained = T)
Supplemental Figure 1
Supplemental Figure 1: Summary of genome-wide association results using MLM, MLMM, FarmCPU and Blink models on days from sowing to flower (DTF) in a lentil diversity panel using no covariates or with the either the b or c coefficient as a covariate. Larger points represent a significant association (-log10(p) > 6.7) with a trait of interest under one of the GWAS models, while smaller points represent a suggestive association (-log10(p) > 5.3). Rosthern, Canada 2016 and 2017 (Ro16 and Ro17), Sutherland, Canada 2016, 2017 and 2018 (Su16, Su17 and Su18), Central Ferry, USA 2018 (Us18), Bhopal, India 2016 and 2017 (In16 and In17), Jessore, Bangladesh 2016 and 2017 (Ba16 and Ba17), Bardiya, Nepal 2016 and 2017 (Ne16 and Ne17), Marchouch, Morocco 2016 and 2017 (Mo16 and Mo17), Cordoba, Spain (Sp16 and Sp17), Metaponto, Italy 2016 and 2017 (it16 and It17). b and c are coefficients derived from a photothermal model (Wright et al. (2020). Colors are representative of macroenvironments: Temperate (green), South Asian (orange) and Mediterranean (blue). Vertical lines represent specific base pair locations to facilitate comparisons across traits.
# Create function
<- function(filename, myTs, hlines, width, height) {
gg_gwas_cv_summary # Prep data
<- c(myTs, paste0(myTs,"-b"), paste0(myTs, "-c"))
myTs <- gsub("-b","-*b*", myTs)
myLs <- gsub("-c","-*c*", myLs)
myLs #
<- c("<b style='color:black'>{ExptTrait}</b>",
me "<b style='color:darkgreen'>{ExptTrait}</b>",
"<b style='color:darkorange3'>{ExptTrait}</b>",
"<b style='color:darkblue'>{ExptTrait}</b>")
#
<- c("Ro16","Ro17","Su16","Su17","Su18","Us18")
e1 <- c("Mo16","Mo17","Sp16","Sp17","It16","It17")
e2 <- c("In16","In17","Ba16","Ba17","Ne16","Ne17")
e3 <- c("Temperate", "South Asia", "Mediterranean")
myMacroEnvs #
<- myP %>%
xx filter(paste(Expt, Trait, sep = "_") %in% myTs) %>%
mutate(ExptTrait = paste(Expt, Trait, sep = "_"),
CV = "No Covariate",
CV = ifelse(grepl("-b", Trait), "CV = *b*", CV),
CV = ifelse(grepl("-c", Trait), "CV = *c*", CV),
CV = factor(CV, levels = c("CV = *c*","No Covariate","CV = *b*")),
Trait = gsub("-b|-c", "", Trait),
ExptTrait = gsub("-b|-c", "", ExptTrait),
Model = factor(Model, levels = myModels),
MacroEnv = NA,
MacroEnv = ifelse(Expt %in% e1, "Temperate", MacroEnv),
MacroEnv = ifelse(Expt %in% e2, "Mediterranean", MacroEnv),
MacroEnv = ifelse(Expt %in% e3, "South Asia", MacroEnv),
MacroEnv = ifelse(is.na(MacroEnv), "Multi Environment", MacroEnv),
cat_col = NA,
cat_col = ifelse(MacroEnv == "Multi Environment", glue::glue(me[1]), cat_col),
cat_col = ifelse(MacroEnv == "Temperate", glue::glue(me[2]), cat_col),
cat_col = ifelse(MacroEnv == "South Asia", glue::glue(me[3]), cat_col),
cat_col = ifelse(MacroEnv == "Mediterranean", glue::glue(me[4]), cat_col),
cat_col = factor(cat_col, levels = rev(cat_col[match(myTs, ExptTrait)])),
MacroEnv = factor(MacroEnv, levels = myMacroEnvs)) %>%
arrange(ExptTrait)
#
<- xx %>% filter(`-log10(p)` > threshold)
x1 <- xx %>% filter(`-log10(p)` < threshold)
x2 # Plot
<- ggplot(x1, aes(x = Position / 100000000, y = cat_col,
mp shape = Model, fill = MacroEnv)) +
geom_vline(data = myGM, alpha = 0.5, color = "red",
aes(xintercept = Position / 100000000)) +
geom_point(data = x2, size = 1, color = "black", alpha = 0.5,
aes(key1 = SNP, key2 = FT_Genes, key3 = FT_Distances)) +
geom_point(size = 2, alpha = 0.5,
aes(key1 = SNP, key2 = FT_Genes, key3 = FT_Distances)) +
geom_hline(yintercept = hlines, alpha = 0.5) +
facet_grid(CV ~ Chromosome, drop = F, scales = "free_x", space = "free_x") +
scale_fill_manual(values = myColors_Macro[2:4], guide = "none") +
scale_shape_manual(values = c(22:25)) +
scale_y_discrete(drop = F) +
+
theme_AGL theme(legend.position = "bottom",
axis.text.y = element_markdown(),
strip.text.y = element_markdown()) +
guides(shape = guide_legend(override.aes = list(size = 4, fill = "black"))) +
labs(y = NULL, x = "100 Mbp")
# Save
ggsave(paste0(filename, ".jpg"), mp, width = width, height = height, dpi = 600)
# Save HTML
<- ggplotly(mp) %>% layout(showlegend = FALSE)
mp saveWidget(as_widget(mp),
paste0(filename, "_plotly.html"),
knitrOptions = list(fig.width = width, fig.height = height),
selfcontained = T)
}# Plot GWAS results
gg_gwas_cv_summary(filename = "Supplemental_Figure_01",
myTs = c("Ro16_DTF", "Ro17_DTF", "Su16_DTF", "Su17_DTF", "Su18_DTF", "Us18_DTF",
"In16_DTF", "In17_DTF", "Ba16_DTF", "Ba17_DTF", "Ne16_DTF", "Ne17_DTF",
"Mo16_DTF", "Mo17_DTF", "Sp16_DTF", "Sp17_DTF", "It16_DTF", "It17_DTF"),
hlines = c(6.5,12.5), width = 12, height = 9)
Additional Figure 3
# Plot GWAS results
gg_gwas_cv_summary(filename = "Additional/Additional_Figure_03",
myTs = c("Su17_Tf", "Ba17_Tf", "It17_Tf",
"Su17_Tb", "Ba17_Tb", "It17_Tb",
"Su17_Pf", "Ba17_Pf", "It17_Pf",
"Su17_Pc", "Ba17_Pc", "It17_Pc"),
hlines = c(3.5,6.5,9.6), width = 12, height = 9)
Additional Figure 4
# Plot GWAS results
gg_gwas_cv_summary(filename = "Additional/Additional_Figure_04",
myTs = c("PCA_PC1", "PCA_PC2", "PCA_PC3",
"PTModel_a", "PTModel_b", "PTModel_c"),
hlines = c(3.5), width = 12, height = 6)
Supplemental Figure 2
Supplemental Figure 2: Correlations along with the corresponding correlation coefficients (R2) for the first three principal components (PC1, PC2 & PC3) of an analysis of 18 site-years of days from sowing to flower (DTF) data (Wright et al., 2020), coefficients from a photothermal model (a, b & c), DTF, photoperiod sum required for flowering (Pf), nominal base photoperiod (Pc), thermal sum required for flowering (Tf) and nominal base temperature (Tb) in Sutherland, Canada 2017 (SU17), Jessore, Bangladesh 2017 (Ba17) and Metaponto, Italy 2017 (It17). Colors are based on a hierarchical clustering of the PCAs.
# Create plotting functions
<- function(data, mapping, cols = myColors_Cluster, ...) {
my_lower ggplot(data = data, mapping = mapping) +
geom_point(alpha = 0.5, size = 0.5, aes(color = Cluster)) +
scale_color_manual(values = cols) +
theme_bw() +
theme(axis.text = element_blank(),
axis.ticks = element_blank())
}<- function(data, mapping, cols = myColors_Cluster, ...) {
my_middle ggplot(data = data, mapping = mapping) +
geom_density(alpha = 0.5) +
scale_color_manual(name = NULL, values = cols) +
scale_fill_manual(name = NULL, values = cols) +
guides(color = F, fill = guide_legend(nrow = 3, byrow = T)) +
theme_bw() +
theme(axis.text = element_blank(),
axis.ticks = element_blank())
}# See: https://github.com/ggobi/ggally/issues/139
<- function(data, mapping, color = I("black"), sizeRange = c(1,5), ...) {
my_upper # Prep data
<- eval_data_col(data, mapping$x)
x <- eval_data_col(data, mapping$y)
y #
<- cor(x, y, method = "pearson", use = "complete.obs")^2
r2 <- format(r2, digits = 2)[1]
rt <- max(sizeRange)
cex <- as.character(rt)
tt # plot the cor value
<- ggally_text(label = tt, mapping = aes(), color = color,
p xP = 0.5, yP = 0.5, size = 4, ... ) + theme_bw()
# Create color palette
<- RColorBrewer::brewer.pal(n = 10, name = "RdBu")[2:9]
corColors if (r2 <= -0.9) { corCol <- alpha(corColors[1], 0.5)
else if (r2 >= -0.9 & r2 <= -0.6) { corCol <- alpha(corColors[2], 0.5)
} else if (r2 >= -0.6 & r2 <= -0.3) { corCol <- alpha(corColors[3], 0.5)
} else if (r2 >= -0.3 & r2 <= 0) { corCol <- alpha(corColors[4], 0.5)
} else if (r2 >= 0 & r2 <= 0.3) { corCol <- alpha(corColors[5], 0.5)
} else if (r2 >= 0.3 & r2 <= 0.6) { corCol <- alpha(corColors[6], 0.5)
} else if (r2 >= 0.6 & r2 <= 0.9) { corCol <- alpha(corColors[7], 0.5)
} else { corCol <- alpha(corColors[8], 0.5) }
} # Plot
<- p +
p theme(panel.background = element_rect(fill = corCol),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p
}# Prep data
<- c("PCA_PC1", "PCA_PC2", "PCA_PC3", "PTModel_a", "PTModel_b", "PTModel_c",
myTraits1 "Su17_DTF", "It17_DTF", "Ba17_DTF",
"Su17_Pf", "It17_Pf", "Ba17_Pf", "Su17_Pc", "It17_Pc", "Ba17_Pc",
"Su17_Tf", "It17_Tf", "Ba17_Tf", "Su17_Tb", "It17_Tb", "Ba17_Tb")
<- c("PC1", "PC2", "PC3", "*a*", "*b*", "*c*",
myTraits2 "Su17_DTF", "It17_DTF", "Ba17_DTF",
"Su17_*Pf*", "It17_*Pf*", "Ba17_*Pf*", "Su17_*Pc*", "It17_*Pc*", "Ba17_*Pc*",
"Su17_*Tf*", "It17_*Tf*", "Ba17_*Tf*", "Su17_*Tb*", "It17_*Tb*", "Ba17_*Tb*")
<- myY %>% left_join(myLDP, by = "Name")
xx colnames(xx) <- plyr::mapvalues(colnames(xx), myTraits1, myTraits2)
# Plot
<- ggpairs(xx, columns = myTraits2,
mp upper = list(continuous = my_upper),
diag = list(continuous = my_middle),
lower = list(continuous = wrap(my_lower))) +
theme(strip.background = element_rect(fill = "White"),
strip.text.x = element_markdown(),
strip.text.y = element_markdown())
ggsave("Supplemental_Figure_02.jpg",
width = 16, height = 16, dpi = 600) mp,
Additional Figure 5
# Prep data
<- c("PCA_PC1", "PCA_PC2", "PCA_PC3")
myTraits1 <- c("PTModel_a", "PTModel_b", "PTModel_c",
myTraits2 "Su17_DTF", "It17_DTF", "Ba17_DTF",
"Su17_Tf", "It17_Tf", "Ba17_Tf",
"Su17_Tb", "It17_Tb", "Ba17_Tb",
"Su17_Pf", "It17_Pf", "Ba17_Pf",
"Su17_Pc", "It17_Pc", "Ba17_Pc")
<- myY %>% left_join(myLDP, by = "Name") %>%
xx select(Name, Cluster, myTraits2) %>%
gather(Trait, Value, 3:ncol(.)) %>%
mutate(Trait = factor(Trait, levels = myTraits2)) %>%
left_join(select(myY, Name, myTraits1), by = "Name")
# Plot
<- ggplot(xx, aes(x = Value, y = PCA_PC1, color = Cluster)) +
mp1 geom_point(alpha = 0.7, size = 0.5) +
facet_grid(. ~ Trait, scales = "free_x") +
scale_color_manual(values = myColors_Cluster) +
+ labs(x = NULL) +
theme_AGL theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
guides(colour = guide_legend(override.aes = list(size = 2)))
<- ggplot(xx, aes(x = Value, y = PCA_PC2, color = Cluster)) +
mp2 geom_point(alpha = 0.7, size = 0.5) +
facet_grid(. ~ Trait, scales = "free_x") +
scale_color_manual(values = myColors_Cluster) +
+ labs(x = NULL) +
theme_AGL theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
guides(colour = guide_legend(override.aes = list(size = 2)))
<- ggplot(xx, aes(x = Value, y = PCA_PC3, color = Cluster)) +
mp3 geom_point(alpha = 0.7, size = 0.5) +
facet_grid(. ~ Trait, scales = "free_x") +
scale_color_manual(values = myColors_Cluster) +
+ labs(x = NULL) +
theme_AGL theme(axis.text = element_blank(),
axis.ticks = element_blank()) +
guides(colour = guide_legend(override.aes = list(size = 2)))
<- ggarrange(mp1, mp2, mp3, ncol = 1, nrow = 3,
mp common.legend = T, legend = "right")
ggsave("Additional/Additional_Figure_05.jpg",
width = 16, height = 4, dpi = 600, bg = "white") mp,
Supplemental Figure 3
Supplemental Figure 3: Regional genome-wide association results from 35 Mpb to 50 Mbp on chromosome 2 for selected traits with a lentil diversity panel using MLM, MLMM, FarmCPU and Blink models. Traits include days from sowing to flower (DTF) in Sutherland, Canada 2017 (Su17), with and without the c coefficient used as a covariate (CV=c), DTF in Jessore, Bangladesh 2017 (Ba17), DTF in Bardiya, Nepal 2017 (Ne17), DTF in Metaponto, Italy 2017 (It17), the first principal component (PC1) from a principal component analysis of an analysis of 18 site-years of DTF data, and the b coefficient derived from a photothermal model done by Wright et al. (2020). The vertical line represents a specific base pair location to facilitate comparisons across traits.
# Prep data
<- c("Su17_DTF", "Ba17_DTF", "Ne17_DTF",
expttraits "Sp17_DTF", "PCA_PC1", "PTModel_b")
<- 2
chr <- 35000000
pos1 <- 50000000
pos2 #
<- NULL
xx for(i in expttraits) {
<- grep(paste0(i,".GWAS.Results"), list.files("Results/"))
fnames <- list.files("Results/")[fnames]
fnames for(j in fnames) {
<- substr(j, gregexpr("\\.", j)[[1]][1]+1, nchar(j))
mod <- substr(mod, 1, gregexpr("\\.", mod)[[1]][1]-1 )
mod <- read.csv(paste0("Results/", j)) %>%
xj filter(Chromosome == chr, Position > pos1, Position < pos2) %>%
mutate(Model = mod, ExptTrait = i,
`-log10(p)` = -log10(P.value),
`-log10(p)_Exp` = -log10((rank(P.value, ties.method="first")-.5)/nrow(.)))
<- bind_rows(xx, xj)
xx
}
}<- "Su17_DTF"
i <- grep(paste0(i,".GWAS.Results"), list.files("Results_c/"))
fnames <- list.files("Results_c/")[fnames]
fnames for(j in fnames) {
<- substr(j, gregexpr("\\.", j)[[1]][1]+1, nchar(j))
mod <- substr(mod, 1, gregexpr("\\.", mod)[[1]][1]-1 )
mod <- read.csv(paste0("Results_c/", j)) %>%
xj filter(Chromosome == chr, Position > pos1, Position < pos2) %>%
mutate(Model = mod, ExptTrait = paste0(i,"-c"),
`-log10(p)` = -log10(P.value),
`-log10(p)_Exp` = -log10((rank(P.value, ties.method="first")-.5)/nrow(.)))
<- bind_rows(xx, xj)
xx
}#
<- c("Su17_DTF", "Su17_DTF (CV=*c*)",
expttraits "Ba17_DTF", "Ne17_DTF", "Sp17_DTF", "PC1", "*b*")
<- xx %>%
xx mutate(ExptTrait = plyr::mapvalues(ExptTrait,
c("Su17_DTF-c", "PCA_PC1", "PTModel_b"),
c("Su17_DTF (CV=*c*)", "PC1", "*b*")),
ExptTrait = factor(ExptTrait, levels = expttraits),
Model = factor(Model, levels = myModels)) %>%
mutate(Chromosome = paste("Chromosome", Chromosome)) %>%
arrange(desc(Model))
#
<- xx %>% filter(`-log10(p)` < threshold2)
x1 <- xx %>% filter(`-log10(p)` > threshold2, `-log10(p)` < threshold)
x2 <- xx %>% filter(`-log10(p)` > threshold)
x3 #
<- myGM %>% filter(Chromosome == 2) %>%
vi mutate(Chromosome = paste("Chromosome",Chromosome))
# Plot
<- ggplot() +
mp geom_hline(yintercept = threshold, color = "red") +
geom_hline(yintercept = threshold2, color = "blue") +
geom_vline(data = vi, alpha = 0.5, color = "red",
aes(xintercept = Position / 1000000)) +
geom_point(data = x1, size = 0.75, color = alpha("white", 0),
aes(x = Position / 1000000, y = `-log10(p)`, shape = Model, fill = Model)) +
geom_point(data = x2, size = 1, color = alpha("white", 0),
aes(x = Position / 1000000, y = `-log10(p)`, shape = Model, fill = Model)) +
geom_point(data = x3, size = 1.5,
aes(x = Position / 1000000, y = `-log10(p)`, shape = Model, fill = Model)) +
facet_grid(ExptTrait ~ Chromosome, scales = "free", space = "free_x") +
scale_fill_manual(values = myColors_Model) +
scale_shape_manual(values = 22:25) +
+
theme_AGL theme(legend.position = "bottom",
legend.box = "vertical",
panel.grid = element_blank(),
axis.title.y = ggtext::element_markdown(),
strip.text.y = ggtext::element_markdown()) +
guides(shape = guide_legend(override.aes = list(size = 3))) +
labs(y = "-log<sub>10</sub>(*p*)", x = "Mbp")
# Save
ggsave("Supplemental_Figure_03.jpg", mp, width = 5, height = 10, dpi = 600)
Supplemental Figure 4
Supplemental Figure 4: Regional genome-wide association results from 300 Mbp to 315 Mbp on chromosome 6 for selected traits with a lentil diversity panel using MLM, MLMM, FarmCPU and Blink models. Traits include days from sowing to flower (DTF) in Marchouch, Morocco 2016 (Mo16), thermal sum required for flowering (Tf) in Sutherland, Canada 2017 (Su17) and Metaponto, Italy 2017 (It17), nominal base temperature (Tb) in Su17 and It17, and the third principal component (PC3) from a principal component analysis of an analysis of 18 site-years of DTF data (Wright et al., 2020). Vertical lines represent the locations of selected flowering time genes within the associated QTL.
# Prep data
<- c("PCA_PC3", "Su17_Tb", "It17_Tb", "Su17_Tf", "It17_Tf", "Mo16_DTF")
expttraits <- c("Lcu.2RBY.Chr6p307256203",
markers "Lcu.2RBY.Chr6p309410465",
"Lcu.2RBY.Chr6p306914970")
<- 6
chr <- 300000000
pos1 <- 315000000
pos2 <- c("darkgreen","darkred","darkblue","darkslategray4","khaki4")
colors #
<- NULL
xx for(i in expttraits) {
<- grep(paste0(i,".GWAS.Results"), list.files("Results/"))
fnames <- list.files("Results/")[fnames]
fnames for(j in fnames) {
<- substr(j, gregexpr("\\.", j)[[1]][1]+1, nchar(j))
mod <- substr(mod, 1, gregexpr("\\.", mod)[[1]][1]-1 )
mod <- read.csv(paste0("Results/", j)) %>%
xj filter(Chromosome == chr, Position > pos1, Position < pos2) %>%
mutate(Model = mod, ExptTrait = i,
`-log10(p)` = -log10(P.value),
`-log10(p)_Exp` = -log10((rank(P.value, ties.method="first")-.5)/nrow(.)))
<- bind_rows(xx, xj)
xx
}
}#
<- c("PC3", "Su17_*Tb*", "It17_*Tb*", "Su17_*Tf*", "It17_*Tf*", "Mo16_DTF")
expttraits <- xx %>%
xx mutate(Model = factor(Model, levels = myModels),
ExptTrait = plyr::mapvalues(ExptTrait,
c("PCA_PC3", "Su17_Tb", "It17_Tb", "Su17_Tf", "It17_Tf"),
c("PC3", "Su17_*Tb*", "It17_*Tb*", "Su17_*Tf*", "It17_*Tf*")),
ExptTrait = factor(ExptTrait, levels = expttraits)) %>%
mutate(Chromosome = paste("Chromosome",Chromosome)) %>%
arrange(desc(Model))
#
<- xx %>% filter(`-log10(p)` < threshold2)
x1 <- xx %>% filter(`-log10(p)` > threshold2, `-log10(p)` < threshold)
x2 <- xx %>% filter(`-log10(p)` > threshold)
x3 #
<- c("LcLWD1", "LcFTa1", "LcFTc")
myGenes <- myFTGenes %>%
vg filter(Name %in% myGenes) %>%
mutate(Name = plyr::mapvalues(Name, myGenes, paste0("*",myGenes,"*")),
Name = factor(Name, levels = paste0("*",myGenes,"*")),
Chromosome = paste("Chromosome",Chromosome))
# Plot
<- ggplot() +
mp geom_hline(yintercept = threshold, color = "red") +
geom_hline(yintercept = threshold2, color = "blue") +
geom_vline(data = vg, alpha = 0.5, color = "red",
aes(xintercept = Position / 1000000, lty = Name)) +
geom_point(data = x1, size = 0.75, color = alpha("white", 0),
aes(x = Position / 1000000, y = -log10(P.value),
shape = Model, fill = Model)) +
geom_point(data = x2, size = 1, color = alpha("white", 0),
aes(x = Position / 1000000, y = -log10(P.value),
shape = Model, fill = Model)) +
geom_point(data = x3, size = 1.5,
aes(x = Position / 1000000, y = -log10(P.value),
shape = Model, fill = Model)) +
facet_grid(ExptTrait ~ Chromosome, scales = "free", space = "free_x") +
scale_fill_manual(values = myColors_Model) +
scale_shape_manual(values = 22:25) +
scale_linetype_manual(name = "Genes", values = c(4,1,3)) +
+
theme_AGL theme(legend.position = "bottom",
legend.box = "vertical",
panel.grid = element_blank(),
axis.title.y = ggtext::element_markdown(),
strip.text.y = ggtext::element_markdown(),
legend.text = ggtext::element_markdown()) +
guides(shape = guide_legend(override.aes = list(size = 3))) +
labs(y = "-log<sub>10</sub>(*p*)", x = "Mbp")
# Save
ggsave("Supplemental_Figure_04.jpg", mp, width = 5, height = 10, dpi = 600)
Supplemental Figure 5
Supplemental Figure 5: Linkage disequilibrium decay across the 7 chromosomes in the lentil genome. (a) Linkage disequilibrium decay for all marker combinations. (b) Linkage disequilibrium decay for marker combinations within a 1 Mbp distance. For each chromosome, 1000 SNP were randomly selected for pairwise LD calculations. Shaded lines represent the moving average of 100 pair-wise marker comparisons. Solid line represents a loess regression used to determine the value (vertical dashed line) in which R2 reaches the 0.2 threshold (blue dashed line). Red dashed lines represent the average R2 for each chromosome.
library(genetics)
<- data.frame(stringsAsFactors = F,
dna Symbol = c("A", "C", "G", "T", "U",
"R", "Y", "S", "W", "K", "M", "N"),
Value = c("A/A","C/C","G/G","T/T","U/U",
"A/G","C/T","G/C","A/T","G/T","A/C","N/N") )
<- myG[,c(-2,-5,-6,-7,-8,-9,-10,-11)]
xx for(i in 4:ncol(xx)) {
=="N", i] <- NA
xx[xx[,i]<- plyr::mapvalues(xx[,i], dna$Symbol, dna$Value)
xx[,i]
}#
<- function(x, folder = "Additional/LD/", Chr = 1, Num = 200) {
LD_Decay <- x %>% filter(chrom == Chr)
xc <- xc %>% column_to_rownames("rs")
xr <- xr[,c(-1,-2)]
xr <- round(runif(Num, 1, nrow(xr)))
rr while(sum(duplicated(rr))>0) {
<- round(runif(sum(duplicated(rr)), 1, nrow(xr)))
ra <- rr[!duplicated(rr)]
rr <- c(rr, ra)
rr
}<- rr[order(rr)]
rr <- xr[rr,]
xr #
<- xr %>% t() %>% as.data.frame()
xi <- genetics::LD(genetics::makeGenotypes(xi))
myLD save(myLD, file = paste0(folder, "LD_Chrom_", Chr, "_", Num, ".Rdata"))
}# Calculate LD per chromosome
LD_Decay(x = xx, Chr = 1, Num = 1000)
LD_Decay(x = xx, Chr = 2, Num = 1000)
LD_Decay(x = xx, Chr = 3, Num = 1000)
LD_Decay(x = xx, Chr = 4, Num = 1000)
LD_Decay(x = xx, Chr = 5, Num = 1000)
LD_Decay(x = xx, Chr = 6, Num = 1000)
LD_Decay(x = xx, Chr = 7, Num = 1000)
# Create function
<- function(x, n = 5) {
movingAverage ::filter(x, rep(1 / n, n), sides = 2)
stats
}# Prep data
<- NULL
xx for(i in 1:7) {
<- myG %>% filter(chrom == i)
xc load(paste0("Additional/LD/LD_Chrom_",i,"_1000.Rdata"))
<- myLD$`R^2` %>% as.data.frame() %>%
xi rownames_to_column("SNP1") %>%
gather(SNP2, LD, 2:ncol(.)) %>%
filter(!is.na(LD)) %>%
mutate(Chr = i,
SNP1_d = plyr::mapvalues(SNP1, xc$rs, xc$pos, warn_missing = F),
SNP2_d = plyr::mapvalues(SNP2, xc$rs, xc$pos, warn_missing = F),
Distance = as.numeric(SNP2_d) - as.numeric(SNP1_d)) %>%
arrange(Distance, rev(LD))
#
<- xi %>% filter(Distance < 1000000)
xii <- stats::loess(LD ~ Distance, data = xii, span=0.50)
myloess #
<- xi %>%
xi mutate(Moving_Avg = movingAverage(LD, n = 100),
Loess = ifelse(Distance < 1000000, predict(myloess), NA))
#
<- bind_rows(xx, xi)
xx
}#
<- xx %>% group_by(Chr) %>%
x1 summarise(Mean_LD = mean(Moving_Avg, na.rm = T))
<- xx %>% filter(Loess < 0.2) %>% group_by(Chr) %>%
x2 summarise(Threshold_0.2 = min(Distance, na.rm = T))
<- xx %>% left_join(x1, by = "Chr") %>%
x3 group_by(Chr) %>% filter(Moving_Avg < Mean_LD) %>%
summarise(Threshold_0.1 = min(Distance, na.rm = T))
<- x1 %>%
myChr left_join(x2, by = "Chr") %>%
left_join(x3, by = "Chr")
#
<- left_join(xx, myChr, by = "Chr") %>%
xx mutate(Chr = as.factor(Chr))
# Plot full chromsomes
<- ggplot(xx, aes(x = Distance/1000000, y = Moving_Avg)) +
mp1 geom_line(size = 0.5, alpha = 0.5) +
geom_hline(data = myChr, aes(yintercept = Mean_LD), color = "red", lty = 2) +
geom_hline(yintercept = 0.2, color = "blue", lty = 2) +
scale_y_continuous(breaks = seq(0, 0.5, by = 0.1), limits = c(0,0.5)) +
facet_wrap(paste("Chr =", Chr) ~ ., ncol = 7, scales = "free_x") +
+
theme_AGL theme(legend.position = "none",
axis.title.y = ggtext::element_markdown()) +
labs(y = "R^2", x = "Mbp")
# Plot first 1 Mbp
<- xx %>% filter(Distance < 1000000)
yy <- ggplot(yy, aes(x = Distance/1000, y = Loess)) +
mp2 geom_line(aes(y = Moving_Avg), alpha = 0.5) +
geom_line() +
geom_hline(data = myChr, aes(yintercept = Mean_LD), color = "red", lty = 2) +
geom_hline(yintercept = 0.2, color = "blue", lty = 2) +
scale_y_continuous(breaks = seq(0, 0.5, by = 0.1), limits = c(0,0.5)) +
facet_wrap(paste("Chr =", Chr) + paste("Threshold =", Threshold_0.2) ~ .,
ncol = 7, scales = "free_x") +
geom_vline(data = myChr, lty = 2, size = 0.3, alpha = 0.8,
aes(xintercept = Threshold_0.2/1000)) +
+
theme_AGL theme(legend.position = "none",
axis.title.y = ggtext::element_markdown()) +
labs(y = "R^2", x = "Kbp")
# Append
<- ggarrange(mp1, mp2, ncol = 1, labels = c("(a)", "(b)"))
mp # Save
ggsave(paste0("Supplemental_Figure_05.jpg"), mp , width = 12, height = 8, dpi = 600)
Supplemental Figure 6
Supplemental Figure 6: Diagram of a 290 kb region of lentil chromosome 2 (A) and a 215 kb region of lentil chromosome 5 (B) containing the relevant SNPs Lcu.2RBY.Chr2p42543877, Lcu.2RBY.Chr2p42556949 and Lcu.2RBY.Chr5p1063138 (highlighted in red). The syntenic regions in chickpea and Medicago genomes are also shown for comparison. The length of the relevant interval for each chromosome was calculated according to the SNP position ± chromosome-specific linkage disequilibrium decay. Numbers over or besides each gene correspond to those shown in Supplemental Table 2.
Supplemental Figure 7
Supplemental Figure 7: Pair-wise plots of a principal component analysis of genetic marker data from a lentil diversity panel. Colors are based on a hierarchical clustering of principal components done by Wright et al. (2020) using 18 site-years of days from sowing to flower data across the three major lentil growing macroenvironments.
# Prep data
<- read.csv("Results/GAPIT.PCA.csv") %>%
pca rename(Name=taxa) %>%
left_join(myLDP, by = "Name")
# Plot PCs
<- ggplot(pca, aes(x = PC1, y = PC2, color = Cluster)) +
mp1 geom_point() +
scale_color_manual(name = "DTF Cluster", values = myColors_Cluster) +
guides(colour = guide_legend(nrow = 1, override.aes = list(size=2))) +
theme_AGL<- ggplot(pca, aes(x = PC1, y = PC3, color = Cluster)) +
mp2 geom_point() +
scale_color_manual(name = "DTF Cluster", values = myColors_Cluster) +
theme_AGL<- ggplot(pca, aes(x = PC2, y = PC3, color = Cluster)) +
mp3 geom_point() +
scale_color_manual(name = "DTF Cluster", values = myColors_Cluster) +
theme_AGL# Append
<- ggarrange(mp1, mp2, mp3, nrow = 1, ncol = 3,
mp common.legend = T, legend = "bottom")
# Save
ggsave("Supplemental_Figure_07.jpg", mp, width = 8, height = 3, dpi = 600, bg = "white")
# Plotly
<- plot_ly(pca, x = ~PC1, y = ~PC2, z = ~PC3,
mp color = ~Cluster, colors = myColors_Cluster,
hoverinfo = "text",
text = ~paste(Entry, "|", Name,
"\nOrigin:", Origin,
"\nDTF_Cluster:", Cluster)) %>%
add_markers()
# Save
saveWidget(as_widget(mp), "Supplemental_Figure_07.html")
Supplemental Table 2
Supplemental Table 2: List of genes in the regions associated with flowering time in lentils chromosomes 2 and 5, and the syntenic regions of chickpea (Cicer arietinum) and Medicago truncatula.
Additional Plots
Phenotype Data
# Prep data
<- myY %>%
xx gather(ExptTrait, Value, 2:ncol(.)) %>%
mutate(ExptTrait = factor(ExptTrait, levels = colnames(myY)[-1]))
# Plot
<- ggplot(xx, aes(x = Value)) +
mp geom_histogram(fill = "darkgreen", color = "black", alpha = 0.7) +
facet_wrap(ExptTrait~., ncol = 9, scales = "free") +
theme_AGL# Save
ggsave("Additional/Additional_Figure_06.jpg", mp, width = 14, height = 7, dpi = 600)
Grouped Manhattan Plots
# Create function
<- function(expttraits = c("Su17_DTF", "Ba17_DTF", "Ne17_DTF", "It17_DTF"),
gg_Custom_Man folder = "Results/", filename = "Fig01.jpg", title = filename,
markers = myMs, height = 10) {
# Prep data
<- NULL
xx <- NULL
xs for(i in expttraits) {
<- grep(paste0(i,".GWAS.Results"), list.files(folder))
fnames <- list.files(folder)[fnames]
fnames for(j in fnames) {
<- substr(j, gregexpr("\\.", j)[[1]][1]+1, nchar(j))
mod <- substr(mod, 1, gregexpr("\\.", mod)[[1]][1]-1 )
mod <- GWAS_PeakTable(folder = folder, file = j) %>%
xsi arrange(P.value) %>%
filter(!duplicated(paste(FT_Genes, Model))) %>%
mutate(FT_Genes = gsub(" ; ", "\n", FT_Genes))
<- read.csv(paste0(folder, j)) %>%
xj mutate(Model = mod, ExptTrait = i,
`-log10(p)` = -log10(P.value),
`-log10(p)_Exp` = -log10((rank(P.value, ties.method="first")-.5)/nrow(.)))
<- bind_rows(xx, xj)
xx if(nrow(xsi)>0) { xs <- bind_rows(xs, xsi) }
}
}#
<- xx %>%
xx mutate(Model = factor(Model, levels = myModels),
ExptTrait = factor(ExptTrait, levels = expttraits)) %>%
arrange(desc(Model))
#
<- xx %>% mutate(`-log10(p)` = ifelse(`-log10(p)` > 10, 10, `-log10(p)`))
xx <- xs %>% mutate(`-log10(p)` = ifelse(`-log10(p)` > 10, 10, `-log10(p)`),
xs ExptTrait = paste(Expt, Trait, sep = "_"))
#
<- xx %>% filter(`-log10(p)` < threshold2)
x1 <- xx %>% filter(`-log10(p)` > threshold2, `-log10(p)` < threshold)
x2 <- xx %>% filter(`-log10(p)` > threshold)
x3 # Plot
<- ggplot(x1, aes(x = Position / 100000000, y = `-log10(p)`,
mp shape = Model, fill = Model)) +
geom_vline(data = myGM, color = "red", alpha = 0.6,
aes(xintercept = Position / 100000000)) +
geom_hline(yintercept = threshold, color = "red") +
geom_hline(yintercept = threshold2, color = "blue") +
geom_point(size = 0.3, color = alpha("white", 0)) +
geom_point(data = x2, size = 0.75, color = alpha("white", 0)) +
geom_point(data = x3, size = 1.25) +
::geom_text_repel(data = xs, aes(label = FT_Genes, ggroup = ExptTrait),
ggrepelsize = 1.5, nudge_y = 0.5) +
facet_grid(ExptTrait ~ Chromosome, scales = "free_x", space = "free_x") +
scale_x_continuous(breaks = 1:7) +
scale_fill_manual(values = myColors_Model) +
scale_shape_manual(values = 22:25) +
+
theme_AGL theme(legend.position = "bottom") +
guides(shape = guide_legend(override.aes = list(size = 3))) +
labs(title = title, y = NULL, x = "100 Mbp")
# Save
ggsave(paste0("Additional/Man_Grouped/", filename, ".jpg"),
width = 10, height = height)
mp,
}# DTF
gg_Custom_Man(expttraits = c("Ro16_DTF", "Ro17_DTF", "Su16_DTF",
"Su17_DTF", "Su18_DTF", "Us18_DTF"),
filename = "Man_DTF_Temperate")
gg_Custom_Man(expttraits = c("It16_DTF", "It17_DTF", "Sp16_DTF",
"Sp17_DTF", "Mo16_DTF", "Mo17_DTF"),
filename = "Man_DTF_Mediterranean")
gg_Custom_Man(expttraits = c("Ne16_DTF", "Ne17_DTF", "Ba16_DTF",
"Ba17_DTF", "In16_DTF", "In17_DTF"),
filename = "Man_DTF_SouthAsia")
# Tf & Tb
gg_Custom_Man(expttraits = c("Su17_Tf", "Ba17_Tf", "It17_Tf",
"Su17_Tb", "Ba17_Tb", "It17_Tb"),
filename = "Man_Tf_Tb")
# Pf & Pc
gg_Custom_Man(expttraits = c("Su17_Pf", "Ba17_Pf", "It17_Pf",
"Su17_Pc", "Ba17_Pc", "It17_Pc"),
filename = "Man_Pf_Pc")
# PCA & abc
gg_Custom_Man(expttraits = c("PCA_PC1", "PCA_PC2", "PCA_PC3",
"PTModel_a", "PTModel_b", "PTModel_c"),
filename = "Man_PCA_abc")
# DTF - c
gg_Custom_Man(expttraits = c("Ro16_DTF", "Ro17_DTF", "Su16_DTF",
"Su17_DTF", "Su18_DTF", "Us18_DTF"),
folder = "Results_c/",
filename = "Man_DTF_c_Temperate")
gg_Custom_Man(expttraits = c("It16_DTF", "It17_DTF", "Sp16_DTF",
"Sp17_DTF", "Mo16_DTF", "Mo17_DTF"),
folder = "Results_c/",
filename = "Man_DTF_c_Mediterranean")
gg_Custom_Man(expttraits = c("Ne16_DTF", "Ne17_DTF", "Ba16_DTF",
"Ba17_DTF", "In16_DTF", "In17_DTF"),
folder = "Results_c/",
filename = "Man_DTF_c_SouthAsia")
# DTF - b
gg_Custom_Man(expttraits = c("Ro16_DTF", "Ro17_DTF", "Su16_DTF",
"Su17_DTF", "Su18_DTF", "Us18_DTF"),
folder = "Results_b/",
filename = "Man_DTF_b_Temperate")
gg_Custom_Man(expttraits = c("It16_DTF", "It17_DTF", "Sp16_DTF",
"Sp17_DTF", "Mo16_DTF", "Mo17_DTF"),
folder = "Results_b/",
filename = "Man_DTF_b_Mediterranean")
gg_Custom_Man(expttraits = c("Ne16_DTF", "Ne17_DTF", "Ba16_DTF",
"Ba17_DTF", "In16_DTF", "In17_DTF"),
folder = "Results_b/",
filename = "Man_DTF_b_SouthAsia")
Facetted & Multi-Modeled Manhattan Plots
# Create function
<- function(folder = "Results/", expttrait = "Ro17_DTF", suffix = NULL,
gg_Man colors = c("darkgreen","darkgoldenrod3","darkgreen","darkgoldenrod3",
"darkgreen", "darkgoldenrod3","darkgreen")) {
# Prep data
<- substr(expttrait, 1, gregexpr("_", expttrait)[[1]][1]-1 )
expt <- substr(expttrait, gregexpr("_", expttrait)[[1]][1]+1, nchar(expttrait) )
trait <- grep(paste0(expttrait, ".GWAS.Results"), list.files(folder))
fnames <- list.files(folder)[fnames]
fnames <- NULL
xx <- NULL
xs for(i in fnames) {
<- substr(i, gregexpr("GAPIT.", i)[[1]][1]+6,
trt gregexpr(".GWAS.Results.csv", i)[[1]][1]-1 )
<- substr(trt, 1, gregexpr("\\.", trt)[[1]][1]-1 )
mod <- substr(trt, gregexpr("\\.", trt)[[1]][1]+1, nchar(trt) )
trt #
<- GWAS_PeakTable(folder = folder, file = i) %>%
xsi arrange(P.value) %>%
filter(!duplicated(paste(FT_Genes, Model))) %>%
mutate(FT_Genes = gsub(" ; ", "\n", FT_Genes))
<- read.csv(paste0(folder, i))
xi #
if(sum(colnames(xi)=="nobs")>0) { xi <- select(xi, -nobs) }
<- xi %>%
xi mutate(Model = mod,
`-log10(p)` = -log10(P.value),
`-log10(p)_Exp` = -log10((rank(P.value, ties.method="first")-.5)/nrow(.)),
`-log10(p)_FDR` = -log10(FDR_Adjusted_P.values))
<- bind_rows(xx, xi)
xx if(nrow(xsi)>0) { xs <- bind_rows(xs, xsi) }
}#
<- xs %>% select(SNP, Model, FT_Genes)
xs2 <- xx %>%
xx mutate(Sig = ifelse(-log10(P.value) > threshold2, "Suggested", "Not Significant"),
Sig = ifelse(-log10(P.value) > threshold, "Significant", Sig)) %>%
left_join(xs2, by = c("SNP", "Model")) %>%
mutate(Model = factor(Model, levels = myModels)) %>%
arrange(desc(Model))
#
<- xx %>% mutate(`-log10(p)` = ifelse(`-log10(p)` > 10, 10, `-log10(p)`))
xx #
<- xx %>% filter(-log10(P.value) > 0.5, -log10(P.value) < threshold2)
x1 <- xx %>% filter(-log10(P.value) > threshold2 & -log10(P.value) < threshold)
x2 <- xx %>% filter(-log10(P.value) > threshold)
x3 # Plot GWAS
<- ggplot(xx, aes(x = Position / 100000000, y = -log10(P.value))) +
mp1 geom_vline(data = myGM, alpha = 0.5, color = "red",
aes(xintercept = Position / 100000000)) +
geom_hline(yintercept = threshold, color = "red") +
geom_hline(yintercept = threshold2, color = "blue") +
geom_point(aes(color = factor(Chromosome)), pch = 16, size = 0.5) +
geom_point(data = x2, pch = 18, size = 0.75, color = "darkred") +
geom_point(data = x3, pch = 23, size = 1.25, color = "black", fill = "darkred") +
::geom_text_repel(aes(label = FT_Genes), size = 1.5, nudge_y = 0.5) +
ggrepelfacet_grid(Model ~ Chromosome, scales = "free") +
scale_x_continuous(breaks = 0:7) +
scale_color_manual(values = colors) +
+
theme_AGL theme(legend.position = "none",
axis.title.y = ggtext::element_markdown()) +
labs(title = paste0(expttrait, suffix),
y = "-log<sub>10</sub>(*p*)", x = "100 Mbp")
# Plot QQ
<- ggplot(x1, aes(y = `-log10(p)`, x = `-log10(p)_Exp`)) +
mp2 geom_point(pch = 16, size = 0.75, color = colors[1]) +
geom_point(data = x2, pch = 18, size = 1, color = "darkred") +
geom_point(data = x3, pch = 23, size = 1.25, color = "black", fill = "darkred") +
geom_hline(yintercept = threshold, color = "red") +
geom_hline(yintercept = threshold2, color = "blue") +
geom_abline() +
facet_grid(Model ~ "QQ", scales = "free_y") +
+
theme_AGL labs(title = "", y = NULL, x = "Expected")
# Append and save plots
<- ggarrange(mp1, mp2, ncol = 2, widths = c(4,1), align = "h")
mp ggsave(paste0("Additional/Man_Facet/ManQQ_", expt, "_", trait, suffix, ".jpg"),
width = 10, height = 7)
mp, #
<- xs %>% arrange(P.value) %>% filter(!duplicated(FT_Genes))
xs # Plot phenotypes
.1 <- ggplot(myY %>% select(Name, Value=expttrait)) +
mp1geom_histogram(aes(x = Value), fill = colors[1], alpha = 0.8) +
facet_grid(. ~ paste0(expttrait, suffix)) +
+
theme_AGL theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
labs(x = "Value", y = "Count")
.2 <- ggplot(myY %>% select(Name, Value=expttrait)) +
mp1stat_ecdf(aes(x = Value), color = colors[1], alpha = 0.8, lwd = 1.5) +
facet_grid(. ~ "Accum. Dist.") +
+
theme_AGL labs(x = "Density", y = "Accumulation")
# Plot GWAS
<- ggplot(x1, aes(x = Position / 100000000, y = -log10(P.value),
mp2 shape = Model, fill = Model)) +
geom_vline(data = myGM, alpha = 0.5, color = "red",
aes(xintercept = Position / 100000000)) +
geom_hline(yintercept = threshold, color = "red") +
geom_hline(yintercept = threshold2, color = "blue") +
geom_point(size = 0.3, color = alpha("white", 0)) +
geom_point(data = x2, size = 0.75, color = alpha("white", 0)) +
geom_point(data = x3, size = 1.25) +
::geom_text_repel(data = xs, aes(label = FT_Genes), size = 1.5, nudge_y = 0.5) +
ggrepelfacet_grid(. ~ Chromosome, scales = "free") +
scale_x_continuous(breaks = 0:7) +
scale_fill_manual(values = myColors_Model) +
scale_shape_manual(values = 22:25) +
+
theme_AGL theme(axis.title.y = ggtext::element_markdown()) +
guides(shape = guide_legend(override.aes = list(size = 3))) +
labs(y = "-log<sub>10</sub>(*p*)", x = "Mbp")
# Plot QQ
.1 <- ggplot(x1, aes(y = `-log10(p)`, x = `-log10(p)_Exp`, shape = Model, fill = Model)) +
mp3geom_point(size = 0.5, color = alpha("white", 0)) +
geom_point(data = x2, size = 0.75, color = alpha("white", 0)) +
geom_point(data = x3, size = 1.25) +
geom_hline(yintercept = threshold, color = "red") +
geom_hline(yintercept = threshold2, color = "blue") +
geom_point(size = 0.3, color = alpha("white", 0)) +
geom_abline() +
scale_fill_manual(name = NULL, values = myColors_Model) +
scale_shape_manual(name = NULL, values = 22:25) +
facet_grid(. ~ "QQ", scales = "free_y") +
+
theme_AGL theme(legend.position = "none") +
labs(title = "", y = NULL, x = "Expected")
# Plot MAF
.2 <- ggplot(x1, aes(y = `-log10(p)`, x = maf * 100, shape = Model, fill = Model)) +
mp3geom_point(size = 0.3, color = alpha("white", 0)) +
geom_point(data = x2, size = 0.75, color = alpha("white", 0)) +
geom_point(data = x3, size = 1.25) +
geom_hline(yintercept = threshold, color = "red") +
geom_hline(yintercept = threshold2, color = "blue") +
geom_point(size = 0.3, color = alpha("white", 0)) +
facet_grid(. ~ "MAF", scales = "free_y") +
scale_fill_manual(name = NULL, values = myColors_Model) +
scale_shape_manual(name = NULL, values = 22:25) +
+
theme_AGL theme(legend.position = "none") +
labs(y = NULL, x = "%")
# Append
<- ggarrange(mp1.1, mp1.2, ncol = 1, align = "v")
mp1 <- ggarrange(mp3.1, mp3.2, ncol = 1, align = "v")
mp3 <- ggarrange(mp1, mp2, mp3, ncol = 3, align = "h",
mp widths = c(1.5,7,1.5), legend = "top")
# Save
ggsave(paste0("Additional/Man_Multi/ManQQ_", expt, "_", trait, suffix, ".jpg"),
width = 12, height = 4)
mp,
}# Plot
<- colnames(myY)[2:ncol(myY)]
expttraits for(i in expttraits) {
gg_Man(folder = "Results/", expttrait = i)
}for(i in expttraits[!grepl("PTModel_b", expttraits)]) {
gg_Man(folder = "Results_b/", expttrait = i, suffix = "-b")
}for(i in expttraits[!grepl("PTModel_c", expttraits)]) {
gg_Man(folder = "Results_c/", expttrait = i, suffix = "-c")
}
Marker Plots
# Create function
<- function(marker = myMs[1],
gg_Marker expttraits = c("Su17_DTF", "Ba17_DTF", "Ne17_DTF", "It17_DTF",
"Su17_Tf", "Su17_Pf", "PTModel_b", "PTModel_c")) {
# Prep data
<- myG %>% filter(rs == marker) %>%
gg column_to_rownames("rs") %>%
select(11:ncol(.)) %>%
t() %>% as.data.frame() %>%
rownames_to_column("Name") %>%
rename(Marker=2)
#
<- myY %>% select(Name, expttraits) %>%
yy gather(ExptTrait, Value, 2:ncol(.))
#
<- left_join(yy, gg, by = "Name") %>%
xx left_join(myLDP, by = "Name") %>%
mutate(ExptTrait = factor(ExptTrait, levels = expttraits))
#
<- xx %>%
x1 filter(!grepl("M|R|W|S|Y|K|N", Marker), ExptTrait %in% expttraits) %>%
arrange(Marker)
#
<- myP %>% filter(SNP == marker) %>% slice(1)
fg <- paste(fg$FT_Genes, fg$FT_Distances, sep = "\n")
fg # Plot
<- ggplot(x1, aes(x = Marker, y = Value)) +
mp geom_quasirandom(aes(color = Cluster)) +
facet_wrap(ExptTrait ~ ., ncol = 4, scales = "free_y") +
scale_color_manual(values = myColors_Cluster) +
guides(color = guide_legend(nrow = 1, override.aes = list(size = 3))) +
+
theme_AGL theme(legend.position = "bottom",
panel.grid.major.x = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5)) +
labs(title = marker, caption = fg,
x = NULL, y = "DTF")
mp
}# Plot Markers
for(i in myMs) {
<- gg_Marker(marker = i)
mp ggsave(paste0("Additional/Markers/Markers_",i,".jpg"), mp, width = 8, height = 6)
}# Chr 1
pdf("Additional/Markers/Markers_Chr_1.pdf", width = 8, height = 6)
<- myP %>% filter(Chromosome == 1)
myPi for(i in unique(myPi$SNP)) { print(gg_Marker(marker = i)) }
dev.off()
# Chr 2
pdf("Additional/Markers/Markers_Chr_2.pdf", width = 8, height = 6)
<- myP %>% filter(Chromosome == 2)
myPi for(i in unique(myPi$SNP)) { print(gg_Marker(marker = i)) }
dev.off()
# Chr 3
pdf("Additional/Markers/Markers_Chr_3.pdf", width = 8, height = 6)
<- myP %>% filter(Chromosome == 3)
myPi for(i in unique(myPi$SNP)) { print(gg_Marker(marker = i)) }
dev.off()
# Chr 4
pdf("Additional/Markers/Markers_Chr_4.pdf", width = 8, height = 6)
<- myP %>% filter(Chromosome == 4)
myPi for(i in unique(myPi$SNP)) { print(gg_Marker(marker = i)) }
dev.off()
# Chr 5
pdf("Additional/Markers/Markers_Chr_5.pdf", width = 8, height = 6)
<- myP %>% filter(Chromosome == 5)
myPi for(i in unique(myPi$SNP)) { print(gg_Marker(marker = i)) }
dev.off()
# Chr 6
pdf("Additional/Markers/Markers_Chr_6.pdf", width = 8, height = 6)
<- myP %>% filter(Chromosome == 6)
myPi for(i in unique(myPi$SNP)) { print(gg_Marker(marker = i)) }
dev.off()
# Chr 7
pdf("Additional/Markers/Markers_Chr_7.pdf", width = 8, height = 6)
<- myP %>% filter(Chromosome == 7)
myPi for(i in unique(myPi$SNP)) { print(gg_Marker(marker = i)) }
dev.off()
Flowering Time Genes GWAS Results
# Create function
<- function(GeneName = "LcELF3a") {
pdfGenePlots # Prep data
<- myFTGenes %>% filter(Name == GeneName)
myGene <- c(
expttraits "PCA_PC1", "PCA_PC2", "PCA_PC3",
"PTModel_a", "PTModel_b", "PTModel_c",
"Ro16_DTF", "Ro17_DTF", "Su16_DTF", "Su17_DTF", "Su18_DTF", "Us18_DTF",
"In16_DTF", "In17_DTF", "Ba16_DTF", "Ba17_DTF", "Ne16_DTF", "Ne17_DTF",
"Mo16_DTF", "Mo17_DTF", "Sp16_DTF", "Sp17_DTF", "It16_DTF", "It17_DTF",
"Su17_Tf", "Ba17_Tf", "It17_Tf",
"Su17_Tb", "Ba17_Tb", "It17_Tb",
"Su17_Pf", "Ba17_Pf", "It17_Pf",
"Su17_Pc", "Ba17_Pc", "It17_Pc")
#
pdf(paste0("Additional/",myGene$Name, ".pdf"), width = 10, height = 6)
for(i in expttraits) {
<- NULL; xb <- NULL; xc <- NULL
x1 #
<- grep(paste0(i,".GWAS.Results"), list.files("Results/"))
fnames <- list.files("Results/")[fnames]
fnames for(j in fnames) {
<- substr(j, gregexpr("\\.", j)[[1]][1]+1, nchar(j))
mod <- substr(mod, 1, gregexpr("\\.", mod)[[1]][1]-1 )
mod <- read.csv(paste0("Results/", j)) %>%
xj mutate(Model = mod, Facet = i,
`-log10(p)_Exp` = -log10((rank(P.value, ties.method="first")-.5)/nrow(.)))
<- bind_rows(x1, xj)
x1
}#
<- grep(paste0(i,".GWAS.Results"), list.files("Results_b/"))
fnames <- list.files("Results_b/")[fnames]
fnames for(j in fnames) {
<- substr(j, gregexpr("\\.", j)[[1]][1]+1, nchar(j))
mod <- substr(mod, 1, gregexpr("\\.", mod)[[1]][1]-1 )
mod <- read.csv(paste0("Results_b/", j)) %>%
xj mutate(Model = mod, Facet = "CV = *b*",
`-log10(p)_Exp` = -log10((rank(P.value, ties.method="first")-.5)/nrow(.)))
<- bind_rows(xb, xj)
xb
}#
<- grep(paste0(i,".GWAS.Results"), list.files("Results_c/"))
fnames <- list.files("Results_c/")[fnames]
fnames for(j in fnames) {
<- substr(j, gregexpr("\\.", j)[[1]][1]+1, nchar(j))
mod <- substr(mod, 1, gregexpr("\\.", mod)[[1]][1]-1 )
mod <- read.csv(paste0("Results_c/", j)) %>%
xj mutate(Model = mod, Facet = "CV = *c*",
`-log10(p)_Exp` = -log10((rank(P.value, ties.method="first")-.5)/nrow(.)))
<- bind_rows(xc, xj)
xc
}#
<- bind_rows(x1, xb, xc) %>%
xx filter(Chromosome == myGene$Chromosome,
> myGene$Start - 10000000,
Position < myGene$Start + 10000000) %>%
Position mutate(Model = factor(Model, levels = myModels),
Facet = factor(Facet, levels = c("CV = *c*", i, "CV = *b*"))) %>%
arrange(Facet, rev(Model))
#
<- xx %>% filter(-log10(P.value) < threshold2)
x1 <- xx %>% filter(-log10(P.value) > threshold2, -log10(P.value) < threshold)
x2 <- xx %>% filter(-log10(P.value) > threshold)
x3 # Plot
print(ggplot(x1, aes(x = Position / 100000000, y = -log10(P.value),
shape = Model, fill = Model)) +
geom_vline(data = myGene, xintercept = myGene$Start / 100000000) +
geom_hline(yintercept = threshold, color = "red", alpha = 0.7) +
geom_hline(yintercept = threshold2, color = "blue", alpha = 0.7) +
geom_point(size = 0.5, color = alpha("white", 0)) +
geom_point(data = x2, size = 0.75, color = alpha("white", 0)) +
geom_point(data = x3, size = 1.25) +
facet_grid(Facet ~ Chromosome, scales = "free", space = "free_x") +
scale_fill_manual(values = myColors_Model) +
scale_shape_manual(values = 22:25) +
+
theme_AGL theme(axis.title.y = ggtext::element_markdown(),
strip.text.y = ggtext::element_markdown()) +
labs(title = i, subtitle = GeneName,
y = "-log<sub>10</sub>(*p*)", x = "100 Mbp")
)
}dev.off()
}# Plot genes
pdfGenePlots(GeneName = "LcELF3a")
pdfGenePlots(GeneName = "LcFTb1")
pdfGenePlots(GeneName = "LcFTa1")
pdfGenePlots(GeneName = "LcGI")
© Derek Michael Wright