Evaluating the breeding potential of cultivated lentils for protein and amino acid concentration and quality
unpublished
Introduction
This vignette contains the R
code and analysis done for
the paper:
which is follow-up to:
- Derek M Wright, Sandesh Neupane, Taryn Heidecker, Teketel A Haile, Clarice J Coyne, Rebecca J McGee, Sripada Udupa, Fatima Henkrar, Eleonora Barilli, Diego Rubiales, Tania Gioia, Giuseppina Logozzo, Stefania Marzario, Reena Mehra, Ashutosh Sarker, Rajeev Dhakal, Babul Anwar, Debashish Sarker, Albert Vandenberg, and Kirstin E. Bett. Understanding photothermal interactions can help expand production range and increase genetic diversity of lentil (Lens culinaris Medik.). Plants, People, Planet. (2020) 3(2): 171-181. doi.org/10.1002/ppp3.10158
- https://github.com/derekmichaelwright/AGILE_LDP_Phenology
This work done as part of the AGILE & EVOLVES projects at the University of Saskatchewan along with collaboration with partners at the University of Manitoba.
Data Preparation
# Load Libraries
library(tidyverse)
library(ggbeeswarm)
library(ggpubr)
library(FactoMineR)
library(plotly)
library(htmlwidgets)
# Create plotting theme
<- theme_bw() +
theme_AGL theme(strip.background = element_rect(colour = "black", fill = NA, size = 0.5),
panel.background = element_rect(colour = "black", fill = NA, size = 0.5),
panel.border = element_rect(colour = "black", size = 0.5),
panel.grid = element_line(color = alpha("black", 0.1), size = 0.5),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
legend.key = element_rect(color = NA))
# Prep data
<- c("Protein", "Glutamate", "Aspartate", "Arginine",
myPs "Leucine", "Lysine", "Phenylalanine", "Serine", "Valine",
"Isoleucine", "Proline", "Alanine", "Glycine", "Threonine",
"Histidine", "Tyrosine", "Methionine", "Cysteine", "Tryptophan")
<- c("Sutherland, Canada 2016", "Rosthern, Canada 2016",
myEs1 "Sutherland, Canada 2017", "Rosthern, Canada 2017")
<- c("Su16", "Ro16", "Su17", "Ro17")
myEs2 <- c("steelblue", "darkorange", "darkblue", "darkred")
myCs_Expt <- c("darkred", "darkgreen", "darkorange", "darkblue", "steelblue")
myCs_Region <- c("red4", "darkorange3", "blue2", "deeppink3",
myCs_Clusters "steelblue", "darkorchid4", "darkslategray", "chartreuse4")
# Wet Chemistry Data
<- read.csv("Data/myD_Protein_WetChem.csv") %>%
d1 select(-Plot, -Rep, -Sample.Name..1st..text.) %>%
rename(Year=Planting.Date..date.) %>%
gather(AminoAcid, Value, 5:ncol(.)) %>%
mutate(AminoAcid = gsub("Protein....", "Protein", AminoAcid),
AminoAcid = gsub("Glutamic.acid", "Glutamate", AminoAcid),
AminoAcid = gsub("Aspartic.acid", "Aspartate", AminoAcid),
AminoAcid = gsub("..1st....", "", AminoAcid),
AminoAcid = factor(AminoAcid, levels = myPs),
Expt = factor(paste(Location, Year), levels = myEs1),
ExptShort = plyr::mapvalues(Expt, myEs1, myEs2),
Value = round(as.numeric(Value), 4))
# NIRS Data
<- read.csv("Data/myD_Protein_NIRS.csv") %>%
d2 select(-Plot, -Rep, -Sample.Name..1st..text.) %>%
rename(Year=Planting.Date..date.) %>%
gather(AminoAcid, Value, 5:ncol(.)) %>%
mutate(AminoAcid = gsub("Glutamic.acid", "Glutamate", AminoAcid),
AminoAcid = gsub("Aspartic.acid", "Aspartate", AminoAcid),
AminoAcid = gsub("..1st....", "", AminoAcid),
AminoAcid = factor(AminoAcid, levels = myPs),
Expt = factor(paste(Location, Year), levels = myEs1),
ExptShort = plyr::mapvalues(Expt, myEs1, myEs2),
Value = round(Value, 4))
#
<- read.csv("Data/myD_LDP.csv")
myLDP # Protein Families
<- c("Family.Glutamate", "Family.Aspartate", "Family.Pyruvate",
myPFs "Family.Serine", "Family.Histidine", "Family.Aromatic",
"Perc.Family.Glutamate", "Perc.Family.Aspartate", "Perc.Family.Pyruvate",
"Perc.Family.Serine", "Perc.Family.Histidine", "Perc.Family.Aromatic")
<- d2 %>% spread(AminoAcid, Value) %>%
d3 mutate(Family.Glutamate = Glutamate + Proline + Arginine,
Family.Aspartate = Aspartate + Threonine + Isoleucine + Methionine + Lysine,
Family.Pyruvate = Alanine + Valine + Leucine,
Family.Serine = Serine + Glycine + Cysteine,
Family.Histidine = Histidine,
Family.Aromatic = Tryptophan + Phenylalanine + Tyrosine ) %>%
mutate(Perc.Family.Glutamate = 100 * Family.Glutamate / Protein,
Perc.Family.Aspartate = 100 * Family.Aspartate / Protein,
Perc.Family.Pyruvate = 100 * Family.Pyruvate / Protein,
Perc.Family.Serine = 100 * Family.Serine / Protein,
Perc.Family.Histidine = 100 * Family.Histidine / Protein,
Perc.Family.Aromatic = 100 * Family.Aromatic / Protein) %>%
select(Name, Expt, ExptShort,
Family.Glutamate, Family.Aspartate, Family.Histidine,
Family.Pyruvate, Family.Serine, Family.Aromatic,#
Perc.Family.Glutamate, Perc.Family.Aspartate, Perc.Family.Histidine,%>%
Perc.Family.Pyruvate, Perc.Family.Serine, Perc.Family.Aromatic) gather(AminoAcidFamily, Value, 4:ncol(.)) %>%
mutate(AminoAcidFamily = factor(AminoAcidFamily, levels = myPFs))
# Essential Amino Acid Ratio
<- myEAA <- c("Histidine","Isoleucine","Leucine","Lysine","Methionine",
myEAA "Phenylalanine","Threonine","Tryptophan","Valine")
<- d2 %>% filter(AminoAcid == "Protein") %>%
x1 select(Name, Expt, ExptShort, Protein=Value)
<- d2 %>% filter(AminoAcid %in% myEAA) %>%
x2 group_by(Name, ExptShort) %>%
summarise(Essential.AA = sum(Value)) %>% ungroup()
<- left_join(x1, x2, by = c("Name", "ExptShort")) %>%
d4 mutate(Perc.Essential.AA = 100 * Essential.AA / Protein)
GWAS
Prepare Data For GWAS
# Function to fix names for GWAS
<- function(xx) {
fixNames %>% mutate(Name = gsub(" ", "_", Name),
xx Name = gsub("-", "\\.", Name),
Name = plyr::mapvalues(Name, "3156.11_AGL", "X3156.11_AGL"))
}# Ro16 data
<- d2 %>% filter(ExptShort == "Ro16") %>%
myY select(-Entry, -Location, -Year, -Expt) %>%
mutate(AminoAcid = paste(AminoAcid, ExptShort, sep = "_")) %>%
select(-ExptShort) %>%
spread(AminoAcid, Value) %>%
fixNames()
write.csv(myY, "Data/myY_NIRS_Ro16.csv", row.names = F)
# Ro17 data
<- d2 %>% filter(ExptShort == "Ro17") %>%
myY select(-Entry, -Location, -Year, -Expt) %>%
mutate(AminoAcid = paste(AminoAcid, ExptShort, sep = "_")) %>%
select(-ExptShort) %>%
spread(AminoAcid, Value) %>%
fixNames()
write.csv(myY, "Data/myY_NIRS_Ro17.csv", row.names = F)
# Su16 data
<- d2 %>% filter(ExptShort == "Su16") %>%
myY select(-Entry, -Location, -Year, -Expt) %>%
mutate(AminoAcid = paste(AminoAcid, ExptShort, sep = "_")) %>%
select(-ExptShort) %>%
spread(AminoAcid, Value) %>%
fixNames()
write.csv(myY, "Data/myY_NIRS_Su16.csv", row.names = F)
# Su17 data
<- d2 %>% filter(ExptShort == "Su17") %>%
myY select(-Entry, -Location, -Year, -Expt) %>%
mutate(AminoAcid = paste(AminoAcid, ExptShort, sep = "_")) %>%
select(-ExptShort) %>%
spread(AminoAcid, Value) %>%
fixNames()
write.csv(myY, "Data/myY_NIRS_Su17.csv", row.names = F)
# All Data
<- d2 %>%
myY select(-Entry, -Location, -Year, -Expt) %>%
mutate(AminoAcid = paste(AminoAcid, ExptShort, sep = "_")) %>%
select(-ExptShort) %>%
spread(AminoAcid, Value) %>%
fixNames()
write.csv(myY, "Data/myY_NIRS.csv", row.names = F)
# Covariate Data
<- myLDP %>%
myCV select(Name, DTF_Ro16, DTF_Ro17, DTF_Su16, DTF_Su17,
%>%
REP_Ro16, REP_Ro17, REP_Su16, REP_Su17) fixNames()
write.csv(myCV, "Data/myCV.csv", row.names = F)
Run GWAS
# devtools::install_github("jiabowang/GAPIT3")
library(GAPIT3)
#
<- read.csv("Data/myG_LDP.csv", header = F)
myG <- read.csv("Data/myCV.csv")
myCV # Run GWAS on all data
<- read.csv("Data/myY_NIRS.csv")
myY <- GAPIT(
myGAPIT Y = myY,
G = myG,
PCA.total = 4,
model = c("MLM","FarmCPU","Blink"),
Phenotype.View = F
)# Run GWAS for Ro16 with CVs
<- read.csv("Data/myY_NIRS_Ro16.csv")
myY_Ro16 <- GAPIT(
myGAPIT Y = myY_Ro16,
G = myG,
CV = myCV[,c("Name","DTF_Ro16","REP_Ro16")],
PCA.total = 4,
model = c("MLM","FarmCPU","Blink"),
Phenotype.View = F
)# Run GWAS for Ro17 with CVs
<- read.csv("Data/myY_NIRS_Ro17.csv")
myY_Ro17 <- GAPIT(
myGAPIT Y = myY_Ro17,
G = myG,
CV = myCV[!is.na(myCV$REP_Ro17),c("Name","DTF_Ro17","REP_Ro17")],
PCA.total = 4,
model = c("MLM","FarmCPU","Blink"),
Phenotype.View = F
)# Run GWAS for Su16 with CVs
<- read.csv("Data/myY_NIRS_Su16.csv")
myY_Su16 <- GAPIT(
myGAPIT Y = myY_Su16,
G = myG,
CV = myCV[,c("Name","DTF_Su16","REP_Su16")],
PCA.total = 4,
model = c("MLM","FarmCPU","Blink"),
Phenotype.View = F
)# Run GWAS for Su17 with CVs
<- read.csv("Data/myY_NIRS_Su17.csv")
myY_Su17 <- GAPIT(
myGAPIT Y = myY_Su17,
G = myG,
CV = myCV[!is.na(myCV$REP_Su17),c("Name","DTF_Su17","REP_Su17")],
PCA.total = 4,
model = c("MLM","FarmCPU","Blink"),
Phenotype.View = F
)
Post GWAS
# devtools::install_github("derekmichaelwright/gwaspr")
library(gwaspr)
#
<- read.csv("Data/myG_LDP.csv", header = T)
myG <- c(# Marker set #1
myMs "Lcu.2RBY.Chr3p339102503", "Lcu.2RBY.Chr5p327505937", "Lcu.2RBY.Chr5p467479275",
# Marker set #2
"Lcu.2RBY.Chr1p437385632", "Lcu.2RBY.Chr4p432694216", "Lcu.2RBY.Chr6p411536500")
<- c(rep("red",3), rep("blue",3)) myCs
Supplemental Table 1 - Significant Results
# reorder GWAS results tables and compile significant results
<- list_Result_Files("GWAS_Results/")
myR1 order_GWAS_Results(folder = "GWAS_Results/", files = myR1)
<- table_GWAS_Results("GWAS_Results/", myR1, threshold = 6.7, sug.threshold = 5.3)
x1 #
<- list_Result_Files("GWAS_Results_CV_DTF_REP/")
myR2 order_GWAS_Results(folder = "GWAS_Results_CV_DTF_REP/", files = myR2)
<- table_GWAS_Results("GWAS_Results_CV_DTF_REP/", myR2, threshold = 6.7, sug.threshold = 5.3)
x2 #
<- x1 %>% mutate(CV = "NONE") %>% arrange(P.value)
x1 <- x2 %>% mutate(CV = "DTF+REP") %>% arrange(P.value)
x2 <- bind_rows(x1, x2)
xx write.csv(xx, "Supplemental_table_01.csv", row.names = F)
<- read.csv("Supplemental_table_01.csv")
xx ::datatable(xx) DT
Manhattan plots
# Create custom manhattan plots
<- list_Traits("GWAS_Results/")
myTs for(i in myTs) {
# No CV
<- gg_Manhattan(folder = "GWAS_Results/", trait = i,
mp1 threshold = 6.7, sug.threshold = 5.3, legend.rows = 2,
vlines = myMs, vline.colors = myCs )
# With CV
<- gg_Manhattan(folder = "GWAS_Results_CV_DTF_REP/", trait = i, facet = F,
mp2 title = paste(i, "| CV = DTF + REP"),
threshold = 6.7, sug.threshold = 5.3, legend.rows = 2,
vlines = myMs, vline.colors = myCs)
# Bind together
<- substr(i, 1, regexpr("_",i)-1)
i1 <- substr(i, regexpr("_",i)+1, nchar(i))
i2 <- ggarrange(mp1, mp2, nrow = 2, ncol = 1, common.legend = T, legend = "bottom")
mp ggsave(paste0("Additional/ManH/Multi_",i2,"_",i1,".png"),
width = 12, height = 8, bg = "white")
mp, }
Supplemental Figure 1 - Wet Chem vs NIRS
# Prep data
<- d1 %>% select(Name, Expt, AminoAcid, `Wet Chemistry`=Value)
x1 <- d2 %>% select(Name, Expt, AminoAcid, NIRS=Value)
x2 <- left_join(x1, x2, by = c("Name", "Expt", "AminoAcid"))
xx # Plot
<- ggplot(xx, aes(x = `Wet Chemistry`, y = NIRS)) +
mp geom_point(aes(color = Expt), alpha = 0.5, pch = 16) +
stat_smooth(geom = "line", method = "lm", alpha = 0.7, linewidth = 1.5) +
stat_regline_equation(aes(label = ..rr.label..)) +
facet_wrap(AminoAcid ~ ., scales = "free") +
scale_color_manual(values = myCs_Expt) +
+
theme_AGL theme(legend.position = "bottom") +
guides(color = guide_legend(override.aes = list(size = 3))) +
labs(title = "Concentration (g / 100g dry seed)",
y = "Near-infrared Spectroscopy")
ggsave("Supplemental_Figure_01.png", mp, width = 10, height = 8, dpi = 600)
Supplemental Figure 2 - Total Protein & Amino Acids
# Prep data
<- d2 %>%
xx mutate(Family = ifelse(AminoAcid == "Protein", "Total Protein", NA),
Family = ifelse(AminoAcid %in% c("Glutamate", "Proline", "Arginine"), "Glutamate", Family),
Family = ifelse(AminoAcid %in% c("Aspartate", "Threonine", "Isoleucine", "Methionine", "Lysine"), "Aspartate", Family),
Family = ifelse(AminoAcid %in% c("Alanine", "Valine", "Leucine"), "Pyruvate", Family),
Family = ifelse(AminoAcid %in% c("Serine", "Glycine", "Cysteine"), "Serine", Family),
Family = ifelse(AminoAcid %in% c("Histidine"), "Histidine", Family),
Family = ifelse(AminoAcid %in% c("Tryptophan", "Phenylalanine", "Tyrosine"), "Aromatic", Family)) %>%
mutate(AminoAcid = factor(AminoAcid, levels = myPs),
Family = factor(Family, levels = c("Total Protein", "Glutamate", "Aspartate", "Pyruvate",
"Serine", "Histidine", "Aromatic")))
# Plot
<- ggplot(xx, aes(x = Value, fill = Family)) +
mp geom_histogram(color = "black", alpha = 0.7, lwd = 0.2) +
facet_grid(ExptShort ~ AminoAcid, scales = "free_x") +
scale_x_reverse() +
+
theme_AGL theme(legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
guides(fill = guide_legend(nrow = 1)) +
labs(title = "Lentil Diversity Panel", x = "g / 100g dry seed")
ggsave("Supplemental_Figure_02.png", mp, width = 18, height = 5, dpi = 600)
Figure 1 - Total Protein & Amino Acids
# Prep data
<- d2 %>% filter(AminoAcid != "Protein")
xx # Plot
<- ggplot(xx, aes(x = AminoAcid, y = Value, fill = Expt)) +
mp1 geom_boxplot(alpha = 0.7, coef = 5, lwd = 0.1, position = position_dodge(width = 0.9)) +
scale_fill_manual(name = NULL, values = myCs_Expt) +
scale_y_continuous(breaks = 0:5) +
+
theme_AGL theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "A) Amino Acids", y = "g / 100g dry seed", x = NULL)
# Prep data
<- c("Family.Glutamate", "Family.Aspartate", "Family.Pyruvate",
myFams1 "Family.Serine", "Family.Histidine", "Family.Aromatic")
<- c("Glutamte Family", "Aspartate Family", "Pyruvate Family",
myFams2 "Serine Family", "Histidine Family", "Aromatic Family")
<- d3 %>% filter(!grepl("Perc", AminoAcidFamily)) %>%
xx mutate(AminoAcidFamily = plyr::mapvalues(AminoAcidFamily, myFams1, myFams2))
# Plot
<- ggplot(xx, aes(x = Value, fill = Expt)) +
mp2 geom_histogram(color = "black", alpha = 0.7, lwd = 0.1) +
facet_grid(ExptShort ~ AminoAcidFamily, scales = "free_x", labeller = label_wrap_gen(width = 10)) +
scale_fill_manual(values = myCs_Expt) +
+
theme_AGL theme(legend.position = "bottom",
legend.text = element_text(size = 8),
axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
guides(fill = guide_legend(ncol = 4, override.aes = list(lwd = 0))) +
labs(title = "B) Amino Acid Families",
y = "Count", x = "g / 100g dry seed")
<- get_legend(mp2)
mpl <- mp2 + theme(legend.position = "none")
mp2 #
<- myEAA <- c("Histidine", "Isoleucine", "Leucine", "Lysine", "Methionine",
myEAA "Phenylalanine", "Threonine", "Tryptophan", "Valine")
<- d2 %>% filter(AminoAcid == "Protein") %>%
x1 select(Name, ExptShort, Protein=Value)
<- d2 %>% filter(AminoAcid %in% myEAA) %>%
x2 group_by(Name, ExptShort) %>%
summarise(Essential = sum(Value)) %>% ungroup()
<- left_join(x1, x2, by = c("Name", "ExptShort")) %>%
xx mutate(Ratio = 100 * Essential / Protein)
# Plot
<- ggplot(xx, aes(x = ExptShort, y = Protein)) +
mp3 geom_violin() +
geom_quasirandom(aes(color = ExptShort), pch = 16, size = 1, alpha = 0.7) +
geom_boxplot(width = 0.1, coef = 5) +
scale_color_manual(values = myCs_Expt) +
scale_y_continuous(breaks = c(26,28,30,32,34)) +
+
theme_AGL theme(legend.position = "none") +
labs(title = "C) Total Protein", y = "g / 100g dry seed", x = NULL)
<- ggplot(xx, aes(x = ExptShort, y = Ratio)) +
mp4 geom_violin() +
geom_quasirandom(aes(color = ExptShort), pch = 16, size = 1, alpha = 0.7) +
geom_boxplot(width = 0.1, coef = 5) +
scale_color_manual(values = myCs_Expt) +
+
theme_AGL theme(legend.position = "none") +
labs(title = "D) Essential Amino Acids", y = "Percent of Total Protein", x = NULL)
# Bind together
<- ggarrange(ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2),
mp nrow = 2, heights = c(1,0.1))
mpl, ggsave("Figure_01.png", mp, width = 10, height = 8, dpi = 600, bg = "white")
Figure 2 - Protein x Traits
# Prep data
<- c("Red", "Yellow", "Red/Yellow", "Green")
myCots <- d2 %>% filter(AminoAcid == "Protein") %>%
xx left_join(myLDP, by = c("Entry","Name")) %>%
mutate(Origin = ifelse(Origin %in% c("Canada","USA"), "Local", "Other"),
CotyledonColor = factor(CotyledonColor, levels = myCots)) %>%
arrange(desc(Origin)) %>%
filter(ExptShort == "Su16")
# Plot
<- ggplot(xx, aes(x = Region, y = Value, color = Origin, pch = CotyledonColor)) +
mp1 geom_quasirandom(fill = "steelblue", alpha = 0.7) +
facet_grid(. ~ Expt) +
scale_color_manual(values = c("black", "steelblue"), guide = F) +
scale_shape_manual(name = "Cotyledon Color", values = c(24,25,23,22)) +
+
theme_AGL guides(shape = guide_legend(override.aes = list(size = 3))) +
labs(title = "A) Protein x Region",
x = "Origin", y = "Protein (g / 100g dry seed)")
# Plot
<- ggplot(xx, aes(x = SeedMass1000.2017, y = Value)) +
mp2 geom_point(aes(shape = CotyledonColor, color = Origin), fill = "steelblue", alpha = 0.7) +
stat_smooth(geom = "line", method = "lm", se = F,
color = "black", alpha = 0.7, size = 1) +
stat_regline_equation(aes(label = ..rr.label..)) +
facet_wrap(Expt ~ .) +
scale_color_manual(values = c("black", "steelblue"), guide = F) +
scale_shape_manual(name = "Cotyledon Color", values = c(24,25,23,22)) +
+
theme_AGL guides(shape = guide_legend(override.aes = list(size = 3))) +
labs(title = "B) Protein x Seed Size",
x = "Mass of 1000 seeds (g)", y = "Protein (g / 100g dry seed)")
# Prep data
<- d2 %>% filter(AminoAcid == "Protein") %>%
x1 select(Entry, Expt, ExptShort, Protein=Value)
<- myLDP %>% select(Entry, Name, DTF_Ro16, DTF_Ro17, DTF_Su16, DTF_Su17) %>%
x2 gather(ExptShort, DTF, 3:6) %>%
mutate(ExptShort = gsub("DTF_", "", ExptShort))
<- left_join(x1, x2, by = c("Entry", "ExptShort")) %>%
xx mutate(ExptShort = factor(ExptShort, levels = myEs2)) %>%
left_join(myLDP, by = c("Entry","Name")) %>%
mutate(Origin = ifelse(Origin %in% c("Canada","USA"), "Local", "Other")) %>%
arrange(desc(Origin))
# Plot
<- ggplot(xx, aes(x = DTF, y = Protein)) +
mp3 geom_point(aes(fill = Expt), color = alpha("white",0), pch = 21) +
geom_point(aes(color = Origin), fill = alpha("white",0), pch = 21) +
stat_smooth(geom = "line", method = "lm", se = F,
color = "black", alpha = 0.7, size = 1) +
stat_regline_equation(aes(label = ..rr.label..)) +
scale_fill_manual(values = alpha(myCs_Expt,0.5)) +
scale_color_manual(values = c("black", alpha("white",0))) +
guides(color = guide_legend(nrow = 2, override.aes = list(size = 2.5, fill = "grey50")),
fill = guide_legend(nrow = 2, override.aes = list(size = 3, alpha = 0.9))) +
+
theme_AGL labs(title = "C) Protein x DTF",
x = "Days from sowing to flower (days)", y = "Protein (g / 100g dry seed)")
# Prep data
<- d2 %>% filter(AminoAcid == "Protein") %>%
x1 select(Entry, Expt, ExptShort, Protein=Value)
<- myLDP %>% select(Entry, Name, REP_Ro16, REP_Ro17, REP_Su16, REP_Su17) %>%
x2 gather(ExptShort, REP, 3:6) %>%
mutate(ExptShort = gsub("REP_", "", ExptShort))
<- left_join(x1, x2, by = c("Entry", "ExptShort")) %>%
xx left_join(myLDP, by = c("Entry","Name")) %>%
mutate(Origin = ifelse(Origin %in% c("Canada","USA"), "Local", "Other")) %>%
arrange(desc(Origin))
# Plot
<- ggplot(xx, aes(x = REP, y = Protein)) +
mp4 geom_point(aes(fill = Expt), color = alpha("white",0), pch = 21) +
geom_point(aes(color = Origin), fill = alpha("white",0), pch = 21) +
stat_smooth(geom = "line", method = "lm", se = F,
color = "black", alpha = 0.7, size = 1) +
stat_regline_equation(aes(label = ..rr.label..)) +
scale_fill_manual(values = alpha(myCs_Expt,0.6)) +
scale_color_manual(values = c("black", alpha("white",0))) +
guides(color = guide_legend(nrow = 2, override.aes = list(size = 2.5, fill = "grey50")),
fill = guide_legend(nrow = 2, override.aes = list(size = 3, alpha = 0.9))) +
+
theme_AGL labs(title = "D) Protein x REP",
x = "Reproductive period (days)", y = "Protein (g / 100g dry seed)")
# Bind together
<- ggarrange(mp1, mp2, ncol = 2, common.legend = T, legend = "bottom")
mp1 <- ggarrange(mp3, mp4, ncol = 2, common.legend = T, legend = "bottom")
mp2 <- ggarrange(mp1, mp2, ncol = 1, heights = c(1,1.1))
mp ggsave("Figure_02.png", mp, width = 8, height = 8, dpi = 600, bg = "white")
Figure 3 & Supplemental Figure 3 - Selections
# Prep data
<- function(myAmino = "Protein", myExpt = "Ro16",
gg_Selection myTitle = myExpt, myColor = "steelblue") {
# Prep data
<- myLDP %>% select(Name, Origin, Ro16=DTF_Ro16, Ro17=DTF_Ro17, Su16=DTF_Su16, Su17=DTF_Su17) %>%
xi gather(ExptShort, DTF, Ro16, Ro17, Su16, Su17)
<- d2 %>% filter(AminoAcid == myAmino) %>%
xx left_join(xi, by = c("Name", "ExptShort")) %>%
mutate(Selection = NA)
#
for(i in myEs2) {
<- quantile(xx %>% filter(ExptShort == i, Origin %in% c("Canada", "USA")) %>% pull(Value), 0.75)
myYmin <- max(xx %>% filter(ExptShort == i) %>% pull(Value))
myYmax <- min(xx %>% filter(ExptShort == i, Origin %in% c("Canada", "USA")) %>% pull(DTF))
myXmin <- max(xx %>% filter(ExptShort == i) %>% pull(DTF))
myXmax <- xx %>%
xx mutate(Selection = ifelse((ExptShort == i & Value > myYmin & DTF > myXmin), "Yes", Selection))
}<- xx %>% select(Name, ExptShort, Selection) %>% spread(ExptShort, Selection) %>%
ss filter(!is.na(Ro16), !is.na(Ro17), !is.na(Su16), !is.na(Su17)) %>% # must be "Yes" in all Expt
mutate(Selection = "Yes") %>% select(Name, Selection)
<- xx %>% filter(ExptShort == myExpt) %>% select(-Selection) %>%
xx left_join(ss, by = "Name") %>%
mutate(Origin2 = ifelse(Origin %in% c("Canada", "USA"), "Local", "Other"),
Selection = ifelse(is.na(Selection), "No", Selection),
Selection = ifelse(Origin2 == "Local", "Local", Selection)) %>%
arrange(desc(Origin2))
<- sum(xx$Selection == "Yes")
myNum <- xx %>%
xx mutate(Selection = ifelse(Selection == "Yes", paste0("Yes (", myNum, ")"), Selection),
Selection = factor(Selection, levels = c("No", paste0("Yes (", myNum, ")"), "Local")))
#
<- quantile(xx %>% filter(Origin2 == "Local") %>% pull(Value), 0.75)
myYmin <- max(xx %>% pull(Value))
myYmax <- min(xx %>% filter(Origin2 == "Local") %>% pull(DTF))
myXmin <- max(xx %>% pull(DTF))
myXmax # Plot
ggplot(xx) +
geom_rect(xmin = myXmin, xmax = myXmax,
ymin = myYmin, ymax = myYmax,
color = "black", fill = NA) +
geom_point(aes(x = DTF, y = Value, shape = Selection, size = Selection,
fill = Selection, color = Selection,
key1 = Entry, key2 = Name, key3 = Origin), alpha = 0.5) +
scale_color_manual(values = c(myColor, "black", "black")) +
scale_fill_manual(values = c(myColor, myColor, "black")) +
scale_shape_manual(values = c(0,22,8)) +
scale_size_manual(values = c(1,1,1.5)) +
guides(shape = guide_legend(override.aes = list(fill = "grey", color = "black", size = 2.5))) +
+
theme_AGL theme(axis.title = element_text(size = 10)) +
labs(subtitle = myTitle,
x = "Days from sowing to flower (days)",
y = paste(myAmino, "(g / 100g dry seed)"))
}#
<- gg_Selection(myExpt = "Su16", myColor = "steelblue", myTitle ="A) Sutherland, Canada 2016")
mp1 <- gg_Selection(myExpt = "Su17", myColor = "darkblue", myTitle ="B) Sutherland, Canada 2017")
mp2 <- gg_Selection(myExpt = "Ro16", myColor = "darkorange", myTitle ="C) Rosthern, Canada 2016")
mp3 <- gg_Selection(myExpt = "Ro17", myColor = "darkred", myTitle ="D) Rosthern, Canada 2017")
mp4 <- ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2, common.legend = T, legend = "bottom")
mp ggsave("Supplemental_Figure_03.png", mp, width = 6, height = 6, dpi = 600, bg = "white")
#
<- gg_Selection(myExpt = "Su16", myAmino = "Lysine", myColor = "steelblue",
mp1 myTitle ="A) Sutherland, Canada 2016")
<- gg_Selection(myExpt = "Su17", myAmino = "Lysine", myColor = "darkblue",
mp2 myTitle ="B) Sutherland, Canada 2017")
<- gg_Selection(myExpt = "Ro16", myAmino = "Lysine", myColor = "darkorange",
mp3 myTitle ="C) Rosthern, Canada 2016")
<- gg_Selection(myExpt = "Ro17", myAmino = "Lysine", myColor = "darkred",
mp4 myTitle ="D) Rosthern, Canada 2017")
<- ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2, common.legend = T, legend = "bottom")
mp ggsave("Figure_03.png", mp, width = 6, height = 6, dpi = 600, bg = "white")
#
saveWidget(ggplotly(mp1), file="Additional/Figure_03_Su16.html")
saveWidget(ggplotly(mp2), file="Additional/Figure_03_Su17.html")
saveWidget(ggplotly(mp3), file="Additional/Figure_03_Ro16.html")
saveWidget(ggplotly(mp4), file="Additional/Figure_03_Ro17.html")
Additional Figures - AA Selections
- Additional/AA_Selections/Figure_03_01_Protein_Su16.html
- Additional/AA_Selections/Figure_03_02_Glutamate_Su16.html
- Additional/AA_Selections/Figure_03_03_Aspartate_Su16.html
- Additional/AA_Selections/Figure_03_04_Arginine_Su16.html
- Additional/AA_Selections/Figure_03_05_Leucine_Su16.html
- Additional/AA_Selections/Figure_03_06_Lysine_Su16.html
- Additional/AA_Selections/Figure_03_07_Phenylalanine_Su16.html
- Additional/AA_Selections/Figure_03_08_Serine_Su16.html
- Additional/AA_Selections/Figure_03_09_Valine_Su16.html
- Additional/AA_Selections/Figure_03_10_Isoleucine_Su16.html
- Additional/AA_Selections/Figure_03_11_Proline_Su16.html
- Additional/AA_Selections/Figure_03_12_Alanine_Su16.html
- Additional/AA_Selections/Figure_03_13_Glycine_Su16.html
- Additional/AA_Selections/Figure_03_14_Threonine_Su16.html
- Additional/AA_Selections/Figure_03_15_Histidine_Su16.html
- Additional/AA_Selections/Figure_03_16_Tyrosine_Su16.html
- Additional/AA_Selections/Figure_03_17_Methionine_Su16.html
- Additional/AA_Selections/Figure_03_18_Cysteine_Su16.html
- Additional/AA_Selections/Figure_03_19_Tryptophan_Su16.html
<- 1
counter for(i in myPs) {
# Plot
<- gg_Selection(myExpt = "Su16", myAmino = i, myColor = "steelblue",
mp1 myTitle ="A) Sutherland, Canada 2016")
<- gg_Selection(myExpt = "Su17", myAmino = i, myColor = "darkblue",
mp2 myTitle ="B) Sutherland, Canada 2017")
<- gg_Selection(myExpt = "Ro16", myAmino = i, myColor = "darkorange",
mp3 myTitle ="C) Rosthern, Canada 2016")
<- gg_Selection(myExpt = "Ro17", myAmino = i, myColor = "darkred",
mp4 myTitle ="D) Rosthern, Canada 2017")
# Bind together
<- ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2, common.legend = T, legend = "bottom")
mp ggsave(paste0("Additional/AA_Selections/Figure_03_",
::str_pad(counter, 2, pad = "0"), "_", i,".png"),
stringrwidth = 6, height = 6, dpi = 600, bg = "white")
mp, # Save HTML
saveWidget(ggplotly(mp1),
file = paste0("Additional/AA_Selections/Figure_03_",
::str_pad(counter, 2, pad = "0"), "_", i, "_Su16.html"))
stringr# Increase loop counter
<- counter + 1
counter }
Supplemental Table 2 - Amino Acid Selections
# Create function
<- function(myAmino = "Protein") {
DT_AA_Selection # Prep data
<- myLDP %>%
xi select(Name, Origin, Ro16=DTF_Ro16, Ro17=DTF_Ro17, Su16=DTF_Su16, Su17=DTF_Su17) %>%
gather(ExptShort, DTF, Ro16, Ro17, Su16, Su17)
<- d2 %>% filter(AminoAcid == myAmino) %>%
xx left_join(xi, by = c("Name", "ExptShort")) %>%
mutate(Selection = NA)
#
for(i in myEs2) {
<- quantile(xx %>% filter(ExptShort == i, Origin %in% c("Canada", "USA")) %>% pull(Value), 0.75)
myYmin <- max(xx %>% filter(ExptShort == i) %>% pull(Value))
myYmax <- min(xx %>% filter(ExptShort == i, Origin %in% c("Canada", "USA")) %>% pull(DTF))
myXmin <- max(xx %>% filter(ExptShort == i) %>% pull(DTF))
myXmax <- xx %>%
xx mutate(Selection = ifelse((ExptShort == i & Value > myYmin & DTF > myXmin), "Yes", Selection))
}#
<- xx %>% select(Entry, Name, ExptShort, Selection) %>% spread(ExptShort, Selection) %>%
ss filter(!is.na(Ro16), !is.na(Ro17), !is.na(Su16), !is.na(Su17)) %>% # must be "Yes" in all Expt
mutate(Trait = myAmino) %>% select(Entry, Name, Trait)
}#
<- bind_rows(DT_AA_Selection(myAmino = "Protein"),
xx DT_AA_Selection(myAmino = "Glutamate"),
DT_AA_Selection(myAmino = "Aspartate"),
DT_AA_Selection(myAmino = "Arginine"),
DT_AA_Selection(myAmino = "Leucine"),
DT_AA_Selection(myAmino = "Phenylalanine"),
DT_AA_Selection(myAmino = "Serine"),
DT_AA_Selection(myAmino = "Valine"),
DT_AA_Selection(myAmino = "Isoleucine"),
DT_AA_Selection(myAmino = "Proline"),
DT_AA_Selection(myAmino = "Alanine"),
DT_AA_Selection(myAmino = "Glycine"),
DT_AA_Selection(myAmino = "Threonine"),
DT_AA_Selection(myAmino = "Histidine"),
DT_AA_Selection(myAmino = "Tyrosine"),
DT_AA_Selection(myAmino = "Methionine"),
DT_AA_Selection(myAmino = "Cysteine"),
DT_AA_Selection(myAmino = "Tryptophan") )
#
write.csv(xx, "Supplemental_table_02.csv", row.names = F)
<- read.csv("Supplemental_table_02.csv")
xx ::datatable(xx) DT
Figure 4 - GWAS Summary
# Create plotting function
<- function (folder = NULL, traits = list_Traits(),
gg_GWAS_Summary threshold = -log10(5e-08), sug.threshold = -log10(5e-06),
models = c("MLM", "MLMM", "FarmCPU", "BLINK", "GLM"),
colors = c("darkgreen", "darkred", "darkorange3", "steelblue", "darkgoldenrod2"),
shapes = 21:25, hlines = NULL,
vlines = NULL, vline.colors = rep("red", length(vlines)),
vline.legend = T, title = NULL,
caption = paste0("Sig Threshold = ", threshold, " = Large\nSuggestive = ", sug.threshold, " = Small"),
rowread = 2000, legend.position = "bottom", lrows = 1) {
# Prep data
<- list_Result_Files(folder)
files <- files[grepl(paste(traits, collapse = "|"), files)]
files <- files[grepl(paste(models, collapse = "|"), files)]
files <- NULL
xp for (i in files) {
<- table_GWAS_Results(folder = folder, files = i,
xpi threshold = threshold, sug.threshold = sug.threshold)
if (nrow(xpi) > 0) { xp <- bind_rows(xp, xpi) }
}<- xp %>%
xp filter(!is.na(SNP)) %>%
arrange(Chr, Pos, P.value, Trait) %>%
mutate(Model = factor(Model, levels = models),
Trait = factor(Trait, levels = rev(traits))) %>%
filter(!is.na(Trait)) %>%
arrange(desc(Model))
<- xp %>% filter(`-log10(p)` > threshold)
x1 <- xp %>% filter(`-log10(p)` < threshold)
x2 <- read.csv(paste0(folder, files[1])) %>%
xg mutate(Trait = xp$Trait[1],
Trait = factor(Trait, levels = rev(traits)))
# Plot
<- ggplot(x1, aes(x = Pos/1e+08, y = Trait)) + geom_blank(data = xg)
mp #
if (!is.null(vlines)) {
<- xg %>% filter(SNP %in% vlines) %>%
xm mutate(SNP = factor(SNP, levels = vlines)) %>%
arrange(SNP)
<- mp +
mp geom_vline(data = xm, alpha = 0.5, aes(xintercept = Pos/1e+08, color = SNP)) +
scale_color_manual(name = NULL, values = vline.colors)
}#
if (!is.null(hlines)) { mp <- mp + geom_hline(yintercept = hlines, alpha = 0.7) }
#
<- mp +
mp geom_point(data = x2, size = 0.75, color = "black", alpha = 0.5,
aes(shape = Model, fill = Model, key1 = SNP, key2 = `-log10(p)`)) +
geom_point(size = 2.25, color = "black", alpha = 0.5,
aes(shape = Model, fill = Model, key1 = SNP, key2 = `-log10(p)`)) +
facet_grid(. ~ Chr, drop = F, scales = "free_x", space = "free_x") +
scale_fill_manual(name = NULL, values = colors, breaks = models) +
scale_shape_manual(name = NULL, values = shapes, breaks = models) +
scale_x_continuous(breaks = 0:20) +
scale_y_discrete(drop = F) +
+
theme_AGL theme(legend.position = legend.position) +
guides(shape = guide_legend(nrow = lrows, override.aes = list(size = 4)),
color = guide_legend(nrow = lrows),
fill = guide_legend(nrow = lrows)) +
labs(title = title, y = NULL, x = "100 Mbp", caption = caption)
#
if (vline.legend == F) { mp <- mp + guides(color = vline.legend) }
#
mp }
# Prep data
<- c("Protein", "Glutamate", "Aspartate", "Leucine",
myPs "Arginine", "Lysine", "Phenylalanine", "Valine", "Serine",
"Proline", "Isoleucine", "Alanine", "Glycine", "Threonine",
"Tyrosine", "Histidine", "Methionine", "Cysteine", "Tryptophan")
#
<- c(paste0(myPs, "_Su16"), paste0(myPs, "_Su17"),
myTs paste0(myPs, "_Ro16"), paste0(myPs, "_Ro17"))
# Plot
<- gg_GWAS_Summary(folder = "GWAS_Results/",
mp1 traits = myTs,
models = c("MLM", "MLMM", "FarmCPU", "BLINK"),
colors = c("darkgreen", "darkred", "darkorange3", "steelblue"),
threshold = 6.7, sug.threshold = 5.3,
hlines = seq(19.5,72.5, by = 19), lrows = 2,
vlines = myMs, vline.colors = myCs,
title = "A) No Covariate") +
labs(caption = NULL) +
guides(fill = guide_legend(title="Model", order = 1, nrow = 2),
shape = guide_legend(title="Model", order = 1, nrow = 2),
color = guide_legend(title="Marker", order = 2, byrow = T))
#
<- gg_GWAS_Summary(folder = "GWAS_Results_CV_DTF_REP/",
mp2 traits = myTs,
models = c("MLM", "MLMM", "FarmCPU", "BLINK"),
colors = c("darkgreen", "darkred", "darkorange3", "steelblue"),
threshold = 6.7, sug.threshold = 5.3,
hlines = seq(19.5, 72.5, by = 19), lrows = 2,
vlines = myMs, vline.colors = myCs,
title = "B) Covariate = DTF + REP") +
labs(caption = NULL) +
guides(fill = guide_legend(title="Model", order = 1, nrow = 2),
shape = guide_legend(title="Model", order = 1, nrow = 2),
color = guide_legend(title="Marker", order = 2, byrow = T))
<- ggarrange(mp1, mp2, ncol = 1, common.legend = T, legend = "bottom", heights = c(1,1.1))
mp ggsave("Figure_04.png", mp, width = 12, height = 20, bg = "white")
# Save HTML
<- ggplotly(mp1) %>% layout(showlegend = FALSE)
mp1 saveWidget(as_widget(mp1), "Additional/Figure_04_A.html",
knitrOptions = list(fig.width = 12, fig.height = 10),
selfcontained = T)
<- ggplotly(mp2) %>% layout(showlegend = FALSE)
mp2 saveWidget(as_widget(mp2), "Additional/Figure_04_B.html",
knitrOptions = list(fig.width = 12, fig.height = 10),
selfcontained = T)
Supplemental Figure 4 - Protein x Amino Acid
# Prep data
<- d2 %>% filter(AminoAcid != "Protein")
x1 <- d2 %>% filter(AminoAcid == "Protein") %>%
x2 select(Name, ExptShort, Total.Protein=Value)
<- left_join(x1, x2, by = c("Name", "ExptShort"))
xx # Plot
<- ggplot(xx, aes(x = Total.Protein, y = Value, color = Expt)) +
mp geom_point(size = 0.75, alpha = 0.6, pch = 16) +
facet_wrap(. ~ AminoAcid, scales = "free_y", ncol = 6) +
scale_color_manual(values = myCs_Expt) +
+
theme_AGL theme(legend.position = "bottom") +
guides(color = guide_legend(override.aes = list(size = 3))) +
labs(title = "Lentil Diversity Panel",
y = "Amino Acid (g / 100g dry seed)", x = "Protein (g / 100g dry seed)")
ggsave("Supplemental_Figure_04.png", mp, width = 10, height = 5.5, dpi = 600)
Figure 5 - Markers
Sutherland, Canada 2016
# Create plotting function
<- function (xg = myG, xy, myTrait,
gg_PlotMarkers points = T, myColor = "steelblue",
myMarkers, myWidth = 0.1, myTitle = "", myYlab = "") {
# Prep data
<- xg %>% filter(rs %in% myMarkers) %>%
xg mutate(rs = factor(rs, levels = myMarkers)) %>% arrange(rs)
for(i in 12:ncol(xg)) { if(sum(!grepl("G|C|A|T", xg[,i])) > 0) { xg[,i] <- NA } }
<- xg[,!is.na(xg[1,])]
xg #
<- paste(myMarkers, collapse = "\n")
myMLabs #
<- xg %>% gather(Name, Allele, 12:ncol(.)) %>%
xx group_by(Name) %>%
summarise(Alleles = paste(Allele, collapse = ""))
#
<- read.csv("Data/myD_LDP.csv") %>%
myLDP mutate(Name = gsub(" ", "_", Name),
Name = gsub("-", "\\.", Name),
Name = plyr::mapvalues(Name, "3156.11_AGL", "X3156.11_AGL"))
#
<- xx %>% left_join(xy, by = "Name") %>% filter(!is.na(get(myTrait)))
xx <- xx %>% group_by(Alleles) %>%
x2 summarise(Value = mean(get(myTrait), na.rm = T)) %>%
arrange(Value)
<- xx %>% mutate(Alleles = factor(Alleles, levels = x2$Alleles) ) %>%
xx left_join(select(myLDP, Name, Origin), by = "Name") %>%
mutate(Origin = ifelse(Origin %in% c("Canada","USA"), "Local", "Other"))
#
<- ggplot(xx, aes(x = Alleles, y = get(myTrait))) +
mp geom_violin(fill = "grey90", color = NA) +
geom_boxplot(width = myWidth, coef = 5) +
facet_grid(. ~ as.character(myMLabs)) +
+
theme_AGL theme(legend.position = "top",
axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = myTitle, y = myYlab, x = NULL)
if(points == T) {
<- mp +
mp geom_quasirandom(aes(fill = Origin, color = Origin), pch = 21, size = 1) +
scale_fill_manual(values = alpha(c("black", myColor),0.7)) +
scale_color_manual(values = c("black",alpha("white",0))) +
guides(color = guide_legend(override.aes = list(size = 3)))
}
mp
}# Prep data
<- read.csv("Data/myY_NIRS_Su16.csv")
myY_Su16 # Plot
<- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16",
mp1 myMarkers = myMs[1], myColor = "steelblue",
myTitle = "A) Individual Markers",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16",
mp2 myMarkers = myMs[2], myColor = "steelblue")
#
<- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16",
mp3 myMarkers = myMs[3], myColor = "steelblue")
#
<- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16",
mp4 myMarkers = myMs[4], myColor = "steelblue",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16",
mp5 myMarkers = myMs[5], myColor = "steelblue")
#
<- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16",
mp6 myMarkers = myMs[6], myColor = "steelblue")
#
<- ggarrange(mp1, mp2, mp3, mp4, mp5, mp6, ncol = 3, nrow = 2,
mp1 common.legend = T, legend = "bottom")
#
<- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16",
mp2 myMarkers = myMs[c(1:3)], myColor = "steelblue", myWidth = 0.2,
myTitle = "B) Triplet Marker Set 1",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16",
mp3 myMarkers = myMs[c(4:6)], myColor = "steelblue", myWidth = 0.2,
myTitle = "C) Triplet Marker Set 2",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- ggarrange(mp2, mp3, ncol = 2, legend = "none")
mp2 #
<- ggarrange(mp1, mp2, ncol = 1, heights = c(1.3,1))
mp #
ggsave("Figure_05a.png", mp, width = 8, height = 8, bg = "white", dpi = 600)
Sutherland, Canada 2017
# Prep data
<- read.csv("Data/myY_NIRS_Su17.csv")
myY_Su17 # Plot
<- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17",
mp1 myMarkers = myMs[1], myColor = "darkblue",
myTitle = "A) Individual Markers",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17",
mp2 myMarkers = myMs[2], myColor = "darkblue")
#
<- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17",
mp3 myMarkers = myMs[3], myColor = "darkblue")
#
<- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17",
mp4 myMarkers = myMs[4], myColor = "darkblue",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17",
mp5 myMarkers = myMs[5], myColor = "darkblue")
#
<- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17",
mp6 myMarkers = myMs[6], myColor = "darkblue")
#
<- ggarrange(mp1, mp2, mp3, mp4, mp5, mp6, ncol = 3, nrow = 2,
mp1 common.legend = T, legend = "bottom")
#
<- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17",
mp2 myMarkers = myMs[c(1:3)], myColor = "darkblue", myWidth = 0.2,
myTitle = "B) Triplet Marker Set 1",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17",
mp3 myMarkers = myMs[c(4:6)], myColor = "darkblue", myWidth = 0.2,
myTitle = "C) Triplet Marker Set 2",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- ggarrange(mp2, mp3, ncol = 2, legend = "none")
mp2 #
<- ggarrange(mp1, mp2, ncol = 1, heights = c(1.3,1))
mp #
ggsave("Figure_05b.png", mp, width = 8, height = 8, bg = "white", dpi = 600)
Rosthern, Canada 2016
# Prep data
<- read.csv("Data/myY_NIRS_Ro16.csv")
myY_Ro16 #
<- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16",
mp1 myMarkers = myMs[1], myColor = "darkorange",
myTitle = "A) Individual Markers",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16",
mp2 myMarkers = myMs[2], myColor = "darkorange")
#
<- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16",
mp3 myMarkers = myMs[3], myColor = "darkorange")
#
<- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16",
mp4 myMarkers = myMs[4], myColor = "darkorange",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16",
mp5 myMarkers = myMs[5], myColor = "darkorange")
#
<- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16",
mp6 myMarkers = myMs[6], myColor = "darkorange")
#
<- ggarrange(mp1, mp2, mp3, mp4, mp5, mp6, ncol = 3, nrow = 2,
mp1 common.legend = T, legend = "bottom")
#
<- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16",
mp2 myMarkers = myMs[c(1:3)], myColor = "darkorange", myWidth = 0.2,
myTitle = "B) Triplet Marker Set 1",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16",
mp3 myMarkers = myMs[c(4:6)], myColor = "darkorange", myWidth = 0.2,
myTitle = "C) Triplet Marker Set 2",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- ggarrange(mp2, mp3, ncol = 2, legend = "none")
mp2 #
<- ggarrange(mp1, mp2, ncol = 1, heights = c(1.3,1))
mp #
ggsave("Figure_05c.png", mp, width = 8, height = 8, bg = "white", dpi = 600)
Rosthern, Canada 2017
# Prep data
<- read.csv("Data/myY_NIRS_Ro17.csv")
myY_Ro17 #
<- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17",
mp1 myMarkers = myMs[1], myColor = "darkred",
myTitle = "A) Individual Markers",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17",
mp2 myMarkers = myMs[2], myColor = "darkred")
#
<- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17",
mp3 myMarkers = myMs[3], myColor = "darkred")
#
<- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17",
mp4 myMarkers = myMs[4], myColor = "darkred",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17",
mp5 myMarkers = myMs[5], myColor = "darkred")
#
<- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17",
mp6 myMarkers = myMs[6], myColor = "darkred")
#
<- ggarrange(mp1, mp2, mp3, mp4, mp5, mp6, ncol = 3, nrow = 2,
mp1 common.legend = T, legend = "bottom")
#
<- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17",
mp2 myMarkers = myMs[c(1:3)], myColor = "darkred", myWidth = 0.2,
myTitle = "B) Triplet Marker Set 1",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17",
mp3 myMarkers = myMs[c(4:6)], myColor = "darkred", myWidth = 0.2,
myTitle = "C) Triplet Marker Set 2",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- ggarrange(mp2, mp3, ncol = 2, legend = "none")
mp2 #
<- ggarrange(mp1, mp2, ncol = 1, heights = c(1.3,1))
mp #
ggsave("Figure_05d.png", mp, width = 8, height = 8, bg = "white", dpi = 600)
Additional Marker Plots
<- read.csv("Supplemental_table_01.csv") %>%
xx arrange(desc(X.log10.p.)) %>% filter(!duplicated(SNP)) %>% slice(1:200)
#
<- 1
counter for(i in xx$SNP) {
#
<- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", myMarkers = i,
mp1 myColor = "steelblue", myTitle = "A) Su16",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", myMarkers = i,
mp2 myColor = "darkblue", myTitle = "B) Su17",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16", myMarkers = i,
mp3 myColor = "darkorange", myTitle = "C) Ro16",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", myMarkers = i,
mp4 myColor = "darkred", myTitle = "D) Ro17",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2,
mp common.legend = T, legend = "bottom")
#
ggsave(paste0("additional/Markers/pvalue/",
::str_pad(counter, 3, pad = "0"),"_",i,".png"), mp,
stringrwidth = 8, height = 6, bg = "white")
# Loop conditions
<- counter + 1
counter
}#
<- read.csv("Supplemental_table_01.csv") %>%
xx arrange(desc(abs(Effect))) %>% filter(!duplicated(SNP)) %>% slice(1:200)
#
<- 1
counter for(i in xx$SNP) {
#
<- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", myMarkers = i,
mp1 myColor = "steelblue", myTitle = "A) Su16",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", myMarkers = i,
mp2 myColor = "darkblue", myTitle = "B) Su17",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16", myMarkers = i,
mp3 myColor = "darkorange", myTitle = "C) Ro16",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", myMarkers = i,
mp4 myColor = "darkred", myTitle = "D) Ro17",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2,
mp common.legend = T, legend = "bottom")
#
ggsave(paste0("additional/Markers/effect/",
::str_pad(counter, 3, pad = "0"),"_",i,".png"), mp,
stringrwidth = 8, height = 6, bg = "white")
# Loop conditions
<- counter + 1
counter
}#
<- read.csv("Supplemental_table_01.csv") %>%
xx arrange(Chr, Pos) %>% filter(!duplicated(SNP))
#
<- 1
counter for(i in xx$SNP) {
#
<- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", myMarkers = i,
mp1 myColor = "steelblue", myTitle = "A) Su16",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", myMarkers = i,
mp2 myColor = "darkblue", myTitle = "B) Su17",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16", myMarkers = i,
mp3 myColor = "darkorange", myTitle = "C) Ro16",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", myMarkers = i,
mp4 myColor = "darkred", myTitle = "D) Ro17",
myYlab = "Protein\n(g / 100g dry seed)")
#
<- ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2,
mp common.legend = T, legend = "bottom")
#
ggsave(paste0("additional/Markers/chr/",i,".png"), mp,
width = 8, height = 6, bg = "white")
# Loop conditions
<- counter + 1
counter }
Supplemental Figure 5 - MAS Markers
# Prep data
<- read.csv("Data/MAS_Markers.csv")
xm # Create a plotting function
<- function(myExpt, myColor) {
gg_MASMarkers <- myG %>% filter(rs %in% xm$SNP) %>%
xx mutate(rs = factor(rs, levels = xm$SNP))
#
for(i in 12:ncol(xx)) { xx[,i] <- ifelse(!grepl("G|C|A|T", xx[,i]), NA, xx[,i]) }
#
<- xx %>% gather(Name, Allele, 12:ncol(.))
xx #
<- read.csv("Data/myD_LDP.csv") %>%
myLDP mutate(Name = gsub(" ", "_", Name),
Name = gsub("-", "\\.", Name),
Name = plyr::mapvalues(Name, "3156.11_AGL", "X3156.11_AGL"))
#
<- d2 %>% filter(AminoAcid == "Protein", ExptShort == myExpt) %>%
xY select(Name, Value) %>%
mutate(Name = gsub(" ", "_", Name),
Name = gsub("-", "\\.", Name),
Name = plyr::mapvalues(Name, "3156.11_AGL", "X3156.11_AGL"))
<- xx %>% left_join(xY, by = "Name") %>%
xx mutate(Value = ifelse(is.na(Allele), NA, Value)) %>%
left_join(select(myLDP, Name, Origin), by = "Name") %>%
filter(!is.na(Allele))
<- xx %>% filter(Origin %in% c("Canada","USA"))
xc #
ggplot(xx, aes(x = Allele, y = Value)) +
geom_violin(fill = myColor, color = NA, alpha = 0.7) +
geom_boxplot(width = 0.1, coef = 5) +
geom_quasirandom(data = xc, alpha = 0.7, size = 0.5) +
facet_wrap(rs ~ ., scales = "free_x", ncol = 6) +
+
theme_AGL theme(legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = myExpt, x = "Allele", y = "Protein (g / 100g dry seed)")
}# Plot
<- gg_MASMarkers(myExpt = "Su16", myColor = "steelblue")
mp ggsave("Supplemental_Figure_05a.png", mp, width = 12, height = 12, dpi = 300)
#
<- gg_MASMarkers(myExpt = "Su17", myColor = "darkblue")
mp ggsave("Supplemental_Figure_05b.png", mp, width = 12, height = 12, dpi = 300)
#
<- gg_MASMarkers(myExpt = "Ro16", myColor = "darkorange")
mp ggsave("Supplemental_Figure_05c.png", mp, width = 12, height = 12, dpi = 300)
#
<- gg_MASMarkers(myExpt = "Ro17", myColor = "darkred")
mp ggsave("Supplemental_Figure_05d.png", mp, width = 12, height = 12, dpi = 300)
Additional Figures
Additional Figure 1 - Total Protein by Entry
# Prep data
<- d2 %>% filter(AminoAcid == "Protein", ExptShort == "Su16") %>%
x1 arrange(Value) %>% pull(Entry)
<- d2 %>% filter(AminoAcid == "Protein") %>%
xx mutate(Entry = factor(Entry, levels = x1))
# Plot
<- ggplot(xx, aes(x = Entry, y = Value, color = Expt)) +
mp geom_point(alpha = 0.7, pch = 16) +
scale_color_manual(values = myCs_Expt) +
scale_y_continuous(breaks = 25:35) +
+
theme_AGL theme(legend.position = "bottom",
panel.grid.major.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()) +
guides(color = guide_legend(nrow = 2)) +
labs(title = "Total Protein", x = "Entry",
y = "Protein (g / 100g dry seed)")
ggsave("Additional/Additional_Figure_01.png", mp, width = 6, height = 4, dpi = 600)
Additional Figure 2 - Amino Acid Family by Expt
# Prep data
<- c("Family.Glutamate", "Family.Aspartate", "Family.Pyruvate",
myFams1 "Family.Serine","Family.Histidine","Family.Aromatic")
<- c("Glutamte Family", "Aspartate Family", "Pyruvate Family",
myFams2 "Serine Family", "Histidine Family", "Aromatic Family")
<- d3 %>% filter(!grepl("Perc", AminoAcidFamily)) %>%
xx mutate(AminoAcidFamily = plyr::mapvalues(AminoAcidFamily, myFams1, myFams2))
# Plot
<- ggplot(xx, aes(x = ExptShort, y = Value, fill = Expt)) +
mp geom_boxplot(alpha = 0.7, coef = 3.5) +
scale_fill_manual(values = myCs_Expt) +
facet_grid(. ~ AminoAcidFamily, labeller = label_wrap_gen(width = 10)) +
+
theme_AGL theme(legend.position = "bottom",
axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Amino Acid Concentration In A Lentil Diversity Panel",
y = "g / 100g dry seed", x = "")
ggsave("Additional/Additional_Figure_02.png", mp, width = 8, height = 5, dpi = 600)
Additional Figure 3 - Proteins by Expt
# Prep data
<- d2 %>% filter(AminoAcid != "Protein")
xx # Plot
<- ggplot(xx, aes(x = Expt, y = Value, fill = Expt)) +
mp geom_boxplot(alpha = 0.7) +
facet_wrap(. ~ AminoAcid, scales = "free_y", ncol = 9) +
scale_fill_manual(name = NULL, values = myCs_Expt) +
+
theme_AGL theme(legend.position = "bottom",
axis.text.x = element_blank(),
axis.ticks.x = element_blank()) +
labs(title = "Amino Acid Concentration In A Lentil Diversity Panel",
y = "g / 100g dry seed", x = NULL)
ggsave("Additional/Additional_Figure_03.png", mp, width = 12, height = 6, dpi = 600)
Additional Figure 4 - Cotyledon Color
# Prep data
<- c("Red", "Yellow", "Red/Yellow", "Green")
myCots <- d2 %>% filter(AminoAcid == "Protein") %>%
xx left_join(myLDP, by = c("Entry", "Name")) %>%
mutate(CotyledonColor = factor(CotyledonColor, levels = myCots))
# Plot
<- ggplot(xx %>% filter(ExptShort == "Su16"),
mp1 aes(x = Origin, y = Value, color = CotyledonColor)) +
geom_quasirandom(alpha = 0.7, pch = 16) +
facet_grid(. ~ Region, scales = "free_x", space = "free_x") +
scale_color_manual(values = c("darkred","darkorange","black","darkgreen")) +
+
theme_AGL theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom") +
guides(color = guide_legend(override.aes = list(size = 3))) +
labs(title = "A) Protein Concentration in Sutherland, Canada 2016",
x = NULL, y = "g / 100g dry seed")
#
<- ggplot(xx %>% filter(ExptShort == "Su16"),
mp2 aes(x = CotyledonColor, y = Value, color = CotyledonColor)) +
geom_quasirandom(alpha = 0.7, pch = 16) +
facet_grid(. ~ Region, scales = "free_x", space = "free_x") +
scale_color_manual(values = c("darkred","darkorange","black","darkgreen")) +
+
theme_AGL theme(legend.position = "none") +
labs(title = "B) Protein Concentration in Sutherland, Canada 2016",
x = NULL, y = "g / 100g dry seed")
#
<- ggarrange(mp1, mp2, ncol = 1, nrow = 2, heights = c(1.4,1))
mp ggsave("Additional/Additional_Figure_04.png", mp, width = 10, height = 8, dpi = 600)
Additional Figure 5 - Size
# Prep data
<- d2 %>% filter(AminoAcid == "Protein") %>% rename(Protein=Value)
x1 <- myLDP %>% select(Name, CotyledonColor, SeedMass1000.2017, Size, DTF_Cluster, Source)
x2 <- left_join(x1, x2, by = "Name") %>%
xx mutate(Size = factor(Size, levels = c("S","M","L","XL")))
# Plot
<- ggplot(xx, aes(x = Size, y = Protein)) +
mp geom_violin(aes(fill = ExptShort), alpha = 0.7) +
geom_boxplot(width = 0.1) +
scale_fill_manual(values = myCs_Expt) +
facet_wrap(ExptShort ~ ., scales = "free_y", ncol = 2) +
+
theme_AGL theme(legend.position = "none") +
labs(title = "Protein x Seed Size",
x = "Seed Size", y = "Protein (g / 100g dry seed)")
ggsave("Additional/Additional_Figure_05.png", mp, width = 6, height = 4, dpi = 600)
Additional Figure 6 - Stability
# Prep data
<- read.csv("Additional/Pulses/lentils-quality-report.csv")
xx <- xx %>% group_by(Grade) %>% summarise(Avg = round(mean(Mean),1))
xm <- xx %>% left_join(xm, by = "Grade")
xx # Plot
<- ggplot(xx, aes(x = Year, y = Mean, color = Grade)) +
mp geom_hline(aes(yintercept = Avg), alpha = 0.7) +
geom_point(size = 0.75, alpha = 0.6) + geom_line() +
geom_errorbar(aes(ymin = Min, ymax = Max), alpha = 0.5) +
facet_grid(. ~ Grade + paste("Mean =", Avg)) +
scale_x_continuous(breaks = seq(2010,2022, by = 2)) +
scale_y_continuous(breaks = seq(22,32, by = 2), minor_breaks = 22:32) +
+
theme_AGL theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Saskatchewan Lentil Protein Concentration",
subtitle = "Data: Dr. Ning Wang (Canadian Grain Commision)",
y = "g / 100g dry seed", x = NULL)
ggsave("Additional/Additional_Figure_06.png", mp, width = 8, height = 4, dpi = 600)
Additional Figure 7 - Pulses
# Prep data
<- readxl::read_xlsx("Additional/Pulses/pulses-quality.xlsx", "mean") %>%
x1 gather(Crop, Mean, 2:ncol(.))
<- readxl::read_xlsx("Additional/Pulses/pulses-quality.xlsx", "sd") %>%
x2 gather(Crop, SD, 2:ncol(.))
<- left_join(x1, x2, by = c("Amino Acid","Crop")) %>%
xx mutate(`Amino Acid` = factor(`Amino Acid`, levels = myPs))
# Plot
<- ggplot(xx, aes(x = `Amino Acid`, y = Mean, fill = Crop)) +
mp geom_col(position = "dodge", alpha = 0.7) +
geom_errorbar(aes(ymin = Mean - SD, ymax = Mean + SD),
position = "dodge", linewidth = 0.2, alpha = 0.7) +
+
theme_AGL theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Amino Acid Concentration", x = NULL)
ggsave("Additional/Additional_Figure_07.png", mp, width = 8, height = 4, dpi = 600)
© Derek Michael Wright