Evaluating the breeding potential of cultivated lentils for protein and amino acid concentration and quality

unpublished


Introduction

This vignette contains the R code and analysis done for the paper:

which is follow-up to:


This work done as part of the AGILE & EVOLVES projects at the University of Saskatchewan along with collaboration with partners at the University of Manitoba.


Data Preparation

# Load Libraries
library(tidyverse)
library(ggbeeswarm)
library(ggpubr)
library(FactoMineR)
library(plotly)
library(htmlwidgets)
# Create plotting theme
theme_AGL <- theme_bw() + 
  theme(strip.background   = element_rect(colour = "black", fill = NA, size = 0.5),
        panel.background   = element_rect(colour = "black", fill = NA, size = 0.5),
        panel.border       = element_rect(colour = "black", size = 0.5),
        panel.grid         = element_line(color  = alpha("black", 0.1), size = 0.5),
        panel.grid.minor.x = element_blank(), 
        panel.grid.minor.y = element_blank(),
        legend.key = element_rect(color = NA))
# Prep data
myPs <- c("Protein", "Glutamate", "Aspartate", "Arginine",
          "Leucine", "Lysine", "Phenylalanine", "Serine", "Valine", 
          "Isoleucine", "Proline", "Alanine", "Glycine", "Threonine", 
          "Histidine", "Tyrosine", "Methionine", "Cysteine", "Tryptophan")
myEs1 <- c("Sutherland, Canada 2016", "Rosthern, Canada 2016",
           "Sutherland, Canada 2017", "Rosthern, Canada 2017")
myEs2 <- c("Su16", "Ro16", "Su17", "Ro17")
myCs_Expt <- c("steelblue", "darkorange", "darkblue", "darkred")
myCs_Region <- c("darkred", "darkgreen", "darkorange", "darkblue", "steelblue")
myCs_Clusters <- c("red4", "darkorange3", "blue2", "deeppink3", 
                      "steelblue", "darkorchid4", "darkslategray", "chartreuse4")
# Wet Chemistry Data
d1 <- read.csv("Data/myD_Protein_WetChem.csv") %>%
  select(-Plot, -Rep, -Sample.Name..1st..text.) %>%
  rename(Year=Planting.Date..date.) %>%
  gather(AminoAcid, Value, 5:ncol(.)) %>%
  mutate(AminoAcid = gsub("Protein....", "Protein", AminoAcid),
         AminoAcid = gsub("Glutamic.acid", "Glutamate", AminoAcid),
         AminoAcid = gsub("Aspartic.acid", "Aspartate", AminoAcid),
         AminoAcid = gsub("..1st....", "", AminoAcid),
         AminoAcid = factor(AminoAcid, levels = myPs),
         Expt = factor(paste(Location, Year), levels = myEs1),
         ExptShort = plyr::mapvalues(Expt, myEs1, myEs2),
         Value = round(as.numeric(Value), 4))
# NIRS Data
d2 <- read.csv("Data/myD_Protein_NIRS.csv") %>% 
  select(-Plot, -Rep, -Sample.Name..1st..text.) %>%
  rename(Year=Planting.Date..date.) %>%
  gather(AminoAcid, Value, 5:ncol(.)) %>%
  mutate(AminoAcid = gsub("Glutamic.acid", "Glutamate", AminoAcid),
         AminoAcid = gsub("Aspartic.acid", "Aspartate", AminoAcid),
         AminoAcid = gsub("..1st....", "", AminoAcid),
         AminoAcid = factor(AminoAcid, levels = myPs),
         Expt = factor(paste(Location, Year), levels = myEs1),
         ExptShort = plyr::mapvalues(Expt, myEs1, myEs2),
         Value = round(Value, 4))
#
myLDP <- read.csv("Data/myD_LDP.csv")
# Protein Families
myPFs <- c("Family.Glutamate", "Family.Aspartate", "Family.Pyruvate", 
           "Family.Serine", "Family.Histidine", "Family.Aromatic",
           "Perc.Family.Glutamate", "Perc.Family.Aspartate", "Perc.Family.Pyruvate",
           "Perc.Family.Serine", "Perc.Family.Histidine", "Perc.Family.Aromatic")
d3 <- d2 %>% spread(AminoAcid, Value) %>%
  mutate(Family.Glutamate = Glutamate + Proline + Arginine, 
         Family.Aspartate = Aspartate + Threonine + Isoleucine + Methionine + Lysine,
         Family.Pyruvate  = Alanine + Valine + Leucine,
         Family.Serine    = Serine + Glycine + Cysteine,
         Family.Histidine = Histidine,
         Family.Aromatic  = Tryptophan + Phenylalanine + Tyrosine ) %>%
  mutate(Perc.Family.Glutamate = 100 * Family.Glutamate / Protein,
         Perc.Family.Aspartate = 100 * Family.Aspartate / Protein, 
         Perc.Family.Pyruvate  = 100 * Family.Pyruvate / Protein,
         Perc.Family.Serine    = 100 * Family.Serine / Protein,
         Perc.Family.Histidine = 100 * Family.Histidine / Protein,
         Perc.Family.Aromatic  = 100 * Family.Aromatic / Protein) %>%
  select(Name, Expt, ExptShort, 
         Family.Glutamate, Family.Aspartate, Family.Histidine,
         Family.Pyruvate, Family.Serine, Family.Aromatic,
         #
         Perc.Family.Glutamate, Perc.Family.Aspartate, Perc.Family.Histidine,
         Perc.Family.Pyruvate, Perc.Family.Serine, Perc.Family.Aromatic) %>%
  gather(AminoAcidFamily, Value, 4:ncol(.)) %>%
  mutate(AminoAcidFamily = factor(AminoAcidFamily, levels = myPFs))
# Essential Amino Acid Ratio
myEAA <- myEAA <- c("Histidine","Isoleucine","Leucine","Lysine","Methionine",
                    "Phenylalanine","Threonine","Tryptophan","Valine")
x1 <- d2 %>% filter(AminoAcid == "Protein") %>%
  select(Name, Expt, ExptShort, Protein=Value)
x2 <- d2 %>% filter(AminoAcid %in% myEAA) %>%
  group_by(Name, ExptShort) %>% 
  summarise(Essential.AA = sum(Value)) %>% ungroup()
d4 <- left_join(x1, x2, by = c("Name", "ExptShort")) %>% 
  mutate(Perc.Essential.AA = 100 * Essential.AA / Protein)

GWAS

Prepare Data For GWAS

# Function to fix names for GWAS
fixNames <- function(xx) {
  xx %>% mutate(Name = gsub(" ", "_", Name),
         Name = gsub("-", "\\.", Name),
         Name = plyr::mapvalues(Name, "3156.11_AGL", "X3156.11_AGL"))
}
# Ro16 data
myY <- d2 %>% filter(ExptShort == "Ro16") %>%
  select(-Entry, -Location, -Year, -Expt) %>%
  mutate(AminoAcid = paste(AminoAcid, ExptShort, sep = "_")) %>%
  select(-ExptShort) %>%
  spread(AminoAcid, Value) %>%
  fixNames()
write.csv(myY, "Data/myY_NIRS_Ro16.csv", row.names = F)
# Ro17 data
myY <- d2 %>% filter(ExptShort == "Ro17") %>%
  select(-Entry, -Location, -Year, -Expt) %>%
  mutate(AminoAcid = paste(AminoAcid, ExptShort, sep = "_")) %>%
  select(-ExptShort) %>%
  spread(AminoAcid, Value) %>%
  fixNames()
write.csv(myY, "Data/myY_NIRS_Ro17.csv", row.names = F)
# Su16 data
myY <- d2 %>% filter(ExptShort == "Su16") %>%
  select(-Entry, -Location, -Year, -Expt) %>%
  mutate(AminoAcid = paste(AminoAcid, ExptShort, sep = "_")) %>%
  select(-ExptShort) %>%
  spread(AminoAcid, Value) %>%
  fixNames()
write.csv(myY, "Data/myY_NIRS_Su16.csv", row.names = F)
# Su17 data
myY <- d2 %>% filter(ExptShort == "Su17") %>%
  select(-Entry, -Location, -Year, -Expt) %>%
  mutate(AminoAcid = paste(AminoAcid, ExptShort, sep = "_")) %>%
  select(-ExptShort) %>%
  spread(AminoAcid, Value) %>%
  fixNames()
write.csv(myY, "Data/myY_NIRS_Su17.csv", row.names = F)
# All Data
myY <- d2 %>% 
  select(-Entry, -Location, -Year, -Expt) %>%
  mutate(AminoAcid = paste(AminoAcid, ExptShort, sep = "_")) %>%
  select(-ExptShort) %>%
  spread(AminoAcid, Value) %>%
  fixNames()
write.csv(myY, "Data/myY_NIRS.csv", row.names = F)
# Covariate Data
myCV <- myLDP %>% 
  select(Name, DTF_Ro16, DTF_Ro17, DTF_Su16, DTF_Su17,
               REP_Ro16, REP_Ro17, REP_Su16, REP_Su17) %>%
  fixNames()
write.csv(myCV, "Data/myCV.csv", row.names = F)

Run GWAS

# devtools::install_github("jiabowang/GAPIT3")
library(GAPIT3)
#
myG <- read.csv("Data/myG_LDP.csv", header = F)
myCV <- read.csv("Data/myCV.csv")
# Run GWAS on all data
myY <- read.csv("Data/myY_NIRS.csv")
myGAPIT <- GAPIT(
  Y = myY,
  G = myG,
  PCA.total = 4,
  model = c("MLM","FarmCPU","Blink"),
  Phenotype.View = F
)
# Run GWAS for Ro16 with CVs
myY_Ro16 <- read.csv("Data/myY_NIRS_Ro16.csv")
myGAPIT <- GAPIT(
  Y = myY_Ro16,
  G = myG,
  CV = myCV[,c("Name","DTF_Ro16","REP_Ro16")],
  PCA.total = 4,
  model = c("MLM","FarmCPU","Blink"),
  Phenotype.View = F
)
# Run GWAS for Ro17 with CVs
myY_Ro17 <- read.csv("Data/myY_NIRS_Ro17.csv")
myGAPIT <- GAPIT(
  Y = myY_Ro17,
  G = myG,
  CV = myCV[!is.na(myCV$REP_Ro17),c("Name","DTF_Ro17","REP_Ro17")],
  PCA.total = 4,
  model = c("MLM","FarmCPU","Blink"),
  Phenotype.View = F
)
# Run GWAS for Su16 with CVs
myY_Su16 <- read.csv("Data/myY_NIRS_Su16.csv")
myGAPIT <- GAPIT(
  Y = myY_Su16,
  G = myG,
  CV = myCV[,c("Name","DTF_Su16","REP_Su16")],
  PCA.total = 4,
  model = c("MLM","FarmCPU","Blink"),
  Phenotype.View = F
)
# Run GWAS for Su17 with CVs
myY_Su17 <- read.csv("Data/myY_NIRS_Su17.csv")
myGAPIT <- GAPIT(
  Y = myY_Su17,
  G = myG,
  CV = myCV[!is.na(myCV$REP_Su17),c("Name","DTF_Su17","REP_Su17")],
  PCA.total = 4,
  model = c("MLM","FarmCPU","Blink"),
  Phenotype.View = F
)

Post GWAS

# devtools::install_github("derekmichaelwright/gwaspr")
library(gwaspr)
#
myG <- read.csv("Data/myG_LDP.csv", header = T)
myMs <- c(# Marker set #1
          "Lcu.2RBY.Chr3p339102503", "Lcu.2RBY.Chr5p327505937", "Lcu.2RBY.Chr5p467479275",
          # Marker set #2
          "Lcu.2RBY.Chr1p437385632", "Lcu.2RBY.Chr4p432694216", "Lcu.2RBY.Chr6p411536500") 
myCs <- c(rep("red",3), rep("blue",3))

Supplemental Table 1 - Significant Results

# reorder GWAS results tables and compile significant results
myR1 <- list_Result_Files("GWAS_Results/")
order_GWAS_Results(folder = "GWAS_Results/", files = myR1)
x1 <- table_GWAS_Results("GWAS_Results/", myR1, threshold = 6.7, sug.threshold = 5.3)
#
myR2 <- list_Result_Files("GWAS_Results_CV_DTF_REP/")
order_GWAS_Results(folder = "GWAS_Results_CV_DTF_REP/", files = myR2)
x2 <- table_GWAS_Results("GWAS_Results_CV_DTF_REP/", myR2, threshold = 6.7, sug.threshold = 5.3)
#
x1 <- x1 %>% mutate(CV = "NONE") %>% arrange(P.value)
x2 <- x2 %>% mutate(CV = "DTF+REP") %>% arrange(P.value)
xx <- bind_rows(x1, x2)
write.csv(xx, "Supplemental_table_01.csv", row.names = F)
xx <- read.csv("Supplemental_table_01.csv")
DT::datatable(xx)

Manhattan plots

# Create custom manhattan plots
myTs <- list_Traits("GWAS_Results/")
for(i in myTs) {
  # No CV
  mp1 <- gg_Manhattan(folder = "GWAS_Results/", trait = i, 
                      threshold = 6.7, sug.threshold = 5.3, legend.rows = 2,
                      vlines = myMs, vline.colors = myCs )
  # With CV
  mp2 <- gg_Manhattan(folder = "GWAS_Results_CV_DTF_REP/", trait = i, facet = F,
                      title = paste(i, "| CV = DTF + REP"),
                      threshold = 6.7, sug.threshold = 5.3, legend.rows = 2,
                      vlines = myMs, vline.colors = myCs)
  # Bind together
  i1 <- substr(i, 1, regexpr("_",i)-1) 
  i2 <- substr(i, regexpr("_",i)+1, nchar(i))
  mp <- ggarrange(mp1, mp2, nrow = 2, ncol = 1, common.legend = T, legend = "bottom")
  ggsave(paste0("Additional/ManH/Multi_",i2,"_",i1,".png"), 
         mp, width = 12, height = 8, bg = "white")
}

Supplemental Figure 1 - Wet Chem vs NIRS

# Prep data
x1 <- d1 %>% select(Name, Expt, AminoAcid, `Wet Chemistry`=Value)
x2 <- d2 %>% select(Name, Expt, AminoAcid, NIRS=Value)
xx <- left_join(x1, x2, by = c("Name", "Expt", "AminoAcid"))
# Plot
mp <- ggplot(xx, aes(x = `Wet Chemistry`, y = NIRS)) +
  geom_point(aes(color = Expt), alpha = 0.5, pch = 16) +
  stat_smooth(geom = "line", method = "lm", alpha = 0.7, linewidth = 1.5) +
  stat_regline_equation(aes(label = ..rr.label..)) +
  facet_wrap(AminoAcid ~ ., scales = "free") +
  scale_color_manual(values = myCs_Expt) +
  theme_AGL +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(override.aes = list(size = 3))) +
  labs(title = "Concentration (g / 100g dry seed)",
       y = "Near-infrared Spectroscopy")
ggsave("Supplemental_Figure_01.png", mp, width = 10, height = 8, dpi = 600)

Supplemental Figure 2 - Total Protein & Amino Acids

# Prep data
xx <- d2 %>% 
  mutate(Family = ifelse(AminoAcid == "Protein", "Total Protein", NA),
         Family = ifelse(AminoAcid %in% c("Glutamate", "Proline",  "Arginine"), "Glutamate", Family),
         Family = ifelse(AminoAcid %in% c("Aspartate", "Threonine", "Isoleucine", "Methionine", "Lysine"), "Aspartate", Family),
         Family = ifelse(AminoAcid %in% c("Alanine", "Valine", "Leucine"), "Pyruvate", Family),
         Family = ifelse(AminoAcid %in% c("Serine", "Glycine", "Cysteine"), "Serine", Family),
         Family = ifelse(AminoAcid %in% c("Histidine"), "Histidine", Family),
         Family = ifelse(AminoAcid %in% c("Tryptophan", "Phenylalanine", "Tyrosine"), "Aromatic", Family)) %>%
  mutate(AminoAcid = factor(AminoAcid, levels = myPs),
         Family = factor(Family, levels = c("Total Protein", "Glutamate", "Aspartate", "Pyruvate",
                                            "Serine", "Histidine", "Aromatic")))
# Plot
mp <- ggplot(xx, aes(x = Value, fill = Family)) +
  geom_histogram(color = "black", alpha = 0.7, lwd = 0.2) +
  facet_grid(ExptShort ~ AminoAcid, scales = "free_x") +
  scale_x_reverse() +
  theme_AGL +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
  guides(fill = guide_legend(nrow = 1)) +
  labs(title = "Lentil Diversity Panel", x = "g / 100g dry seed")
ggsave("Supplemental_Figure_02.png", mp, width = 18, height = 5, dpi = 600)

Figure 1 - Total Protein & Amino Acids

# Prep data
xx <- d2 %>% filter(AminoAcid != "Protein")
# Plot
mp1 <- ggplot(xx, aes(x = AminoAcid, y = Value, fill = Expt)) +
  geom_boxplot(alpha = 0.7, coef = 5, lwd = 0.1, position = position_dodge(width = 0.9)) +
  scale_fill_manual(name = NULL, values = myCs_Expt) +
  scale_y_continuous(breaks = 0:5) +
  theme_AGL +
  theme(legend.position = "none", 
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "A) Amino Acids", y = "g / 100g dry seed", x = NULL)
# Prep data
myFams1 <- c("Family.Glutamate", "Family.Aspartate", "Family.Pyruvate", 
             "Family.Serine", "Family.Histidine", "Family.Aromatic")
myFams2 <- c("Glutamte Family", "Aspartate Family", "Pyruvate Family", 
             "Serine Family", "Histidine Family", "Aromatic Family")
xx <- d3 %>% filter(!grepl("Perc", AminoAcidFamily)) %>% 
  mutate(AminoAcidFamily = plyr::mapvalues(AminoAcidFamily, myFams1, myFams2))
# Plot
mp2 <- ggplot(xx, aes(x = Value, fill = Expt)) +
  geom_histogram(color = "black", alpha = 0.7, lwd = 0.1) +
  facet_grid(ExptShort ~ AminoAcidFamily, scales = "free_x", labeller = label_wrap_gen(width = 10)) +
  scale_fill_manual(values = myCs_Expt) +
  theme_AGL +
  theme(legend.position = "bottom",
        legend.text = element_text(size = 8),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
  guides(fill = guide_legend(ncol = 4, override.aes = list(lwd = 0))) +
  labs(title = "B) Amino Acid Families", 
       y = "Count", x = "g / 100g dry seed")
mpl <- get_legend(mp2)
mp2 <- mp2 + theme(legend.position = "none")
#
myEAA <- myEAA <- c("Histidine", "Isoleucine", "Leucine", "Lysine", "Methionine",
                    "Phenylalanine", "Threonine", "Tryptophan", "Valine")
x1 <- d2 %>% filter(AminoAcid == "Protein") %>%
  select(Name, ExptShort, Protein=Value)
x2 <- d2 %>% filter(AminoAcid %in% myEAA) %>%
  group_by(Name, ExptShort) %>% 
  summarise(Essential = sum(Value)) %>% ungroup()
xx <- left_join(x1, x2, by = c("Name", "ExptShort")) %>% 
  mutate(Ratio = 100 * Essential / Protein)
# Plot
mp3 <- ggplot(xx, aes(x = ExptShort, y = Protein)) +
  geom_violin() +
  geom_quasirandom(aes(color = ExptShort), pch = 16, size = 1, alpha = 0.7) +
  geom_boxplot(width = 0.1, coef = 5) +
  scale_color_manual(values = myCs_Expt) +
  scale_y_continuous(breaks = c(26,28,30,32,34)) +
  theme_AGL +
  theme(legend.position = "none") +
  labs(title = "C) Total Protein", y = "g / 100g dry seed", x = NULL)
mp4 <- ggplot(xx, aes(x = ExptShort, y = Ratio)) +
  geom_violin() +
  geom_quasirandom(aes(color = ExptShort), pch = 16, size = 1, alpha = 0.7) +
  geom_boxplot(width = 0.1, coef = 5) +
  scale_color_manual(values = myCs_Expt) +
  theme_AGL +
  theme(legend.position = "none") +
  labs(title = "D) Essential Amino Acids", y = "Percent of Total Protein", x = NULL)
# Bind together
mp <- ggarrange(ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2), 
                mpl, nrow = 2, heights = c(1,0.1))
ggsave("Figure_01.png", mp, width = 10, height = 8, dpi = 600, bg = "white")

Figure 2 - Protein x Traits

# Prep data
myCots <- c("Red", "Yellow", "Red/Yellow", "Green")
xx <- d2 %>% filter(AminoAcid == "Protein") %>%
  left_join(myLDP, by = c("Entry","Name")) %>%
  mutate(Origin = ifelse(Origin %in% c("Canada","USA"), "Local", "Other"),
         CotyledonColor = factor(CotyledonColor, levels = myCots)) %>%
  arrange(desc(Origin)) %>% 
  filter(ExptShort == "Su16")
# Plot
mp1 <- ggplot(xx, aes(x = Region, y = Value, color = Origin, pch = CotyledonColor)) +
  geom_quasirandom(fill = "steelblue", alpha = 0.7) +
  facet_grid(. ~ Expt) +
  scale_color_manual(values = c("black", "steelblue"), guide = F) +
  scale_shape_manual(name = "Cotyledon Color", values = c(24,25,23,22)) +
  theme_AGL +
  guides(shape = guide_legend(override.aes = list(size = 3))) +
  labs(title = "A) Protein x Region", 
       x = "Origin", y = "Protein (g / 100g dry seed)")
# Plot
mp2 <- ggplot(xx, aes(x = SeedMass1000.2017, y = Value)) +
  geom_point(aes(shape = CotyledonColor, color = Origin), fill = "steelblue", alpha = 0.7) +
  stat_smooth(geom = "line", method = "lm", se = F, 
              color = "black", alpha = 0.7, size = 1) +
  stat_regline_equation(aes(label = ..rr.label..)) +
  facet_wrap(Expt ~ .) +
  scale_color_manual(values = c("black", "steelblue"), guide = F) +
  scale_shape_manual(name = "Cotyledon Color", values = c(24,25,23,22)) +
  theme_AGL + 
  guides(shape = guide_legend(override.aes = list(size = 3))) +
  labs(title = "B) Protein x Seed Size", 
       x = "Mass of 1000 seeds (g)", y = "Protein (g / 100g dry seed)")
# Prep data
x1 <- d2 %>% filter(AminoAcid == "Protein") %>%
  select(Entry, Expt, ExptShort, Protein=Value)
x2 <- myLDP %>% select(Entry, Name, DTF_Ro16, DTF_Ro17, DTF_Su16, DTF_Su17) %>%
  gather(ExptShort, DTF, 3:6) %>%
  mutate(ExptShort = gsub("DTF_", "", ExptShort))
xx <- left_join(x1, x2, by = c("Entry", "ExptShort")) %>%
  mutate(ExptShort = factor(ExptShort, levels = myEs2)) %>%
  left_join(myLDP, by = c("Entry","Name")) %>%
  mutate(Origin = ifelse(Origin %in% c("Canada","USA"), "Local", "Other")) %>%
  arrange(desc(Origin))
# Plot
mp3 <- ggplot(xx, aes(x = DTF, y = Protein)) +
  geom_point(aes(fill = Expt), color = alpha("white",0), pch = 21) +
  geom_point(aes(color = Origin), fill = alpha("white",0), pch = 21) +
  stat_smooth(geom = "line", method = "lm", se = F, 
              color = "black", alpha = 0.7, size = 1) +
  stat_regline_equation(aes(label = ..rr.label..)) +
  scale_fill_manual(values = alpha(myCs_Expt,0.5)) +
  scale_color_manual(values = c("black", alpha("white",0))) +
  guides(color = guide_legend(nrow = 2, override.aes = list(size = 2.5, fill = "grey50")),
         fill = guide_legend(nrow = 2, override.aes = list(size = 3, alpha = 0.9))) +
  theme_AGL + 
  labs(title = "C) Protein x DTF", 
       x = "Days from sowing to flower (days)", y = "Protein (g / 100g dry seed)")
# Prep data
x1 <- d2 %>% filter(AminoAcid == "Protein") %>%
  select(Entry, Expt, ExptShort, Protein=Value)
x2 <- myLDP %>% select(Entry, Name, REP_Ro16, REP_Ro17, REP_Su16, REP_Su17) %>%
  gather(ExptShort, REP, 3:6) %>%
  mutate(ExptShort = gsub("REP_", "", ExptShort))
xx <- left_join(x1, x2, by = c("Entry", "ExptShort")) %>%
  left_join(myLDP, by = c("Entry","Name")) %>%
  mutate(Origin = ifelse(Origin %in% c("Canada","USA"), "Local", "Other")) %>%
  arrange(desc(Origin))
# Plot
mp4 <- ggplot(xx, aes(x = REP, y = Protein)) +
  geom_point(aes(fill = Expt), color = alpha("white",0), pch = 21) +
  geom_point(aes(color = Origin), fill = alpha("white",0), pch = 21) +
  stat_smooth(geom = "line", method = "lm", se = F, 
              color = "black", alpha = 0.7, size = 1) +
  stat_regline_equation(aes(label = ..rr.label..)) +
  scale_fill_manual(values = alpha(myCs_Expt,0.6)) +
  scale_color_manual(values = c("black", alpha("white",0))) +
  guides(color = guide_legend(nrow = 2, override.aes = list(size = 2.5, fill = "grey50")),
         fill = guide_legend(nrow = 2, override.aes = list(size = 3, alpha = 0.9))) +
  theme_AGL + 
  labs(title = "D) Protein x REP", 
       x = "Reproductive period (days)", y = "Protein (g / 100g dry seed)")
# Bind together
mp1 <- ggarrange(mp1, mp2, ncol = 2, common.legend = T, legend = "bottom")
mp2 <- ggarrange(mp3, mp4, ncol = 2, common.legend = T, legend = "bottom")
mp <- ggarrange(mp1, mp2, ncol = 1, heights = c(1,1.1))
ggsave("Figure_02.png", mp, width = 8, height = 8, dpi = 600, bg = "white")

Figure 3 & Supplemental Figure 3 - Selections

# Prep data
gg_Selection <- function(myAmino = "Protein", myExpt = "Ro16", 
                         myTitle = myExpt, myColor = "steelblue") {
  # Prep data
  xi <- myLDP %>% select(Name, Origin, Ro16=DTF_Ro16, Ro17=DTF_Ro17, Su16=DTF_Su16, Su17=DTF_Su17) %>%
    gather(ExptShort, DTF, Ro16, Ro17, Su16, Su17)
  xx <- d2 %>% filter(AminoAcid == myAmino) %>%
    left_join(xi, by = c("Name", "ExptShort")) %>% 
    mutate(Selection = NA)
  #
  for(i in myEs2) {
    myYmin <- quantile(xx %>% filter(ExptShort == i, Origin %in% c("Canada", "USA")) %>% pull(Value), 0.75)
    myYmax <- max(xx %>% filter(ExptShort == i) %>% pull(Value))
    myXmin <- min(xx %>% filter(ExptShort == i, Origin %in% c("Canada", "USA")) %>% pull(DTF))
    myXmax <- max(xx %>% filter(ExptShort == i) %>% pull(DTF))
    xx <- xx %>% 
      mutate(Selection = ifelse((ExptShort == i & Value > myYmin & DTF > myXmin), "Yes", Selection))
  }
  ss <- xx %>% select(Name, ExptShort, Selection) %>% spread(ExptShort, Selection) %>% 
    filter(!is.na(Ro16), !is.na(Ro17), !is.na(Su16), !is.na(Su17)) %>% # must be "Yes" in all Expt
    mutate(Selection = "Yes") %>% select(Name, Selection)
  xx <- xx %>% filter(ExptShort == myExpt) %>% select(-Selection) %>%
    left_join(ss, by = "Name") %>%
    mutate(Origin2 = ifelse(Origin %in% c("Canada", "USA"), "Local", "Other"),
           Selection = ifelse(is.na(Selection), "No", Selection),
           Selection = ifelse(Origin2 == "Local", "Local", Selection)) %>%
    arrange(desc(Origin2))
  myNum <- sum(xx$Selection == "Yes")
  xx <- xx %>% 
    mutate(Selection = ifelse(Selection == "Yes", paste0("Yes (", myNum, ")"), Selection),
           Selection = factor(Selection, levels = c("No", paste0("Yes (", myNum, ")"), "Local")))
  #
  myYmin <- quantile(xx %>% filter(Origin2 == "Local") %>% pull(Value), 0.75)
  myYmax <- max(xx %>% pull(Value))
  myXmin <- min(xx %>% filter(Origin2 == "Local") %>% pull(DTF))
  myXmax <- max(xx %>% pull(DTF))
  # Plot
  ggplot(xx) +
    geom_rect(xmin = myXmin, xmax = myXmax, 
              ymin = myYmin, ymax = myYmax,
              color = "black", fill = NA) +
    geom_point(aes(x = DTF, y = Value, shape = Selection, size = Selection,
                   fill = Selection, color = Selection,
                   key1 = Entry, key2 = Name, key3 = Origin), alpha = 0.5) +
    scale_color_manual(values = c(myColor, "black", "black")) +
    scale_fill_manual(values = c(myColor, myColor, "black")) +
    scale_shape_manual(values = c(0,22,8)) +
    scale_size_manual(values = c(1,1,1.5)) +
    guides(shape = guide_legend(override.aes = list(fill = "grey", color = "black", size = 2.5))) +
    theme_AGL + 
    theme(axis.title = element_text(size = 10)) +
    labs(subtitle = myTitle, 
         x = "Days from sowing to flower (days)",
         y = paste(myAmino, "(g / 100g dry seed)"))
}
#
mp1 <- gg_Selection(myExpt = "Su16", myColor = "steelblue",  myTitle ="A) Sutherland, Canada 2016")
mp2 <- gg_Selection(myExpt = "Su17", myColor = "darkblue",   myTitle ="B) Sutherland, Canada 2017")
mp3 <- gg_Selection(myExpt = "Ro16", myColor = "darkorange", myTitle ="C) Rosthern, Canada 2016")
mp4 <- gg_Selection(myExpt = "Ro17", myColor = "darkred",    myTitle ="D) Rosthern, Canada 2017")
mp <- ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2, common.legend = T, legend = "bottom")
ggsave("Supplemental_Figure_03.png", mp, width = 6, height = 6, dpi = 600, bg = "white")
#
mp1 <- gg_Selection(myExpt = "Su16", myAmino = "Lysine", myColor = "steelblue",
                    myTitle ="A) Sutherland, Canada 2016")
mp2 <- gg_Selection(myExpt = "Su17", myAmino = "Lysine", myColor = "darkblue",
                    myTitle ="B) Sutherland, Canada 2017")
mp3 <- gg_Selection(myExpt = "Ro16", myAmino = "Lysine", myColor = "darkorange",
                    myTitle ="C) Rosthern, Canada 2016")
mp4 <- gg_Selection(myExpt = "Ro17", myAmino = "Lysine", myColor = "darkred",
                    myTitle ="D) Rosthern, Canada 2017")
mp <- ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2, common.legend = T, legend = "bottom")
ggsave("Figure_03.png", mp, width = 6, height = 6, dpi = 600, bg = "white")
#
saveWidget(ggplotly(mp1), file="Additional/Figure_03_Su16.html")
saveWidget(ggplotly(mp2), file="Additional/Figure_03_Su17.html")
saveWidget(ggplotly(mp3), file="Additional/Figure_03_Ro16.html")
saveWidget(ggplotly(mp4), file="Additional/Figure_03_Ro17.html")

Additional Figures - AA Selections

counter <- 1
for(i in myPs) {
  # Plot
  mp1 <- gg_Selection(myExpt = "Su16", myAmino = i, myColor = "steelblue",
                      myTitle ="A) Sutherland, Canada 2016")
  mp2 <- gg_Selection(myExpt = "Su17", myAmino = i, myColor = "darkblue",
                      myTitle ="B) Sutherland, Canada 2017")
  mp3 <- gg_Selection(myExpt = "Ro16", myAmino = i, myColor = "darkorange",
                      myTitle ="C) Rosthern, Canada 2016")
  mp4 <- gg_Selection(myExpt = "Ro17", myAmino = i, myColor = "darkred",
                      myTitle ="D) Rosthern, Canada 2017")
  # Bind together
  mp <- ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2, common.legend = T, legend = "bottom")
  ggsave(paste0("Additional/AA_Selections/Figure_03_", 
                stringr::str_pad(counter, 2, pad = "0"), "_", i,".png"), 
         mp, width = 6, height = 6, dpi = 600, bg = "white")
  # Save HTML
  saveWidget(ggplotly(mp1), 
             file = paste0("Additional/AA_Selections/Figure_03_",
                           stringr::str_pad(counter, 2, pad = "0"), "_", i, "_Su16.html"))
  # Increase loop counter
  counter <- counter + 1
}

Supplemental Table 2 - Amino Acid Selections

# Create function
DT_AA_Selection <- function(myAmino = "Protein") {
  # Prep data
  xi <- myLDP %>%
    select(Name, Origin, Ro16=DTF_Ro16, Ro17=DTF_Ro17, Su16=DTF_Su16, Su17=DTF_Su17) %>%
    gather(ExptShort, DTF, Ro16, Ro17, Su16, Su17)
  xx <- d2 %>% filter(AminoAcid == myAmino) %>%
    left_join(xi, by = c("Name", "ExptShort")) %>% 
    mutate(Selection = NA)
  #
  for(i in myEs2) {
    myYmin <- quantile(xx %>% filter(ExptShort == i, Origin %in% c("Canada", "USA")) %>% pull(Value), 0.75)
    myYmax <- max(xx %>% filter(ExptShort == i) %>% pull(Value))
    myXmin <- min(xx %>% filter(ExptShort == i, Origin %in% c("Canada", "USA")) %>% pull(DTF))
    myXmax <- max(xx %>% filter(ExptShort == i) %>% pull(DTF))
    xx <- xx %>% 
      mutate(Selection = ifelse((ExptShort == i & Value > myYmin & DTF > myXmin), "Yes", Selection))
  }
  #
  ss <- xx %>% select(Entry, Name, ExptShort, Selection) %>% spread(ExptShort, Selection) %>% 
    filter(!is.na(Ro16), !is.na(Ro17), !is.na(Su16), !is.na(Su17)) %>% # must be "Yes" in all Expt
    mutate(Trait = myAmino) %>% select(Entry, Name, Trait)
}
#
xx <- bind_rows(DT_AA_Selection(myAmino = "Protein"),
                DT_AA_Selection(myAmino = "Glutamate"),
                DT_AA_Selection(myAmino = "Aspartate"),
                DT_AA_Selection(myAmino = "Arginine"),
                DT_AA_Selection(myAmino = "Leucine"),
                DT_AA_Selection(myAmino = "Phenylalanine"),
                DT_AA_Selection(myAmino = "Serine"),
                DT_AA_Selection(myAmino = "Valine"),
                DT_AA_Selection(myAmino = "Isoleucine"),
                DT_AA_Selection(myAmino = "Proline"),
                DT_AA_Selection(myAmino = "Alanine"),
                DT_AA_Selection(myAmino = "Glycine"),
                DT_AA_Selection(myAmino = "Threonine"),
                DT_AA_Selection(myAmino = "Histidine"),
                DT_AA_Selection(myAmino = "Tyrosine"),
                DT_AA_Selection(myAmino = "Methionine"),
                DT_AA_Selection(myAmino = "Cysteine"),
                DT_AA_Selection(myAmino = "Tryptophan") )
#
write.csv(xx, "Supplemental_table_02.csv", row.names = F)
xx <- read.csv("Supplemental_table_02.csv")
DT::datatable(xx)

Figure 4 - GWAS Summary

# Create plotting function
gg_GWAS_Summary <- function (folder = NULL, traits = list_Traits(), 
                             threshold = -log10(5e-08), sug.threshold = -log10(5e-06), 
                             models = c("MLM", "MLMM", "FarmCPU", "BLINK", "GLM"), 
                             colors = c("darkgreen", "darkred", "darkorange3", "steelblue", "darkgoldenrod2"), 
                             shapes = 21:25, hlines = NULL, 
                             vlines = NULL, vline.colors = rep("red", length(vlines)), 
                             vline.legend = T, title = NULL, 
                             caption = paste0("Sig Threshold = ", threshold, " = Large\nSuggestive = ", sug.threshold, " = Small"), 
                             rowread = 2000, legend.position = "bottom", lrows = 1) {
  # Prep data
  files <- list_Result_Files(folder)
  files <- files[grepl(paste(traits, collapse = "|"), files)]
  files <- files[grepl(paste(models, collapse = "|"), files)]
  xp <- NULL
  for (i in files) {
    xpi <- table_GWAS_Results(folder = folder, files = i, 
                               threshold = threshold, sug.threshold = sug.threshold)
    if (nrow(xpi) > 0) { xp <- bind_rows(xp, xpi) }
  }
  xp <- xp %>% 
    filter(!is.na(SNP)) %>% 
    arrange(Chr, Pos, P.value, Trait) %>% 
    mutate(Model = factor(Model, levels = models),
           Trait = factor(Trait, levels = rev(traits))) %>% 
    filter(!is.na(Trait)) %>% 
    arrange(desc(Model))
  x1 <- xp %>% filter(`-log10(p)` > threshold)
  x2 <- xp %>% filter(`-log10(p)` < threshold)
  xg <- read.csv(paste0(folder, files[1])) %>% 
    mutate(Trait = xp$Trait[1], 
           Trait = factor(Trait, levels = rev(traits))) 
  # Plot
  mp <- ggplot(x1, aes(x = Pos/1e+08, y = Trait)) + geom_blank(data = xg)
  #
  if (!is.null(vlines)) {
    xm <- xg %>% filter(SNP %in% vlines) %>% 
      mutate(SNP = factor(SNP, levels = vlines)) %>% 
      arrange(SNP)
    mp <- mp + 
      geom_vline(data = xm, alpha = 0.5, aes(xintercept = Pos/1e+08, color = SNP)) + 
      scale_color_manual(name = NULL, values = vline.colors)
  }
  #
  if (!is.null(hlines)) { mp <- mp + geom_hline(yintercept = hlines, alpha = 0.7) }
  #
  mp <- mp + 
    geom_point(data = x2, size = 0.75, color = "black", alpha = 0.5, 
               aes(shape = Model, fill = Model, key1 = SNP, key2 = `-log10(p)`)) + 
    geom_point(size = 2.25, color = "black", alpha = 0.5, 
               aes(shape = Model, fill = Model, key1 = SNP, key2 = `-log10(p)`)) + 
    facet_grid(. ~ Chr, drop = F, scales = "free_x", space = "free_x") + 
    scale_fill_manual(name = NULL, values = colors, breaks = models) + 
    scale_shape_manual(name = NULL, values = shapes, breaks = models) + 
    scale_x_continuous(breaks = 0:20) + 
    scale_y_discrete(drop = F) + 
    theme_AGL +
    theme(legend.position = legend.position) + 
    guides(shape = guide_legend(nrow = lrows, override.aes = list(size = 4)), 
           color = guide_legend(nrow = lrows), 
           fill = guide_legend(nrow = lrows)) + 
    labs(title = title, y = NULL, x = "100 Mbp", caption = caption)
  #
  if (vline.legend == F) { mp <- mp + guides(color = vline.legend) }
  #
  mp
}
# Prep data
myPs <- c("Protein", "Glutamate", "Aspartate", "Leucine",
          "Arginine", "Lysine", "Phenylalanine", "Valine", "Serine",
          "Proline", "Isoleucine", "Alanine", "Glycine", "Threonine", 
          "Tyrosine", "Histidine", "Methionine", "Cysteine", "Tryptophan")
#
myTs <- c(paste0(myPs, "_Su16"), paste0(myPs, "_Su17"),
          paste0(myPs, "_Ro16"), paste0(myPs, "_Ro17"))
# Plot
mp1 <- gg_GWAS_Summary(folder = "GWAS_Results/", 
                       traits = myTs,
                       models = c("MLM", "MLMM", "FarmCPU", "BLINK"),
                       colors = c("darkgreen", "darkred", "darkorange3", "steelblue"),
                       threshold = 6.7, sug.threshold = 5.3, 
                       hlines = seq(19.5,72.5, by = 19), lrows = 2,
                       vlines = myMs, vline.colors = myCs,
                       title = "A) No Covariate") +
  labs(caption = NULL) + 
  guides(fill = guide_legend(title="Model", order = 1, nrow = 2), 
         shape = guide_legend(title="Model", order = 1, nrow = 2),
         color = guide_legend(title="Marker", order = 2, byrow = T))
#
mp2 <- gg_GWAS_Summary(folder = "GWAS_Results_CV_DTF_REP/", 
                       traits = myTs,
                       models = c("MLM", "MLMM", "FarmCPU", "BLINK"),
                       colors = c("darkgreen", "darkred", "darkorange3", "steelblue"),
                       threshold = 6.7, sug.threshold = 5.3, 
                       hlines = seq(19.5, 72.5, by = 19), lrows = 2,
                       vlines = myMs, vline.colors = myCs,
                       title = "B) Covariate = DTF + REP") +
  labs(caption = NULL) + 
  guides(fill = guide_legend(title="Model", order = 1, nrow = 2), 
         shape = guide_legend(title="Model", order = 1, nrow = 2),
         color = guide_legend(title="Marker", order = 2, byrow = T))
mp <- ggarrange(mp1, mp2, ncol = 1, common.legend = T, legend = "bottom", heights = c(1,1.1))
ggsave("Figure_04.png", mp, width = 12, height = 20, bg = "white")
# Save HTML
mp1 <- ggplotly(mp1) %>% layout(showlegend = FALSE)
saveWidget(as_widget(mp1), "Additional/Figure_04_A.html",
           knitrOptions = list(fig.width = 12, fig.height = 10), 
           selfcontained = T)
mp2 <- ggplotly(mp2) %>% layout(showlegend = FALSE)
saveWidget(as_widget(mp2), "Additional/Figure_04_B.html",
           knitrOptions = list(fig.width = 12, fig.height = 10), 
           selfcontained = T)

Supplemental Figure 4 - Protein x Amino Acid

# Prep data
x1 <- d2 %>% filter(AminoAcid != "Protein")
x2 <- d2 %>% filter(AminoAcid == "Protein") %>%
  select(Name, ExptShort, Total.Protein=Value)
xx <- left_join(x1, x2, by = c("Name", "ExptShort"))
# Plot
mp <- ggplot(xx, aes(x = Total.Protein, y = Value, color = Expt)) +
  geom_point(size = 0.75, alpha = 0.6, pch = 16) +
  facet_wrap(. ~ AminoAcid, scales = "free_y", ncol = 6) +
  scale_color_manual(values = myCs_Expt) +
  theme_AGL +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(override.aes = list(size = 3))) +
  labs(title = "Lentil Diversity Panel", 
       y = "Amino Acid (g / 100g dry seed)", x = "Protein (g / 100g dry seed)")
ggsave("Supplemental_Figure_04.png", mp, width = 10, height = 5.5, dpi = 600)

Figure 5 - Markers

Sutherland, Canada 2016

# Create plotting function
gg_PlotMarkers <- function (xg = myG, xy, myTrait, 
                            myMarkers, points = T, myColor = "steelblue",
                            myWidth = 0.1, myTitle = "", myYlab = "") {
    # Prep data
    xg <- xg %>% filter(rs %in% myMarkers) %>% 
      mutate(rs = factor(rs, levels = myMarkers)) %>% arrange(rs)
    for(i in 12:ncol(xg)) { if(sum(!grepl("G|C|A|T", xg[,i])) > 0) { xg[,i] <- NA } }
    xg <- xg[,!is.na(xg[1,])]
    #
    myMLabs <- paste(myMarkers, collapse = "\n")
    #
    xx <- xg %>% gather(Name, Allele, 12:ncol(.)) %>%
      group_by(Name) %>%
      summarise(Alleles = paste(Allele, collapse = ""))
    #
    myLDP <- read.csv("Data/myD_LDP.csv") %>% 
      mutate(Name = gsub(" ", "_", Name),
             Name = gsub("-", "\\.", Name),
             Name = plyr::mapvalues(Name, "3156.11_AGL", "X3156.11_AGL"))
    #
    xx <- xx %>% left_join(xy, by = "Name") %>% filter(!is.na(get(myTrait)))
    x2 <- xx %>% group_by(Alleles) %>% 
      summarise(Value = mean(get(myTrait), na.rm = T)) %>% 
      arrange(Value) 
    xx <- xx %>% mutate(Alleles = factor(Alleles, levels = x2$Alleles) ) %>%
      left_join(select(myLDP, Name, Origin), by = "Name") %>%
      mutate(Origin = ifelse(Origin %in% c("Canada","USA"), "Local", "Other"))
    #
    mp <- ggplot(xx, aes(x = Alleles, y = get(myTrait))) + 
      geom_violin(fill = "grey90", color = NA) + 
      geom_boxplot(width = myWidth, coef = 5) + 
      facet_grid(. ~ as.character(myMLabs)) +
      theme_AGL +
      theme(legend.position = "top",
            axis.text.x = element_text(angle = 45, hjust = 1)) + 
      labs(title = myTitle, y = myYlab, x = NULL)
    if(points == T) { 
      mp <- mp + 
        geom_quasirandom(aes(fill = Origin, color = Origin), pch = 21, size = 1) + 
        scale_fill_manual(values = alpha(c("black", myColor),0.7)) +
        scale_color_manual(values = c("black",alpha("white",0))) +
        guides(color = guide_legend(override.aes = list(size = 3)))
    }
    mp
}
# Prep data
myY_Su16 <- read.csv("Data/myY_NIRS_Su16.csv")
# Plot
mp1 <- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", 
                      myMarkers = myMs[1], myColor = "steelblue",
                      myTitle = "A) Individual Markers",
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp2 <- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", 
                      myMarkers = myMs[2], myColor = "steelblue")
#
mp3 <- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", 
                      myMarkers = myMs[3], myColor = "steelblue")
#
mp4 <- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", 
                      myMarkers = myMs[4], myColor = "steelblue",
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp5 <- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", 
                      myMarkers = myMs[5], myColor = "steelblue")
#
mp6 <- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", 
                      myMarkers = myMs[6], myColor = "steelblue")
#
mp1 <- ggarrange(mp1, mp2, mp3, mp4, mp5, mp6, ncol = 3, nrow = 2, 
                 common.legend = T, legend = "bottom")
#
mp2 <- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", 
                      myMarkers = myMs[c(1:3)], myColor = "steelblue", myWidth = 0.2, 
                      myTitle = "B) Triplet Marker Set 1", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp3 <- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", 
                      myMarkers = myMs[c(4:6)], myColor = "steelblue", myWidth = 0.2, 
                      myTitle = "C) Triplet Marker Set 2", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp2 <- ggarrange(mp2, mp3, ncol = 2, legend = "none")
#
mp <- ggarrange(mp1, mp2, ncol = 1, heights = c(1.3,1))
#
ggsave("Figure_05a.png", mp, width = 8, height = 8, bg = "white", dpi = 600)

Sutherland, Canada 2017

# Prep data
myY_Su17 <- read.csv("Data/myY_NIRS_Su17.csv")
# Plot
mp1 <- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", 
                      myMarkers = myMs[1], myColor = "darkblue", 
                      myTitle = "A) Individual Markers", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp2 <- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", 
                      myMarkers = myMs[2], myColor = "darkblue")
#
mp3 <- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", 
                      myMarkers = myMs[3], myColor = "darkblue")
#
mp4 <- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", 
                      myMarkers = myMs[4], myColor = "darkblue", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp5 <- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", 
                      myMarkers = myMs[5], myColor = "darkblue")
#
mp6 <- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", 
                      myMarkers = myMs[6], myColor = "darkblue")
#
mp1 <- ggarrange(mp1, mp2, mp3, mp4, mp5, mp6, ncol = 3, nrow = 2, 
                 common.legend = T, legend = "bottom")
#
mp2 <- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", 
                      myMarkers = myMs[c(1:3)], myColor = "darkblue", myWidth = 0.2,
                      myTitle = "B) Triplet Marker Set 1", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp3 <- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", 
                      myMarkers = myMs[c(4:6)], myColor = "darkblue", myWidth = 0.2, 
                      myTitle = "C) Triplet Marker Set 2", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp2 <- ggarrange(mp2, mp3, ncol = 2, legend = "none")
#
mp <- ggarrange(mp1, mp2, ncol = 1, heights = c(1.3,1))
#
ggsave("Figure_05b.png", mp, width = 8, height = 8, bg = "white", dpi = 600)

Rosthern, Canada 2016

# Prep data
myY_Ro16 <- read.csv("Data/myY_NIRS_Ro16.csv")
#
mp1 <- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16", 
                      myMarkers = myMs[1], myColor = "darkorange", 
                      myTitle = "A) Individual Markers", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp2 <- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16",
                      myMarkers = myMs[2], myColor = "darkorange")
#
mp3 <- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16", 
                      myMarkers = myMs[3], myColor = "darkorange")
#
mp4 <- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16", 
                      myMarkers = myMs[4], myColor = "darkorange", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp5 <- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16", 
                      myMarkers = myMs[5], myColor = "darkorange")
#
mp6 <- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16", 
                      myMarkers = myMs[6], myColor = "darkorange")
#
mp1 <- ggarrange(mp1, mp2, mp3, mp4, mp5, mp6, ncol = 3, nrow = 2, 
                 common.legend = T, legend = "bottom")
#
mp2 <- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16", 
                      myMarkers = myMs[c(1:3)], myColor = "darkorange", myWidth = 0.2,
                      myTitle = "B) Triplet Marker Set 1", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp3 <- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16", 
                      myMarkers = myMs[c(4:6)], myColor = "darkorange", myWidth = 0.2,
                      myTitle = "C) Triplet Marker Set 2", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp2 <- ggarrange(mp2, mp3, ncol = 2, legend = "none")
#
mp <- ggarrange(mp1, mp2, ncol = 1, heights = c(1.3,1))
#
ggsave("Figure_05c.png", mp, width = 8, height = 8, bg = "white", dpi = 600)

Rosthern, Canada 2017

# Prep data
myY_Ro17 <- read.csv("Data/myY_NIRS_Ro17.csv")
#
mp1 <- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", 
                      myMarkers = myMs[1], myColor = "darkred", 
                      myTitle = "A) Individual Markers", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp2 <- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", 
                      myMarkers = myMs[2], myColor = "darkred")
#
mp3 <- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", 
                      myMarkers = myMs[3], myColor = "darkred")
#
mp4 <- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", 
                      myMarkers = myMs[4], myColor = "darkred", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp5 <- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", 
                      myMarkers = myMs[5], myColor = "darkred")
#
mp6 <- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", 
                      myMarkers = myMs[6], myColor = "darkred")
#
mp1 <- ggarrange(mp1, mp2, mp3, mp4, mp5, mp6, ncol = 3, nrow = 2, 
                 common.legend = T, legend = "bottom")
#
mp2 <- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", 
                      myMarkers = myMs[c(1:3)], myColor = "darkred", myWidth = 0.2, 
                      myTitle = "B) Triplet Marker Set 1", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp3 <- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", 
                      myMarkers = myMs[c(4:6)], myColor = "darkred", myWidth = 0.2,
                      myTitle = "C) Triplet Marker Set 2", 
                      myYlab = "Protein\n(g / 100g dry seed)")
#
mp2 <- ggarrange(mp2, mp3, ncol = 2, legend = "none")
#
mp <- ggarrange(mp1, mp2, ncol = 1, heights = c(1.3,1))
#
ggsave("Figure_05d.png", mp, width = 8, height = 8, bg = "white", dpi = 600)

Additional Marker Plots

xx <- read.csv("Supplemental_table_01.csv") %>% 
  arrange(desc(X.log10.p.)) %>% filter(!duplicated(SNP)) %>% slice(1:200)
#
counter <- 1
for(i in xx$SNP) {
  #
  mp1 <- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", myMarkers = i, 
                        myColor = "steelblue", myTitle = "A) Su16",
                        myYlab = "Protein\n(g / 100g dry seed)")
  #
  mp2 <- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", myMarkers = i, 
                        myColor = "darkblue", myTitle = "B) Su17",
                        myYlab = "Protein\n(g / 100g dry seed)")
  #
  mp3 <- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16", myMarkers = i, 
                        myColor = "darkorange", myTitle = "C) Ro16",
                        myYlab = "Protein\n(g / 100g dry seed)")
  #
  mp4 <- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", myMarkers = i, 
                        myColor = "darkred", myTitle = "D) Ro17",
                        myYlab = "Protein\n(g / 100g dry seed)")
  #
  mp <- ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2, 
                  common.legend = T, legend = "bottom") 
  #
  ggsave(paste0("additional/Markers/pvalue/",
                stringr::str_pad(counter, 3, pad = "0"),"_",i,".png"), mp, 
         width = 8, height = 6, bg = "white")
  # Loop conditions
  counter <- counter + 1
}
#
xx <- read.csv("Supplemental_table_01.csv") %>% 
  arrange(desc(abs(Effect))) %>% filter(!duplicated(SNP)) %>% slice(1:200)
#
counter <- 1
for(i in xx$SNP) {
  #
  mp1 <- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", myMarkers = i, 
                        myColor = "steelblue", myTitle = "A) Su16",
                        myYlab = "Protein\n(g / 100g dry seed)")
  #
  mp2 <- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", myMarkers = i, 
                        myColor = "darkblue", myTitle = "B) Su17",
                        myYlab = "Protein\n(g / 100g dry seed)")
  #
  mp3 <- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16", myMarkers = i, 
                        myColor = "darkorange", myTitle = "C) Ro16",
                        myYlab = "Protein\n(g / 100g dry seed)")
  #
  mp4 <- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", myMarkers = i, 
                        myColor = "darkred", myTitle = "D) Ro17",
                        myYlab = "Protein\n(g / 100g dry seed)")
  #
  mp <- ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2, 
                  common.legend = T, legend = "bottom")
  #
  ggsave(paste0("additional/Markers/effect/",
                stringr::str_pad(counter, 3, pad = "0"),"_",i,".png"), mp, 
         width = 8, height = 6, bg = "white")
  # Loop conditions
  counter <- counter + 1
}
#
xx <- read.csv("Supplemental_table_01.csv") %>% 
  arrange(Chr, Pos) %>% filter(!duplicated(SNP))
#
counter <- 1
for(i in xx$SNP) {
  #
  mp1 <- gg_PlotMarkers(myG, myY_Su16, myTrait = "Protein_Su16", myMarkers = i, 
                        myColor = "steelblue", myTitle = "A) Su16",
                        myYlab = "Protein\n(g / 100g dry seed)")
  #
  mp2 <- gg_PlotMarkers(myG, myY_Su17, myTrait = "Protein_Su17", myMarkers = i, 
                        myColor = "darkblue", myTitle = "B) Su17",
                        myYlab = "Protein\n(g / 100g dry seed)")
  #
  mp3 <- gg_PlotMarkers(myG, myY_Ro16, myTrait = "Protein_Ro16", myMarkers = i, 
                        myColor = "darkorange", myTitle = "C) Ro16",
                        myYlab = "Protein\n(g / 100g dry seed)")
  #
  mp4 <- gg_PlotMarkers(myG, myY_Ro17, myTrait = "Protein_Ro17", myMarkers = i, 
                        myColor = "darkred", myTitle = "D) Ro17",
                        myYlab = "Protein\n(g / 100g dry seed)")
  #
  mp <- ggarrange(mp1, mp2, mp3, mp4, ncol = 2, nrow = 2, 
                  common.legend = T, legend = "bottom") 
  #
  ggsave(paste0("additional/Markers/chr/",i,".png"), mp, 
         width = 8, height = 6, bg = "white")
  # Loop conditions
  counter <- counter + 1
}

Supplemental Figure 5 - MAS Markers

# Prep data
xm <- read.csv("Data/MAS_Markers.csv")
# Create a plotting function
gg_MASMarkers <- function(myExpt, myColor) {
  xx <- myG %>% filter(rs %in% xm$SNP) %>%
    mutate(rs = factor(rs, levels = xm$SNP))
  #
  for(i in 12:ncol(xx)) { xx[,i] <- ifelse(!grepl("G|C|A|T", xx[,i]), NA, xx[,i]) }
  #
  xx <- xx %>% gather(Name, Allele, 12:ncol(.)) 
  #
  myLDP <- read.csv("Data/myD_LDP.csv") %>% 
    mutate(Name = gsub(" ", "_", Name),
           Name = gsub("-", "\\.", Name),
           Name = plyr::mapvalues(Name, "3156.11_AGL", "X3156.11_AGL"))
  #
  xY <- d2 %>% filter(AminoAcid == "Protein", ExptShort == myExpt) %>% 
    select(Name, Value) %>%
    mutate(Name = gsub(" ", "_", Name),
           Name = gsub("-", "\\.", Name),
           Name = plyr::mapvalues(Name, "3156.11_AGL", "X3156.11_AGL"))
  xx <- xx %>% left_join(xY, by = "Name") %>% 
    mutate(Value = ifelse(is.na(Allele), NA, Value)) %>%
    left_join(select(myLDP, Name, Origin), by = "Name") %>%
    filter(!is.na(Allele))
  xc <- xx %>% filter(Origin %in% c("Canada","USA"))
  #
  ggplot(xx, aes(x = Allele, y = Value)) + 
    geom_violin(fill = myColor, color = NA, alpha = 0.7) + 
    geom_boxplot(width = 0.1, coef = 5) + 
    geom_quasirandom(data = xc, alpha = 0.7, size = 0.5) +
    facet_wrap(rs ~ ., scales = "free_x", ncol = 6) +
    theme_AGL +
    theme(legend.position = "bottom",
          axis.text.x = element_text(angle = 45, hjust = 1)) + 
    labs(title = myExpt, x = "Allele", y = "Protein (g / 100g dry seed)")
}
# Plot
mp <- gg_MASMarkers(myExpt = "Su16", myColor = "steelblue")
ggsave("Supplemental_Figure_05a.png", mp, width = 12, height = 12, dpi = 300)
#
mp <- gg_MASMarkers(myExpt = "Su17", myColor = "darkblue")
ggsave("Supplemental_Figure_05b.png", mp, width = 12, height = 12, dpi = 300)
#
mp <- gg_MASMarkers(myExpt = "Ro16", myColor = "darkorange")
ggsave("Supplemental_Figure_05c.png", mp, width = 12, height = 12, dpi = 300)
#
mp <- gg_MASMarkers(myExpt = "Ro17", myColor = "darkred")
ggsave("Supplemental_Figure_05d.png", mp, width = 12, height = 12, dpi = 300)

Additional Figures

Additional Figure 1 - Total Protein by Entry

# Prep data
x1 <- d2 %>% filter(AminoAcid == "Protein", ExptShort == "Su16") %>%
  arrange(Value) %>% pull(Entry)
xx <- d2 %>% filter(AminoAcid == "Protein") %>%
  mutate(Entry = factor(Entry, levels = x1))
# Plot
mp <- ggplot(xx, aes(x = Entry, y = Value, color = Expt)) +
  geom_point(alpha = 0.7, pch = 16) +
  scale_color_manual(values = myCs_Expt) +
  scale_y_continuous(breaks = 25:35) +
  theme_AGL +
  theme(legend.position = "bottom",
        panel.grid.major.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()) +
  guides(color = guide_legend(nrow = 2)) +
  labs(title = "Total Protein", x = "Entry",
       y = "Protein (g / 100g dry seed)")
ggsave("Additional/Additional_Figure_01.png", mp, width = 6, height = 4, dpi = 600)

Additional Figure 2 - Amino Acid Family by Expt

# Prep data
myFams1 <- c("Family.Glutamate", "Family.Aspartate", "Family.Pyruvate", 
             "Family.Serine","Family.Histidine","Family.Aromatic")
myFams2 <- c("Glutamte Family", "Aspartate Family", "Pyruvate Family", 
             "Serine Family", "Histidine Family", "Aromatic Family")
xx <- d3 %>% filter(!grepl("Perc", AminoAcidFamily)) %>% 
  mutate(AminoAcidFamily = plyr::mapvalues(AminoAcidFamily, myFams1, myFams2))
# Plot
mp <- ggplot(xx, aes(x = ExptShort, y = Value, fill = Expt)) +
  geom_boxplot(alpha = 0.7, coef = 3.5) +
  scale_fill_manual(values = myCs_Expt) +
  facet_grid(. ~ AminoAcidFamily, labeller = label_wrap_gen(width = 10)) +
  theme_AGL +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Amino Acid Concentration In A Lentil Diversity Panel",
       y = "g / 100g dry seed", x = "")
ggsave("Additional/Additional_Figure_02.png", mp, width = 8, height = 5, dpi = 600)

Additional Figure 3 - Proteins by Expt

# Prep data
xx <- d2 %>% filter(AminoAcid != "Protein")
# Plot
mp <- ggplot(xx, aes(x = Expt, y = Value, fill = Expt)) +
  geom_boxplot(alpha = 0.7) +
  facet_wrap(. ~ AminoAcid, scales = "free_y", ncol = 9) +
  scale_fill_manual(name = NULL, values = myCs_Expt) +
  theme_AGL +
  theme(legend.position = "bottom",
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()) +
  labs(title = "Amino Acid Concentration In A Lentil Diversity Panel", 
       y = "g / 100g dry seed", x = NULL)
ggsave("Additional/Additional_Figure_03.png", mp, width = 12, height = 6, dpi = 600)

Additional Figure 4 - Cotyledon Color

# Prep data
myCots <- c("Red", "Yellow", "Red/Yellow", "Green")
xx <- d2 %>% filter(AminoAcid == "Protein") %>%
  left_join(myLDP, by = c("Entry", "Name")) %>%
  mutate(CotyledonColor = factor(CotyledonColor, levels = myCots))
# Plot
mp1 <- ggplot(xx %>% filter(ExptShort == "Su16"), 
              aes(x = Origin, y = Value, color = CotyledonColor)) +
  geom_quasirandom(alpha = 0.7, pch = 16) +
  facet_grid(. ~ Region, scales = "free_x", space = "free_x") +
  scale_color_manual(values = c("darkred","darkorange","black","darkgreen")) +
  theme_AGL +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "bottom") +
  guides(color = guide_legend(override.aes = list(size = 3))) +
  labs(title = "A) Protein Concentration in Sutherland, Canada 2016", 
       x = NULL, y = "g / 100g dry seed")
#
mp2 <- ggplot(xx %>% filter(ExptShort == "Su16"), 
              aes(x = CotyledonColor, y = Value, color = CotyledonColor)) +
  geom_quasirandom(alpha = 0.7, pch = 16) +
  facet_grid(. ~ Region, scales = "free_x", space = "free_x") +
  scale_color_manual(values = c("darkred","darkorange","black","darkgreen")) +
  theme_AGL +
  theme(legend.position = "none") +
  labs(title = "B) Protein Concentration in Sutherland, Canada 2016", 
       x = NULL, y = "g / 100g dry seed")
#
mp <- ggarrange(mp1, mp2, ncol = 1, nrow = 2, heights = c(1.4,1))
ggsave("Additional/Additional_Figure_04.png", mp, width = 10, height = 8, dpi = 600)

Additional Figure 5 - Size

# Prep data
x1 <- d2 %>% filter(AminoAcid == "Protein") %>% rename(Protein=Value)
x2 <- myLDP %>% select(Name, CotyledonColor, SeedMass1000.2017, Size, DTF_Cluster, Source)
xx <- left_join(x1, x2, by = "Name") %>%
  mutate(Size = factor(Size, levels = c("S","M","L","XL")))
# Plot
mp <- ggplot(xx, aes(x = Size, y = Protein)) +
  geom_violin(aes(fill = ExptShort), alpha = 0.7) + 
  geom_boxplot(width = 0.1) +
  scale_fill_manual(values = myCs_Expt) +
  facet_wrap(ExptShort ~ ., scales = "free_y", ncol = 2) +
  theme_AGL +
  theme(legend.position = "none") +
  labs(title = "Protein x Seed Size", 
       x = "Seed Size", y = "Protein (g / 100g dry seed)")
ggsave("Additional/Additional_Figure_05.png", mp, width = 6, height = 4, dpi = 600)

Additional Figure 6 - Stability

# Prep data
xx <- read.csv("Additional/Pulses/lentils-quality-report.csv")
xm <- xx %>% group_by(Grade) %>% summarise(Avg = round(mean(Mean),1))
xx <- xx %>% left_join(xm, by = "Grade")
# Plot
mp <- ggplot(xx, aes(x = Year, y = Mean, color = Grade)) +
  geom_hline(aes(yintercept = Avg), alpha = 0.7) +
  geom_point(size = 0.75, alpha = 0.6) + geom_line() +
  geom_errorbar(aes(ymin = Min, ymax = Max), alpha = 0.5) +
  facet_grid(. ~ Grade + paste("Mean =", Avg)) +
  scale_x_continuous(breaks = seq(2010,2022, by = 2)) +
  scale_y_continuous(breaks = seq(22,32, by = 2), minor_breaks = 22:32) +
  theme_AGL +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Saskatchewan Lentil Protein Concentration", 
       subtitle = "Data: Dr. Ning Wang (Canadian Grain Commision)", 
       y = "g / 100g dry seed", x = NULL)
ggsave("Additional/Additional_Figure_06.png", mp, width = 8, height = 4, dpi = 600)

Additional Figure 7 - Pulses

# Prep data
x1 <- readxl::read_xlsx("Additional/Pulses/pulses-quality.xlsx", "mean") %>%
  gather(Crop, Mean, 2:ncol(.))
x2 <- readxl::read_xlsx("Additional/Pulses/pulses-quality.xlsx", "sd") %>%
  gather(Crop, SD, 2:ncol(.))
xx <- left_join(x1, x2, by = c("Amino Acid","Crop")) %>%
  mutate(`Amino Acid` = factor(`Amino Acid`, levels = myPs))
# Plot
mp <- ggplot(xx, aes(x = `Amino Acid`, y = Mean, fill = Crop)) +
  geom_col(position = "dodge", alpha = 0.7) +
  geom_errorbar(aes(ymin = Mean - SD, ymax = Mean + SD), 
                position = "dodge", linewidth = 0.2, alpha = 0.7) +
  theme_AGL +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Amino Acid Concentration", x = NULL)
ggsave("Additional/Additional_Figure_07.png", mp, width = 8, height = 4, dpi = 600)

© Derek Michael Wright