QTL Tutorial with qtl2

A tutorial on how to run QTL analyses in R with qtl2


Introduction

Running a QTL analysis in can be done with a few simple steps. This tutorial will go through a couple quick examples with a few lentil (Lens culinaris) recombinant inbred line (RIL) populations (LR68, LR89 & LR95).


qtl2 Installation

install.packages(c("devtools", "yaml", "jsonlite", "data.table", "RcppEigen", "RSQLite", "qtl"))
devtools::install_github("rqtl/qtl2")

gwaspr package

To aid with data prep and data visualization, I will also be using my own optional package gwaspr, which loads tidyverse, ggpubr, ggrepel & ggtext. This package is optional.

# devtools::install_github("derekmichaelwright/gwaspr")
library(gwaspr)
myCaption <- "www.dblogr.com/ or derekmichaelwright.github.io/dblogr/ | Data: AGILE"

Data/Input Files

In order to run the analysis we first need to set up an input folder with properly formatted files containing our phenotyope data, genotype data, a genetic map and a yaml file with details for running the analysis. The input folders used in this vignette can be downloaded below:


LR68

For our first example, we will use LR68, an intraspecfic RIL population from a cross between IG 72643, a Lens orientalis wild accession, and 3339-3 (Lens culinaris), an accession which was released as the variety CDC Greenstar.

input_LR68
  ├── LR68.yaml
  ├── LR68_gmap.csv
  ├── LR68_geno.csv
  └── LR68_pheno.csv
read.csv("input_LR68/LR68.yaml", header = F)
##                                                      V1
## 1            # Data from USASK AGILE & EVOLVES projects
## 2           # LR68: intraspecific lentil RIL population
## 3  # IG 72643 (wild accession) x 3339-3 (CDC Greenstar)
## 4                                     crosstype: riself
## 5                                   geno: LR68_geno.csv
## 6                                 pheno: LR68_pheno.csv
## 7                                   gmap: LR68_gmap.csv
## 8                                              alleles:
## 9                                                   - a
## 10                                                  - b
## 11                                           genotypes:
## 12                                                 a: 1
## 13                                                 b: 2
## 14                                          na.strings:
## 15                                                - '-'
## 16                                                 - NA
read.csv("input_LR68/LR68_gmap.csv")[1:10,]
##                    marker  lg     cM
## 1    Lcu.2RBY.Chr1p102694 LG1  0.000
## 2    Lcu.2RBY.Chr1p890646 LG1  7.073
## 3   Lcu.2RBY.Chr1p1003850 LG1  7.737
## 4   Lcu.2RBY.Chr1p1275060 LG1 10.598
## 5   Lcu.2RBY.Chr1p1363775 LG1 11.437
## 6   Lcu.2RBY.Chr1p1363771 LG1 11.848
## 7   Lcu.2RBY.Chr1p1987560 LG1 13.559
## 8   Lcu.2RBY.Chr1p2275880 LG1 15.756
## 9  Lcu.2RBY.Chr1p26220339 LG1 34.086
## 10 Lcu.2RBY.Chr1p32458546 LG1 35.804
read.csv("input_LR68/LR68_geno.csv")[1:10,1:4]
##         Id Lcu.2RBY.Chr1p102694 Lcu.2RBY.Chr1p890646 Lcu.2RBY.Chr1p1003850
## 1   LR68.4                    b                    a                     a
## 2   LR68.5                    b                    b                     -
## 3   LR68.6                    b                    b                     b
## 4   LR68.7                    b                    b                     b
## 5   LR68.8                    a                    a                     -
## 6   LR68.9                    b                    b                     -
## 7  LR68.12                    b                    b                     b
## 8  LR68.19                    a                    a                     a
## 9  LR68.22                    a                    a                     -
## 10 LR68.26                    a                    -                     -
read.csv("input_LR68/LR68_pheno.csv")[1:10,]
##          Id Days_To_Flower Cotyledon_Color Testa_Color Testa_Pattern
## 1  LR68.100             37               0           0             0
## 2  LR68.102             41               0           0             0
## 3  LR68.104             44               0           1             0
## 4  LR68.105             37               0           1             0
## 5  LR68.109             51               0           1             0
## 6  LR68.112             37               0           1             0
## 7  LR68.116             45               0           0             1
## 8  LR68.117             42               0           1             0
## 9  LR68.119             42               0           0             1
## 10  LR68.12             37               0           0             0

Phenotype Data

We have 4 phenotypes for this population, 3 of which are qualitative traits which need to be converted into a numeric format:

  • Cotyledon_Color: 0 = "Orange", 1 = "Yellow"
  • Testa_Color: 0 = "Green", 1 = "Brown"
  • Testa_Pattern: 0 = "Absent", 1 = "Present"

# Prep data
myColors <- c("darkgreen", "darkred", "steelblue", "black")
myTraits <- c("Days_To_Flower", "Cotyledon_Color", "Testa_Color", "Testa_Pattern")
xx <- read.csv("input_LR68/LR68_Pheno.csv") %>% 
  gather(trait, value, 2:ncol(.)) %>%
  mutate(trait = factor(trait, levels = myTraits))
# Plot
mp <- ggplot(xx, aes(x = value, fill = trait)) +
  geom_bar(color = "black", alpha = 0.7) +
  facet_wrap(trait ~ ., scales = "free", ncol = 4) +
  scale_fill_manual(values = myColors) +
  theme_gwaspr(legend.position = "none") +
  labs(title = "LR68 - Phenotypes", x = NULL, caption = myCaption)
ggsave("qtl_tutorial_LR68_01.png", mp, width = 8, height = 4)

Run QTL Analysis

library(qtl2)
myQTL <- read_cross2("input_LR68/LR68.yaml", quiet = F)
summary(myQTL)
## Object of class cross2 (crosstype "riself")
## 
## Total individuals            124
## No. genotyped individuals    124
## No. phenotyped individuals   124
## No. with both geno & pheno   124
## 
## No. phenotypes                 4
## No. covariates                 0
## No. phenotype covariates       0
## 
## No. chromosomes                6
## Total markers                614
## 
## No. markers by chr:
## LG1 LG2 LG3 LG4 LG6 LG7 
##  75 143  90 146  67  93
# insert a pseudomarker every 1 cM (step = 1)
map <- insert_pseudomarkers(myQTL$gmap, step = 1)
# calculate the QTL genotype probabilities
pr <- calc_genoprob(myQTL, map)
# perform genome scan to determine LOD scores
out <- scan1(pr, myQTL$pheno)
# Find peaks
find_peaks(out, map, threshold = 3, drop = 1.5)
##   lodindex       lodcolumn chr pos       lod   ci_lo   ci_hi
## 1        2 Cotyledon_Color LG1  42 53.824644  40.834  45.035
## 2        3     Testa_Color LG2  16  6.975825  14.185  19.267
## 3        3     Testa_Color LG3 108  6.930769 103.996 111.737
## 4        4   Testa_Pattern LG6  23 43.281356   0.000  32.573
# Perform permutation tests
operm <- scan1perm(pr, myQTL$pheno, n_perm = 1000)
# Calculate thresholds
summary(operm, alpha = c(0.05, 0.01))
## LOD thresholds (1000 permutations)
##      Days_To_Flower Cotyledon_Color Testa_Color Testa_Pattern
## 0.05           2.93            3.11        2.96          3.02
## 0.01           3.57            3.79        3.51          3.61

Save Results

# Save QTL peaks
write.csv(find_peaks(out, map, threshold = 3, drop = 1.5), 
          "LR68_results.csv", row.names = F)

Plot Results

# Plot
png("qtl_tutorial_LR68_02.png", width = 1200, height = 1200, res = 200)
par(mfrow = c(4,1), mar = c(2,4,2,1))
plot(out, map, lodcolumn = 1, col = myColors[1], 
     main = paste("LR68", colnames(out)[1]))
abline(h = 3, col = "steelblue")
plot(out, map, lodcolumn = 2, col = myColors[2], 
     main = paste("LR68", colnames(out)[2]))
abline(h = 3, col = "darkblue")
plot(out, map, lodcolumn = 3, col = myColors[3], 
     main = paste("LR68", colnames(out)[3]))
abline(h = 3, col = "blue")
plot(out, map, lodcolumn = 4, col = myColors[4], 
     main = paste("LR68", colnames(out)[4]))
abline(h = 3, col = "red")
dev.off()

# Plot
png("qtl_tutorial_LR68_03.png", width = 1200, height = 600, res = 200)
par(mar = c(2,4,2,1))
plot(out, map, lodcolumn = 2, col = myColors[2], main = "LR68")
plot(out, map, lodcolumn = 3, col = myColors[3], add = T)
plot(out, map, lodcolumn = 4, col = myColors[4], add = T)
abline(h = 3, col = "red")
legend("top", lwd = 2, col = myColors[c(2:4)], 
       colnames(out)[c(2:4)], bg = "gray90")
dev.off()

Marker Plots

find_peaks(out, map, threshold = 3, drop = 1.5)
##   lodindex       lodcolumn chr pos       lod   ci_lo   ci_hi
## 1        2 Cotyledon_Color LG1  42 53.824644  40.834  45.035
## 2        3     Testa_Color LG2  16  6.975825  14.185  19.267
## 3        3     Testa_Color LG3 108  6.930769 103.996 111.737
## 4        4   Testa_Pattern LG6  23 43.281356   0.000  32.573

Cotyledon Color

# Prep data
xx <- maxmarg(pr, map, chr = "LG1", pos = 42, return_char = T)
# Plot
png("qtl_tutorial_LR68_04.png", width = 800, height = 600, res = 200)
par(mar = c(2,4,2,1))
plot_pxg(xx, myQTL$pheno[,"Cotyledon_Color"], ylab = "Cotyledon Color",
         main = "LR68 - LG1 pos 42")
dev.off()

Testa Pattern

# Prep data
xx <- maxmarg(pr, map, chr = "LG6", pos = 23, return_char = T)
# Plot
png("qtl_tutorial_LR68_05.png", width = 800, height = 600, res = 200)
par(mar = c(2,4,2,1))
plot_pxg(xx, myQTL$pheno[,"Testa_Pattern"], ylab = "Testa Pattern", 
         main = "LR68 - LG6 pos 23")
dev.off()

Testa Color

# Prep plot 
png("qtl_tutorial_LR68_06.png", width = 1200, height = 600, res = 200)
par(mfrow = c(1,2), mar = c(2,4,2,1))
# Plot QTL 1
xx <- maxmarg(pr, map, chr = "LG2", pos = 16, return_char = T)
plot_pxg(xx, myQTL$pheno[,"Testa_Color"], ylab = "Testa Color", 
         main = "LR68 - LG2 pos 16")
# Plot QTL 2
xx <- maxmarg(pr, map, chr = "LG3", pos = 108, return_char = T)
plot_pxg(xx, myQTL$pheno[,"Testa_Color"], ylab = "Testa Color", 
         main = "LR68 - LG3 pos 108")
dev.off()

Estimated QTL effects

Cotyledon Color

# Prep data
c2eff <- scan1coef(pr[,"LG1"], myQTL$pheno[,"Cotyledon_Color"])
myColors2 <- c("slateblue", "violetred")
# Plot
png("qtl_tutorial_LR68_07.png", width = 800, height = 600, res = 200)
par(mar = c(4,4,2,2.5))
plot(c2eff, map["LG1"], columns = 1:2, 
     col = myColors2, main = "LR68 Cotyledon Color")
last_coef <- unclass(c2eff)[nrow(c2eff),]
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i])
}
dev.off()

Testa Color

# Prep data
myColors2 <- c("slateblue", "violetred")
# Plot
png("qtl_tutorial_LR68_08.png", width = 1400, height = 600, res = 200)
par(mfrow = c(1,2), mar = c(4,4,2,2.5))
c2eff <- scan1coef(pr[,"LG2"], myQTL$pheno[,"Testa_Color"])
plot(c2eff, map["LG2"], columns = 1:2, 
     col = myColors2, main = "LR68 Testa Color")
last_coef <- unclass(c2eff)[nrow(c2eff),]
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i])
}
c2eff <- scan1coef(pr[,"LG3"], myQTL$pheno[,"Testa_Color"])
plot(c2eff, map["LG3"], columns = 1:2, 
     col = myColors2, main = "LR68 Testa Color")
last_coef <- unclass(c2eff)[nrow(c2eff),]
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i])
}
dev.off()

Testa Pattern

# Prep data
c2eff <- scan1coef(pr[,"LG6"], myQTL$pheno[,"Testa_Pattern"])
myColors2 <- c("slateblue", "violetred")
# Plot
png("qtl_tutorial_LR68_09.png", width = 800, height = 600, res = 200)
par(mar = c(4,4,2,2.5))
plot(c2eff, map["LG6"], columns = 1:2, 
     col = myColors2, main = "LR68 Testa Pattern")
last_coef <- unclass(c2eff)[nrow(c2eff),]
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i])
}
dev.off()

Custom Plotting

# Create function to map positions of pseudomarkers
fill_psuedos <- function(xx, step = 1) {
  for(i in 1:nrow(xx)) {
    if(is.na(xx$lg[i])) { 
      xx$lg[i] <- xx$lg[i-1]
      xx$cM[i] <- xx$cM[i-1] + step
    }
  }
  xx
}
# Prep data
x_pheno <- read.csv("input_LR68/LR68_Pheno.csv")
x_gmap <- read.csv("input_LR68/LR68_gmap.csv")
xx <- out %>% as.data.frame() %>%
  rownames_to_column(var = "marker") %>%
  left_join(x_gmap, by = "marker") %>%
  mutate(chr = substr(marker, 13, 13),
         pos = substr(marker, regexpr("p", marker)+1, nchar(marker)),
         pos = as.numeric(pos)) %>% 
  select(marker, lg, cM, chr, pos, everything()) %>%
  fill_psuedos() %>%
  gather(trait, value, 6:ncol(.)) %>%
  mutate(trait = factor(trait, levels = myTraits) )
xx[1:14,]
##                   marker  lg     cM chr     pos          trait       value
## 1   Lcu.2RBY.Chr1p102694 LG1  0.000   1  102694 Days_To_Flower 0.062864855
## 2              cLG1.loc1 LG1  1.000          NA Days_To_Flower 0.036025400
## 3              cLG1.loc2 LG1  2.000          NA Days_To_Flower 0.015201565
## 4              cLG1.loc3 LG1  3.000          NA Days_To_Flower 0.002814201
## 5              cLG1.loc4 LG1  4.000          NA Days_To_Flower 0.000341632
## 6              cLG1.loc5 LG1  5.000          NA Days_To_Flower 0.007857684
## 7              cLG1.loc6 LG1  6.000          NA Days_To_Flower 0.024036121
## 8              cLG1.loc7 LG1  7.000          NA Days_To_Flower 0.046610113
## 9   Lcu.2RBY.Chr1p890646 LG1  7.073   1  890646 Days_To_Flower 0.048455585
## 10 Lcu.2RBY.Chr1p1003850 LG1  7.737   1 1003850 Days_To_Flower 0.030324576
## 11             cLG1.loc8 LG1  8.737          NA Days_To_Flower 0.033117058
## 12             cLG1.loc9 LG1  9.737          NA Days_To_Flower 0.044066155
## 13            cLG1.loc10 LG1 10.737          NA Days_To_Flower 0.054710194
## 14 Lcu.2RBY.Chr1p1275060 LG1 10.598   1 1275060 Days_To_Flower 0.060460112
# Plot by linkage group
mp <- ggplot(xx, aes(x = cM, y = value, color = trait)) + 
  geom_line(alpha = 0.8) +
  geom_hline(yintercept = 3, color = "red") +
  facet_grid(trait ~ lg, scales = "free", space = "free_x") +
  scale_color_manual(values = myColors) +
  theme_gwaspr(legend.position = "none") +
  labs(title = "LR68 QTL Results", y = "LOD", caption = myCaption)
ggsave("qtl_tutorial_LR68_10.png", mp, width = 8, height = 6)
# Plot by physical location - pseudomarkers removed
mp <- ggplot(xx %>% filter(!is.na(pos)), 
             aes(x = pos/100000000, y = value, color = trait)) + 
  geom_line(alpha = 0.8) +
  geom_hline(yintercept = 3, color = "red") +
  facet_grid(trait ~ paste("Chr", chr), scales = "free", space = "free_x") +
  scale_color_manual(values = myColors) +
  theme_gwaspr(legend.position = "none") +
  labs(title = "LR68 QTL Results", y = "LOD", 
       x = "100 Mbp", caption = myCaption)
ggsave("qtl_tutorial_LR68_11.png", mp, width = 8, height = 6)

PDF Output

If you have many traits, plotting the results into a pdf can be a useful.

pdf("qtl_tutorial_results_LR68.pdf", width = 12, height = 4)
for(i in myTraits) {
  print(ggplot(xx %>% filter(trait %in% i), 
               aes(x = cM, y = value)) + 
          geom_line(color = "darkgreen") +
          geom_hline(yintercept = 3, color = "red") +
          facet_grid(. ~ lg, space = "free_x") +
          theme_gwaspr() +
          labs(title = i, y = "LOD", caption = myCaption)
  )
}
dev.off()

Binary traits

A normal QTL analysis assumes the data follows a normal distribution. In our case however, we have 3 qualitative traits which are binary in nature. we can use model="binary" to adjust for this.

out_b <- scan1(pr, myQTL$pheno, model = "binary")
operm <- scan1perm(pr, myQTL$pheno, n_perm = 1000, model = "binary")
# Calculate %5 & 1% thresholds
summary(operm, alpha = c(0.05, 0.01)) 
## LOD thresholds (1000 permutations)
##      Cotyledon_Color Testa_Color Testa_Pattern
## 0.05            2.90        2.99          2.95
## 0.01            3.72        3.88          3.74

# Prep data
x1 <- out %>% as.data.frame() %>% rownames_to_column(var = "marker") %>%
  select(-Days_To_Flower) %>%
  gather(trait, value, 2:ncol(.)) %>%
  mutate(Model = "Normal")
x2 <- out_b %>% as.data.frame() %>% rownames_to_column(var = "marker") %>%
  gather(trait, value, 2:ncol(.)) %>%
  mutate(Model = "Binary")
xx <- bind_rows(x1, x2) %>%
  left_join(x_gmap, by = "marker") %>%
  fill_psuedos()
# Plot by linkage group
mp <- ggplot(xx, aes(x = cM, y = value, color = Model)) + 
  geom_line(alpha = 0.7) +
  geom_hline(yintercept = 3, color = "red") +
  facet_grid(trait ~ lg, scales = "free", space = "free_x") +
  scale_color_manual(values = c("darkorange", "darkblue")) +
  theme_gwaspr(legend.position = "bottom") +
  labs(title = "LR68 QTL Results - Model Comparisons", 
       y = "LOD", caption = myCaption)
ggsave("qtl_tutorial_LR68_12.png", mp, width = 8, height = 6)

LR95

For our second example, we will use LR95, an interspecfic RIL population from a cross between Shasta, a zero-tanin variety, and CDC Redberry, the variety which was used for sequencing the Lens culinaris genome.

input_LR95  
  ├── LR95.yaml
  ├── LR95_gmap.csv
  ├── LR95_geno.csv
  └── LR95_pheno.csv
read.csv("input_LR95/LR95.yaml", header = F)
##                                            V1
## 1  # Data from USASK AGILE + EVOLVES projects
## 2                    # Abstract of paper at: 
## 3                                          # 
## 4                           crosstype: riself
## 5                         geno: LR95_geno.csv
## 6                       pheno: LR95_pheno.csv
## 7                         gmap: LR95_gmap.csv
## 8                                    alleles:
## 9                                         - a
## 10                                        - b
## 11                                 genotypes:
## 12                                       a: 1
## 13                                       b: 2
## 14                                na.strings:
## 15                                      - '-'
## 16                                       - NA
read.csv("input_LR95/LR95_gmap.csv")[1:10,]
##                     marker  lg    cM
## 1     Lcu.2RBY.Chr1p721741 LG1  0.00
## 2     Lcu.2RBY.Chr1p433108 LG1  2.61
## 3     Lcu.2RBY.Chr1p122227 LG1  4.17
## 4     Lcu.2RBY.Chr1p121240 LG1  4.67
## 5      Lcu.2RBY.Chr1p82254 LG1  5.21
## 6      Lcu.2RBY.Chr1p80346 LG1  9.49
## 7      Lcu.2RBY.Chr1p77553 LG1 19.10
## 8  Lcu.2RBY.Chr1p220256419 LG1 42.65
## 9   Lcu.2RBY.Chr1p95939503 LG1 49.42
## 10  Lcu.2RBY.Chr1p59044234 LG1 50.45
read.csv("input_LR95/LR95_geno.csv")[1:10,1:4]
##           X Lcu.2RBY.Chr1p721741 Lcu.2RBY.Chr1p433108 Lcu.2RBY.Chr1p122227
## 1   LR-95-3                    b                    b                    b
## 2   LR-95-4                    b                    -                    -
## 3   LR-95-5                    b                    b                    b
## 4   LR-95-7                    b                    a                    b
## 5   LR-95-8                    -                    a                    -
## 6   LR-95-9                    b                    b                    b
## 7  LR-95-10                    a                    -                    -
## 8  LR-95-11                    a                    a                    a
## 9  LR-95-12                    a                    a                    a
## 10 LR-95-13                    a                    b                    b
read.csv("input_LR95/LR95_pheno.csv")[1:10,]
##              Id Cotyledon_Color Testa_Color
## 1  CDC Redberry               0           1
## 2        Shasta               1           0
## 3       LR-95-2               0          NA
## 4       LR-95-3               1           0
## 5       LR-95-4               0           0
## 6       LR-95-5               0           0
## 7       LR-95-7               1           1
## 8       LR-95-8               1           1
## 9       LR-95-9               1           1
## 10     LR-95-10               1           1

Phenotype Data

Similar to the first example, we have two qualitative traits which need to be converted from character to numeric:

  • Cotyledon_Color: 0 = "Orange", 1 = "Yellow"
  • Testa_Color: 0 = "Green", 1 = "Grey"

# Prep data
myColors <- c("darkred", "steelblue")
myTraits <- c("Cotyledon_Color", "Testa_Color")
xx <- read.csv("input_LR95/LR95_Pheno.csv") %>% 
  gather(trait, value, 2:ncol(.)) %>%
  mutate(trait = factor(trait, levels = myTraits))
# Plot
mp <- ggplot(xx, aes(x = value, fill = trait)) +
  geom_bar(color = "black", alpha = 0.7) +
  facet_grid(. ~ trait) +
  scale_fill_manual(values = myColors) +
  theme_gwaspr(legend.position = "none") +
  labs(title = "LR95 Phenotypes", x = NULL, caption = myCaption)
ggsave("qtl_tutorial_LR95_01.png", mp, width = 6, height = 4)

Run QTL Analysis

library(qtl2)
myQTL <- read_cross2("input_LR95/LR95.yaml", quiet = F)
summary(myQTL)
## Object of class cross2 (crosstype "riself")
## 
## Total individuals             115
## No. genotyped individuals     110
## No. phenotyped individuals    114
## No. with both geno & pheno    109
## 
## No. phenotypes                  2
## No. covariates                  0
## No. phenotype covariates        0
## 
## No. chromosomes                10
## Total markers                4843
## 
## No. markers by chr:
##   LG1 LG2.1 LG2.2   LG3 LG4.1 LG4.2   LG5   LG6 LG7.1 LG7.2 
##   646  1020    24   872   166   265   626   565   652     7
# insert a pseudomarker every 1 cM (step = 1)
map <- insert_pseudomarkers(myQTL$gmap, step = 1)
# calculate the QTL genotype probabilities
pr <- calc_genoprob(myQTL, map)
# perform genome scan to determine LOD scores
out <- scan1(pr, myQTL$pheno)
# Find peaks
find_peaks(out, map, threshold = 3, drop = 1.5)
##   lodindex       lodcolumn   chr     pos       lod   ci_lo   ci_hi
## 1        1 Cotyledon_Color   LG1   68.45 40.572884   65.93   69.48
## 2        1 Cotyledon_Color LG2.1 1182.50  3.042827    0.00 1306.00
## 3        2     Testa_Color LG2.1 1544.65 17.879339 1515.82 1548.76
# Perform permutation tests
operm <- scan1perm(pr, myQTL$pheno, n_perm = 1000)
# Calculate thresholds
summary(operm, alpha = c(0.05, 0.01))
## LOD thresholds (1000 permutations)
##      Cotyledon_Color Testa_Color
## 0.05            3.85        3.77
## 0.01            4.62        4.58

Save Results

# Save QTL peaks
write.csv(find_peaks(out, map, threshold = 3, drop = 1.5), 
          "LR95_results.csv", row.names = F)

Plot Results

# Plot
png("qtl_tutorial_LR95_02.png", width = 1400, height = 1000, res = 200)
par(mfrow = c(2,1), mar = c(2,4,2,1.5))
plot(out, map, lodcolumn = 1, col = myColors[1], 
     main = paste("LR95", colnames(out)[1]))
abline(h = 3, col = "red")
plot(out, map, lodcolumn = 2, col = myColors[2], 
     main = paste("LR95", colnames(out)[2]))
dev.off()

Marker Plots

find_peaks(out, map, threshold = 3, drop = 1.5)
##   lodindex       lodcolumn   chr     pos       lod   ci_lo   ci_hi
## 1        1 Cotyledon_Color   LG1   68.45 40.572884   65.93   69.48
## 2        1 Cotyledon_Color LG2.1 1182.50  3.042827    0.00 1306.00
## 3        2     Testa_Color LG2.1 1544.65 17.879339 1515.82 1548.76

# Prep plot 
png("qtl_tutorial_LR95_03.png", width = 1400, height = 600, res = 200)
par(mfrow = c(1,2), mar = c(2,4,2,1))
# Plot QTL 1
xx <- maxmarg(pr, map, chr = "LG1", pos = 68.45, return_char = T)
plot_pxg(xx, myQTL$pheno[,"Cotyledon_Color"], ylab = "Cotyledon Color", 
         main = "LR95 - LG1 pos = 68.45")
# Plot QTL 2
xx <- maxmarg(pr, map, chr = "LG2.1", pos = 1544.65, return_char = T)
plot_pxg(xx, myQTL$pheno[,"Testa_Color"], ylab = "Testa Color", 
         main = "LR95 - LG2.1 pos 1544.65")
dev.off()

Estimated QTL effects

# Prep data
myColors2 <- c("slateblue", "violetred", "green3")
# Plot
png("qtl_tutorial_LR95_04.png", width = 1400, height = 600, res = 200)
par(mfrow = c(1,2), mar = c(4,4,2,2.5))
c2eff <- scan1coef(pr[,"LG1"], myQTL$pheno[,"Cotyledon_Color"])
plot(c2eff, map["LG1"], columns = 1:2, col = myColors2, main = "LR95 Cotyledon_Color")
last_coef <- unclass(c2eff)[nrow(c2eff),] 
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i])
}
c2eff <- scan1coef(pr[,"LG2.1"], myQTL$pheno[,"Testa_Color"])
plot(c2eff, map["LG2.1"], columns = 1:2, col = myColors2, main = "LR95 Testa_Color")
last_coef <- unclass(c2eff)[nrow(c2eff),] 
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i])
}
dev.off()

Custom Plotting

# Create function to map positions of pseudomarkers
fill_psuedos <- function(xx, step = 1) {
  for(i in 1:nrow(xx)) {
    if(is.na(xx$lg[i])) { 
      xx$lg[i] <- xx$lg[i-1]
      xx$cM[i] <- xx$cM[i-1] + step
    }
  }
  xx
}
# Prep data
x_pheno <- read.csv("input_LR95/LR95_Pheno.csv")
x_gmap <- read.csv("input_LR95/LR95_gmap.csv")
xx <- out %>% as.data.frame() %>% 
  rownames_to_column(var = "marker") %>%
  left_join(x_gmap, by = "marker") %>%
  mutate(chr = substr(marker, 13, 13),
         pos = substr(marker, regexpr("p",marker)+1, nchar(marker)),
         pos = as.numeric(pos)) %>%
  select(marker, lg, cM, chr, pos, everything()) %>%
  fill_psuedos() %>%
  gather(trait, value, 6:ncol(.)) %>%
  mutate(trait = factor(trait, levels = myTraits))
# Plot by linkage group
mp <- ggplot(xx, aes(x = cM, y = value, color = trait)) + 
  geom_line(alpha = 0.8) +
  geom_hline(yintercept = 3, color = "red") +
  facet_grid(trait ~ lg, scales = "free", space = "free_x") +
  scale_color_manual(values = myColors) +
  theme_gwaspr(legend.position = "none") + 
  labs(title = "LR95 QTL Results", y = "LOD", caption = myCaption)
ggsave("qtl_tutorial_LR95_05.png", mp, width = 8, height = 5)
# Plot by physical location - pseudomarkers removed
mp <- ggplot(xx %>% filter(!is.na(pos)), 
             aes(x = pos/100000000, y = value, color = trait)) + 
  geom_line(alpha = 0.8) +
  geom_hline(yintercept = 3, color = "red") +
  facet_grid(trait ~ chr, scales = "free", space = "free_x") +
  scale_color_manual(values = myColors) +
  theme_gwaspr(legend.position = "none") +
  labs(title = "LR95 QTL Results", y = "LOD", 
       x = "100 Mbp", caption = myCaption)
ggsave("qtl_tutorial_LR95_06.png", mp, width = 8, height = 5)

Mapping Comparison

# Prep data
yy <- xx %>% filter(trait == "Cotyledon_Color") %>%
  filter(lg == "LG1")
# Plot
mp1 <- ggplot(yy, aes(x = cM, y = value)) +
  geom_line(color = "darkgreen", alpha = 0.7, size = 1) +
  geom_hline(yintercept = 3, color = "red") +
  facet_grid(trait ~ lg, scales = "free", space = "free_x") +
  theme_gwaspr() +
  labs(title = "LR95 Linkage Map", y = "LOD", caption = "")
mp2 <- ggplot(yy %>% filter(chr == "1"), aes(x = pos / 100000000, y = value)) +
  geom_line(color = "darkgreen", alpha = 0.7, size = 1) +
  geom_hline(yintercept = 3, color = "red") +
  facet_grid(trait ~ paste("Chr", chr), scales = "free", space = "free_x") +
  theme_gwaspr() +
  labs(title = "LR95 Physical Location", y = "LOD", 
       x = "100 Mbp", caption = myCaption)
mp <- ggpubr::ggarrange(mp1, mp2)
ggsave("qtl_tutorial_LR95_07.png", mp, width = 8, height = 4)

Data Prep

The original data was not properly formatted for running the QTL analysis. The following is the code I used to take the original files and format them to be used by qtl2.

# Prep phenotype data
xx <- readxl::read_xlsx("rawdata/CombinedphenotypingLR93,LR-95.xlsx", "LR-95", skip = 1) %>%
  filter(!is.na(Cotyledon...6))
xx <- xx %>% select(Id = name, 
                    Cotyledon_Color = Cotyledon...6,
                    Testa_Color = `Seed Coat...9`) %>%
  mutate(Cotyledon_Color = plyr::mapvalues(Cotyledon_Color, c("orange","yellow"), c(0,1)),
         Cotyledon_Color = as.numeric(Cotyledon_Color),
         Testa_Color = plyr::mapvalues(Testa_Color, c("green", "grey", "white"), c(0,1,0)),
         Testa_Color = as.numeric(Testa_Color))
write.csv(xx, "input_LR95/LR95_pheno.csv", row.names = F)
# Prep genotype map
xx <- readxl::read_xlsx("rawdata/221122 - LR-95 map - binned - Rob.xlsx")
gmap <- xx %>% select(marker=Name, lg=...3, cM) %>%
  mutate(lg = substr(lg, 1, regexpr("-", lg)-1)) %>%
  arrange(lg, cM)
write.csv(gmap, "input_LR95/LR95_gmap.csv", row.names = F)
# Prep genotype data
geno <- xx %>% select(6:ncol(.)) %>% t()
colnames(geno) <- geno[1,]
geno <- geno %>% as.data.frame() %>% slice(-1)
for(i in 1:ncol(geno)) { geno[,i] <- plyr::mapvalues(geno[,i], c(" 0"," 2","-1"), c("a","b","-")) }
write.csv(geno, "input_LR95/LR95_geno.csv", row.names = T)

LR89

For our third example, we will use LR89, an interspecfic RIL population from a cross between two wild lentil (Lens orientalis) accessions, BGE 016880 and IG 72529, which have been phenotyped for root rot in 3 experiments.

input_LR89
  ├── LR89.yaml
  ├── LR89_gmap.csv
  ├── LR89_geno.csv
  └── LR89_pheno.csv
read.csv("input_LR89/LR89.yaml", header = F)
##                                            V1
## 1  # Data from USASK AGILE + EVOLVES projects
## 2                    # Abstract of paper at: 
## 3                                          # 
## 4                           crosstype: riself
## 5                         geno: LR89_geno.csv
## 6                       pheno: LR89_pheno.csv
## 7                         gmap: LR89_gmap.csv
## 8                                    alleles:
## 9                                         - A
## 10                                        - B
## 11                                 genotypes:
## 12                                       A: 1
## 13                                       B: 2
## 14                                na.strings:
## 15                                        - X
## 16                                       - NA
read.csv("input_LR89/LR89_gmap.csv")[1:10,]
##                    marker  lg       cM
## 1   Lcu.2RBY.Chr1p2064272 LG1  0.00000
## 2   Lcu.2RBY.Chr1p5335188 LG1  8.47102
## 3   Lcu.2RBY.Chr1p8749182 LG1 11.01769
## 4   Lcu.2RBY.Chr1p9343566 LG1 15.41840
## 5   Lcu.2RBY.Chr1p9543814 LG1 17.77799
## 6   Lcu.2RBY.Chr1p9936728 LG1 19.43318
## 7  Lcu.2RBY.Chr1p11875393 LG1 24.98043
## 8  Lcu.2RBY.Chr1p11830048 LG1 26.30925
## 9  Lcu.2RBY.Chr1p13055191 LG1 30.39120
## 10 Lcu.2RBY.Chr1p15679431 LG1 35.82659
read.csv("input_LR89/LR89_geno.csv")[1:10,1:4]
##         Id Lcu.2RBY.Chr1p114068927 Lcu.2RBY.Chr1p11830048 Lcu.2RBY.Chr1p11875393
## 1   LR89-1                       A                      A                      A
## 2   LR89-2                       B                      B                      B
## 3   LR89-5                       B                      A                      A
## 4   LR89-6                       B                      B                      B
## 5   LR89-7                       A                      B                      B
## 6   LR89-8                       B                      B                      B
## 7   LR89-9                       B                      B                      B
## 8  LR89-10                       B                      B                      B
## 9  LR89-11                       A                      A                      A
## 10 LR89-13                       A                      A                      A
read.csv("input_LR89/LR89_pheno.csv")[1:10,]
##           Id RootRot_E1 RootRot_E2 RootRot_E3 RootRot_Mean
## 1  BGE016880   85.96875   72.69792   73.71875     77.46181
## 2    IG72529   50.43750   42.68750   41.12500     44.75000
## 3     LR89-1   53.53125   58.18750   63.00000     57.28750
## 4    LR89-10   56.53125   53.56250   64.37500     58.15625
## 5   LR89-101   58.06250   59.81250   50.50000     56.63636
## 6   LR89-104   59.81250   47.37500   45.81250     51.00000
## 7   LR89-105   48.93750   65.00000   62.29167     58.17424
## 8   LR89-106   67.50000   67.56250   58.15625     64.40625
## 9   LR89-107   65.93750   55.15625   64.50000     61.86458
## 10  LR89-108   62.87500   61.37500   61.28125     61.84375

Phenotype Data

# Prep data
myTraits <- c("RootRot_E1", "RootRot_E2", "RootRot_E3", "RootRot_Mean")
myColors <- c("steelblue", "darkblue", "purple4", "darkred")
xx <- read.csv("input_LR89/LR89_Pheno.csv") %>% 
  gather(trait, value, 2:ncol(.)) %>%
  mutate(trait = factor(trait, levels = myTraits))
# Plot
mp <- ggplot(xx, aes(x = value, fill = trait)) +
  geom_histogram(color = "black", alpha = 0.7) +
  facet_grid(. ~ trait) +
  scale_fill_manual(name = NULL, values = myColors) +
  theme_gwaspr(legend.position = "none") +
  labs(title = "LR89 Phenotypes", x = NULL, caption = myCaption)
ggsave("qtl_tutorial_LR89_01.png", mp, width = 8, height = 4)

Run QTL Analysis

library(qtl2)
myQTL <- read_cross2("input_LR89/LR89.yaml", quiet = F)
summary(myQTL)
## Object of class cross2 (crosstype "riself")
## 
## Total individuals             187
## No. genotyped individuals     184
## No. phenotyped individuals    186
## No. with both geno & pheno    183
## 
## No. phenotypes                  4
## No. covariates                  0
## No. phenotype covariates        0
## 
## No. chromosomes                 6
## Total markers                1629
## 
## No. markers by chr:
## LG1 LG2 LG3 LG4 LG5 LG6 
## 225 254 246 191 197 516
# insert a pseudomarker every 1 cM (step = 1)
map <- insert_pseudomarkers(myQTL$gmap, step = 1)
# calculate the QTL genotype probabilities
pr <- calc_genoprob(myQTL, map)
# perform genome scan to determine LOD scores
out <- scan1(pr, myQTL$pheno)
# Find peaks
find_peaks(out, map, threshold = 3, drop = 1.5)
##   lodindex    lodcolumn chr       pos      lod     ci_lo    ci_hi
## 1        1   RootRot_E1 LG3 289.00000 3.813421 254.98344 297.9742
## 2        1   RootRot_E1 LG5  74.12573 4.087955  40.27163 192.8705
## 3        2   RootRot_E2 LG1  76.55233 3.264651  30.39120 212.8597
## 4        2   RootRot_E2 LG6 370.00000 5.774678 306.57322 557.5672
## 5        3   RootRot_E3 LG6 308.00000 5.313440 296.31080 377.9224
## 6        4 RootRot_Mean LG3 280.47138 3.613677 245.21438 297.9742
## 7        4 RootRot_Mean LG5 133.00000 3.777336  29.99727 202.4022
## 8        4 RootRot_Mean LG6 315.00000 7.094838 306.57322 367.4657
# Perform permutation tests
operm <- scan1perm(pr, myQTL$pheno, n_perm = 1000)
# Calculate thresholds
summary(operm, alpha = c(0.05, 0.01))
## LOD thresholds (1000 permutations)
##      RootRot_E1 RootRot_E2 RootRot_E3 RootRot_Mean
## 0.05       3.28       3.15       3.19         3.18
## 0.01       4.15       3.89       4.02         3.84

Save Restuls

# Save QTL peaks
write.csv(find_peaks(out, map, threshold = 3, drop = 1.5), 
          "LR89_results.csv", row.names = F)

Plot Results

# Plot
png("qtl_tutorial_LR89_02.png", width = 1200, height = 1200, res = 200)
par(mfrow = c(4,1), mar = c(2,4,2,1))
plot(out, map, lodcolumn = 1, col = myColors[1], 
     main = paste("LR89", colnames(out)[1]))
abline(h = 3, col = "red")
plot(out, map, lodcolumn = 2, col = myColors[2], 
     main = paste("LR89", colnames(out)[2]))
abline(h = 3, col = "red")
plot(out, map, lodcolumn = 3, col = myColors[3], 
     main = paste("LR89", colnames(out)[3]))
abline(h = 3, col = "red")
plot(out, map, lodcolumn = 4, col = myColors[4], 
     main = paste("LR89", colnames(out)[4]))
abline(h = 3, col = "red")
dev.off()

Marker Plots

# Find peaks
find_peaks(out, map, threshold=3, drop=1.5)
##   lodindex    lodcolumn chr       pos      lod     ci_lo    ci_hi
## 1        1   RootRot_E1 LG3 289.00000 3.813421 254.98344 297.9742
## 2        1   RootRot_E1 LG5  74.12573 4.087955  40.27163 192.8705
## 3        2   RootRot_E2 LG1  76.55233 3.264651  30.39120 212.8597
## 4        2   RootRot_E2 LG6 370.00000 5.774678 306.57322 557.5672
## 5        3   RootRot_E3 LG6 308.00000 5.313440 296.31080 377.9224
## 6        4 RootRot_Mean LG3 280.47138 3.613677 245.21438 297.9742
## 7        4 RootRot_Mean LG5 133.00000 3.777336  29.99727 202.4022
## 8        4 RootRot_Mean LG6 315.00000 7.094838 306.57322 367.4657

# Prep data
xx <- maxmarg(pr, map, chr = "LG3", pos = 289, return_char = T)
# Plot
png("qtl_tutorial_LR89_03.png", width = 1200, height = 400, res = 200)
par(mfrow = c(1,3), mar=c(2,4,2,1))
plot_pxg(xx, myQTL$pheno[,"RootRot_E1"], ylab = "RootRot_E1")
plot_pxg(xx, myQTL$pheno[,"RootRot_E2"], ylab = "RootRot_E2", main = "LR89 - LG3 Pos=289")
plot_pxg(xx, myQTL$pheno[,"RootRot_E3"], ylab = "RootRot_E3")
dev.off()

# Prep data
xx <- maxmarg(pr, map, chr = "LG5", pos = 74.78, return_char = T)
# Plot
png("qtl_tutorial_LR89_04.png", width = 1200, height = 400, res = 200)
par(mfrow = c(1,3), mar=c(2,4,2,1))
plot_pxg(xx, myQTL$pheno[,"RootRot_E1"], ylab = "RootRot_E1")
plot_pxg(xx, myQTL$pheno[,"RootRot_E2"], ylab = "RootRot_E2", main = "LR89 - LG5 Pos=74.78")
plot_pxg(xx, myQTL$pheno[,"RootRot_E3"], ylab = "RootRot_E3")
dev.off()

# Prep data
xx <- maxmarg(pr, map, chr = "LG6", pos = 370, return_char = T)
# Plot
png("qtl_tutorial_LR89_05.png", width = 1200, height = 400, res = 200)
par(mfrow = c(1,3), mar=c(2,4,2,1))
plot_pxg(xx, myQTL$pheno[,"RootRot_E1"], ylab = "RootRot_E1")
plot_pxg(xx, myQTL$pheno[,"RootRot_E2"], ylab = "RootRot_E2", main = "LR89 - LG6 pos=370")
plot_pxg(xx, myQTL$pheno[,"RootRot_E3"], ylab = "RootRot_E3")
dev.off()

# Prep data
xx <- maxmarg(pr, map, chr = "LG6", pos = 308, return_char = T)
# Plot
png("qtl_tutorial_LR89_06.png", width = 1200, height = 400, res = 200)
par(mfrow = c(1,3), mar=c(2,4,2,1))
plot_pxg(xx, myQTL$pheno[,"RootRot_E1"], ylab = "RootRot_E1")
plot_pxg(xx, myQTL$pheno[,"RootRot_E2"], ylab = "RootRot_E2", main = "LR89 - LG6 pos=308")
plot_pxg(xx, myQTL$pheno[,"RootRot_E3"], ylab = "RootRot_E3")
dev.off()

Estimated QTL effects

LG3

# Prep data
myColors2 <- c("slateblue", "violetred")
myLims <- c(-4.4,4.4)
# Plot
png("qtl_tutorial_LR89_07.png", width = 800, height = 1200, res = 200)
par(mfrow = c(3,1), mar = c(4,4,2,2.5))
c2eff <- scan1coef(pr[,"LG3"], myQTL$pheno[,"RootRot_E1"])
plot(c2eff, map["LG3"], columns = 1:2, col = myColors2, 
     ylim = myLims, main = "LR89 RootRot_E1")
last_coef <- unclass(c2eff)[nrow(c2eff),] 
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i]) 
}
c2eff <- scan1coef(pr[,"LG3"], myQTL$pheno[,"RootRot_E2"])
plot(c2eff, map["LG3"], columns = 1:2, col = myColors2, 
     ylim = myLims, main = "LR89 RootRot_E2")
last_coef <- unclass(c2eff)[nrow(c2eff),] 
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i]) 
}
c2eff <- scan1coef(pr[,"LG3"], myQTL$pheno[,"RootRot_E3"])
plot(c2eff, map["LG3"], columns = 1:2, col = myColors2, 
     ylim = myLims, main = "LR89 RootRot_E3")
last_coef <- unclass(c2eff)[nrow(c2eff),] 
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i]) }
dev.off()

LG5

# Prep data
myLims <- c(-4.3,4.3)
# Plot
png("qtl_tutorial_LR89_08.png", width = 800, height = 1200, res = 200)
par(mfrow = c(3,1), mar = c(4,4,2,2.5))
c2eff <- scan1coef(pr[,"LG5"], myQTL$pheno[,"RootRot_E1"])
plot(c2eff, map["LG5"], columns = 1:2, col = myColors2, 
     ylim = myLims, main = "LR89 RootRot_E1")
last_coef <- unclass(c2eff)[nrow(c2eff),]
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i]) 
}
c2eff <- scan1coef(pr[,"LG5"], myQTL$pheno[,"RootRot_E2"])
plot(c2eff, map["LG5"], columns = 1:2, col = myColors2, 
     ylim = myLims, main = "LR89 RootRot_E2")
last_coef <- unclass(c2eff)[nrow(c2eff),]
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i]) 
}
c2eff <- scan1coef(pr[,"LG5"], myQTL$pheno[,"RootRot_E3"])
plot(c2eff, map["LG5"], columns = 1:2, col = myColors2, 
     ylim = myLims, main = "LR89 RootRot_E3")
last_coef <- unclass(c2eff)[nrow(c2eff),] 
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i]) 
}
dev.off()

LG6

# Prep data
myLims <- c(-4.2,4.2)
# Plot
png("qtl_tutorial_LR89_09.png", width = 800, height = 1200, res = 200)
par(mfrow = c(3,1), mar = c(4,4,2,2.5))
c2eff <- scan1coef(pr[,"LG6"], myQTL$pheno[,"RootRot_E1"])
plot(c2eff, map["LG6"], columns = 1:2, col = myColors2, 
     ylim = myLims, main = "LR89 RootRot_E1")
last_coef <- unclass(c2eff)[nrow(c2eff),] 
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i],
       tick = F, col.axis = myColors2[i]) 
}
c2eff <- scan1coef(pr[,"LG6"], myQTL$pheno[,"RootRot_E2"])
plot(c2eff, map["LG6"], columns = 1:2, col = myColors2, 
     ylim = myLims, main = "LR89 RootRot_E2")
last_coef <- unclass(c2eff)[nrow(c2eff),] 
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i]) 
}
c2eff <- scan1coef(pr[,"LG6"], myQTL$pheno[,"RootRot_E3"])
plot(c2eff, map["LG6"], columns = 1:2, col = myColors2, 
     ylim = myLims, main = "LR89 RootRot_E3")
last_coef <- unclass(c2eff)[nrow(c2eff),] 
for(i in 1:2) { 
  axis(side = 4, at = last_coef[i], names(last_coef)[i], 
       tick = F, col.axis = myColors2[i]) 
}
dev.off()

Custom Plotting

# Create function to map positions of pseudomarkers
fill_psuedos <- function(xx, step = 1) {
  for(i in 1:nrow(xx)) {
    if(is.na(xx$lg[i])) { 
      xx$lg[i] <- xx$lg[i-1]
      xx$cM[i] <- xx$cM[i-1] + step
    }
  }
  xx
}
# Prep data
x_pheno <- read.csv("input_LR89/LR89_Pheno.csv")
x_gmap <- read.csv("input_LR89/LR89_gmap.csv")
xx <- out %>% as.data.frame() %>% 
  rownames_to_column(var = "marker") %>%
  left_join(x_gmap, by = "marker") %>%
  select(marker, lg, cM, everything()) %>%
  fill_psuedos() %>%
  gather(trait, value, 4:ncol(.)) %>%
  mutate(trait = factor(trait, levels = myTraits),
         chr = substr(marker, 13, 13),
         pos = substr(marker, regexpr("p",marker)+1, nchar(marker)),
         pos = as.numeric(pos))
# Plot
mp <- ggplot(xx, aes(x = cM, y = value, color = trait)) + 
  geom_line(alpha = 0.8) +
  geom_hline(yintercept = 3, color = "red") +
  facet_grid(trait ~ lg, scales = "free", space = "free_x") +
  scale_color_manual(values = myColors) +
  theme_gwaspr(legend.position = "none") + 
  labs(title = "LR89 QTL Results", y = "LOD", caption = myCaption)
ggsave("qtl_tutorial_LR89_10.png", mp, width = 8, height = 6)
mp <- ggplot(xx %>% filter(!is.na(pos)), 
             aes(x = pos/1000000, y = value, color = trait)) + 
  geom_line(alpha = 0.8) +
  geom_hline(yintercept = 3, color = "red") +
  facet_grid(trait ~ chr, scales = "free", space = "free_x") +
  scale_color_manual(values = myColors) +
  theme_gwaspr(legend.position = "none") +
  labs(title = "LR89 QTL Results", y = "LOD", caption = myCaption)
ggsave("qtl_tutorial_LR89_11.png", mp, width = 8, height = 6)

Prep data

The original data was not properly formatted for running the QTL analysis. The following is the code I used to take the original files and format them to be used by qtl2.

# Prep genotype data
xx <- read.table("rawdata/LR-89 original genotyping matrix.txt") %>% 
  t() %>% as.data.frame()
colnames(xx) <- xx[1,]
xx <- xx %>% rename(Id=Pos) %>%
  slice(-1) %>%
  mutate(Id = substr(Id, regexpr("\\|", Id)+1, nchar(Id)),
         Id = gsub("-0|-00","-", Id))
write.csv(xx, "input_LR89/LR89_geno.csv", row.names = F)
# Prep genotype map
xx <- readxl::read_xlsx("rawdata/LR-89_Input-221101.xlsx", "LinkageMap", col_names = F)
colnames(xx) <- c("marker", "lg", "cM")
xx <- xx %>% mutate(lg = paste0("LG", lg))
write.csv(xx, "input_LR89/LR89_gmap.csv", row.names = F)
# Prep phenotype data
#xx <- readxl::read_xlsx("rawdata/LR-89 phenotypic data means.xlsx") %>%
#  rename(Name=name, Root.Rot=Mean)
xx <- readxl::read_xlsx("rawdata/20221202_means of repeats and combined.xlsx") %>%
  select(Id=RIL, RootRot_E1=Exp1, RootRot_E2=Exp2, RootRot_E3=Exp3, RootRot_Mean=Combined)
write.csv(xx, "input_LR89/LR89_pheno.csv", row.names = F)

iron example

iron <- read_cross2(system.file("extdata", "iron.zip", package="qtl2"))
map <- insert_pseudomarkers(iron$gmap, step = 1)
pr <- calc_genoprob(iron, map, error_prob = 0.002)
Xcovar <- get_x_covar(iron)
out <- scan1(pr, iron$pheno, Xcovar = Xcovar)
par(mar=c(5.1, 4.1, 1.1, 1.1))
ymx <- maxlod(out) # overall maximum LOD score
plot(out, map, lodcolumn = 1, col = "slateblue", ylim = c(0,ymx*1.02))
plot(out, map, lodcolumn = 2, col = "violetred", add = T)
legend("topleft", lwd = 2, col = c("slateblue", "violetred"), 
       colnames(out), bg = "gray90")


© Derek Michael Wright