SimPlus was created for testing COMA with AlphaSimR. COMA provides functions for optimum contribution selection (OCS) and optimum mate allocation (OMA). For OCS, the objective is to maximize the average GEBV of the parents, weighted by their contributions. For OMA, the objective is to maximize the average GPMP (genomic prediction of mate performance), weighted by the mate allocations.

The following code creates a founder population in mutation-drift equilibrium and sets the variance parameters for a trait with additive and dominance effects.

library(AlphaSimR)
founders <- runMacs2(nInd=500,nChr=10,segSites=NULL,Ne=100,
                     mutRate=1e-8, ploidy=2,
                     histGen=NULL,histNe=NULL,genLen=1)
SP <- SimParam$new(founders)
SP$setTrackRec(TRUE)
SP$addTraitADG(nQtlPerChr = 100, mean = 0, var = 1, varDD = 0.5, 
               meanDD = 0.5, useVarA = FALSE)
SP$setVarE(varE=2)
pop <- newPop(founders, simParam=SP)

OCS-A-Pheno

The following code runs 20 generations of mass selection at 1% inbreeding, using phenotypes for genetic merit. The population is saved after 5 generations to compare the response with genomic selection.

library(SimPlus)
gp <- genParam(pop, simParam = SP)
total.gen <- 20
results <- data.frame(gen=1:total.gen, mu=numeric(total.gen), 
                      genicVar=numeric(total.gen), 
                      F.coeff=numeric(total.gen), 
                      n.parent=numeric(total.gen), 
                      n.mate=numeric(total.gen),
                      accuracy=numeric(total.gen))  
results$mu[1] <- as.numeric(gp$mu)
results$genicVar[1] <- as.numeric(gp$genicVarG) 

p.ref <- update_TP(pop, SP, geno.file="geno.csv.gz", 
                   pheno.file="pheno.csv", ped.file="ped.csv", 
                   gen.TP=0)
update_K(pop, SP, K.method="A", K.file="kinship.csv", ped.file="ped.csv")

for (gen in 1:total.gen) {
  print(sub("X",gen,"Generation X"))
  ans1 <- sim_OCS(pop, SP, n.progeny=500, dF=0.01, solver="MOSEK",
                  COMA.file="pheno.csv", K.file="kinship.csv")
  
  #optional, measure accuracy
  acc <- sim_accuracy(pop, SP, COMA.file="pheno.csv")
  results$accuracy[gen] <- acc$ocs
  
  ans2 <- sim_mate(pop, SP, matings=ans1$om, total.progeny=500, min.progeny=5)
  results$n.parent[gen] <- ans2$n.parent
  results$n.mate[gen] <- nrow(ans2$matings)
  
  pop <- setPheno(ans2$progeny,simParam = SP) #AlphaSimR function
  gp <- genParam(pop, simParam = SP)
  results$mu[gen] <- as.numeric(gp$mu)
  results$genicVar[gen] <- as.numeric(gp$genicVarG) 
  
  update_TP(pop, SP, geno.file="geno.csv.gz", pheno.file="pheno.csv", 
            ped.file="ped.csv", gen.TP=3)
  results$F.coeff[gen] <- update_K(pop, SP, K.method="A", K.file="kinship.csv", 
                               ped.file="ped.csv")
  if (gen==5)
    sim_save("gen5.rda",pop,SP,"geno.csv.gz","pheno.csv","ped.csv",p.ref=p.ref)
}

results.all <- data.frame(results,method="OCS-A-Pheno")

Even though there is no genomic selection in the above simulation, the function update_TP is still required to update the phenotype and pedigree files for OCS. The argument gen.TP controls the number of generations in the TP (including the candidates, which are selected after phenotyping). It is set at 3 to anticipate the simulation below, ensuring the full TP is available at the onset of GS in generation 5.

OCS-A-GEBV

To simulate GS requires another function, sim_StageWise, to predict the marker effects using the StageWise package. The file with the predicted marker effects (COMA.file) is now used as input for sim_OCS instead of the phenotype file.

gen5 <- sim_load("gen5.rda","geno.csv.gz","pheno.csv","ped.csv")
pop <- gen5$pop
SP <- gen5$SP

results$F.coeff[5] <- update_K(pop, SP, K.method="A", 
                           K.file="kinship.csv", ped.file="ped.csv")

for (gen in 6:total.gen) {
  print(sub("X",gen,"Generation X"))
  ans0 <- sim_StageWise("geno.csv.gz","pheno.csv",ploidy=2,
                        COMA.file="COMA.csv", gen.TP=3)
  
  ans1 <- sim_OCS(pop, SP, n.progeny=500, dF=0.01, solver="MOSEK",
                  COMA.file="COMA.csv", K.file="kinship.csv")
  
  acc <- sim_accuracy(pop, SP, COMA.file="COMA.csv", K.file="kinship.csv")
  results$accuracy[gen] <- acc$ocs
  
  ans2 <- sim_mate(pop, SP, matings=ans1$om, total.progeny=500, min.progeny=5)
  results$n.parent[gen] <- ans2$n.parent
  results$n.mate[gen] <- nrow(ans2$matings)
  
  pop <- setPheno(ans2$progeny,simParam = SP) 
  gp <- genParam(pop, simParam = SP)
  results$mu[gen] <- as.numeric(gp$mu)
  results$genicVar[gen] <- as.numeric(gp$genicVarG) 
  
  update_TP(pop, SP, geno.file="geno.csv.gz", pheno.file="pheno.csv",
            ped.file="ped.csv", gen.TP=3)
  results$F.coeff[gen] <- update_K(pop, SP, K.method="A", K.file="kinship.csv", 
                               ped.file="ped.csv")
}

results.all <- rbind(results.all, data.frame(results[5:20,], method="OCS-A-GEBV"))

OCS-G.IBD-GEBV

Instead of pedigree kinship, genomic IBD (G.IBD) kinship can be used to control inbreeding. To reduce computational time, not all segregating sites are needed for this calculation. The parameter “ibd.loci” in the function update_K controls how many loci per chromosome to use. The default is 100, and since the chromosomes are 100 cM, this implies 1 marker per cM.

gen5 <- sim_load("gen5.rda","geno.csv.gz","pheno.csv","ped.csv")
pop <- gen5$pop
SP <- gen5$SP

results$F.coeff[5] <- update_K(pop, SP, K.method="G.IBD", 
                               K.file="kinship.csv", 
                               ped.file="ped.csv", ibd.loci=100, n.core=2)

for (gen in 6:total.gen) {
  print(sub("X",gen,"Generation X"))
  ans0 <- sim_StageWise("geno.csv.gz","pheno.csv",ploidy=2,
                        COMA.file="COMA.csv", gen.TP=3)
  
  ans1 <- sim_OCS(pop, SP, n.progeny=500, dF=0.01, solver="MOSEK",
                  COMA.file="COMA.csv", K.file="kinship.csv")
  
  acc <- sim_accuracy(pop, SP, COMA.file="COMA.csv", K.file="kinship.csv")
  results$accuracy[gen] <- acc$ocs
  
  ans2 <- sim_mate(pop, SP, matings=ans1$om, total.progeny=500, min.progeny=5)
  results$n.parent[gen] <- ans2$n.parent
  results$n.mate[gen] <- nrow(ans2$matings)
  
  pop <- setPheno(ans2$progeny,simParam = SP) 
  gp <- genParam(pop, simParam = SP)
  results$mu[gen] <- as.numeric(gp$mu)
  results$genicVar[gen] <- as.numeric(gp$genicVarG) 
  
  update_TP(pop, SP, geno.file="geno.csv.gz", pheno.file="pheno.csv",
            ped.file="ped.csv", gen.TP=3)
  results$F.coeff[gen] <- update_K(pop, SP, K.method="G.IBD", 
                                   K.file="kinship.csv", ped.file="ped.csv",
                                   ibd.loci=100, n.core=2)
}

results.all <- rbind(results.all, data.frame(results[5:20,], method="OCS-G.IBD-GEBV"))

OMA-A-GPMP

Simulating OMA follows the same code as above but replaces sim_OCS with sim_OMA, which has an additional argument max.parent. To limit the size of the computational problem for OMA, OCS is first used to reduce the number of candidates to max.parent. The dF argument for sim_OMA is a vector of 2 numbers for the lower and upper bounds. In this case, the numbers are identical to create an equality constraint.

gen5 <- sim_load("gen5.rda","geno.csv.gz","pheno.csv","ped.csv")
pop <- gen5$pop
SP <- gen5$SP

results$F.coeff[5] <- update_K(pop, SP, K.method="A", 
                               K.file="kinship.csv", ped.file="ped.csv")

for (gen in 6:total.gen) {
  print(sub("X",gen,"Generation X"))
  ans0 <- sim_StageWise("geno.csv.gz","pheno.csv",ploidy=2,
                        COMA.file="COMA.csv", gen.TP=3)
  
  ans1 <- sim_OMA(pop, SP, n.progeny=500, dF=c(0.01,0.01), solver="MOSEK",
                  COMA.file="COMA.csv", K.file="kinship.csv",
                  max.parent=200)
  
  acc <- sim_accuracy(pop, SP, COMA.file="COMA.csv", K.file="kinship.csv")
  results$accuracy[gen] <- acc$oma
  
  ans2 <- sim_mate(pop, SP, matings=ans1$om, total.progeny=500, min.progeny=5)
  results$n.parent[gen] <- ans2$n.parent
  results$n.mate[gen] <- nrow(ans2$matings)
  
  pop <- setPheno(ans2$progeny,simParam = SP) 
  gp <- genParam(pop, simParam = SP)
  results$mu[gen] <- as.numeric(gp$mu)
  results$genicVar[gen] <- as.numeric(gp$genicVarG) 
  
  update_TP(pop, SP, geno.file="geno.csv.gz", pheno.file="pheno.csv",
            ped.file="ped.csv", gen.TP=3)
  results$F.coeff[gen] <- update_K(pop, SP, K.method="A", 
                                   K.file="kinship.csv", ped.file="ped.csv")
}

results.all <- rbind(results.all, data.frame(results[5:20,], method="OMA-A-GPMP"))

The average inbreeding rate at time \(t\) follows

\[ \Delta F_t = 1-\left( \frac{1-F_t}{1-F_0} \right)^{1/t}\]

library(dplyr)
results.all$dF <- NA*numeric(nrow(results.all))

fdF <- function(gen,Ft,F0) {1-((1-Ft)/(1-F0))^(1/gen)}

results0 <- filter(results.all,gen==5)
methods <- unique(results0$method)
m <- length(methods)

for (i in 1:m) {
  ix <- which(results.all$method==methods[i] & results.all$gen > 5)
  results.all$dF[ix] <- 100*fdF(gen=1:15, 
                            Ft=results.all$F.coeff[ix],
                            F0=results0$F.coeff[results0$method==methods[i]])
}

We can now plot the results.

library(ggplot2)
library(ggpubr)

p1 <- ggplot(results.all,aes(x=gen,y=mu,colour=method)) + geom_line() +
  ylab("Genetic Gain") + xlab("Generation") + 
  scale_colour_brewer(name="",palette="Set1")
p2 <- ggplot(results.all,aes(x=gen,y=genicVar,colour=method)) + geom_line() +
  ylab("Genic Variance") + xlab("Generation") +
  scale_colour_brewer(name="",palette="Set1")
p3 <- ggplot(results.all,aes(x=gen,y=dF,colour=method)) + geom_line() +
  ylab("Inbreeding Rate (%)") + xlab("Generation") + ylim(0,2) + 
  scale_colour_brewer(name="",palette="Set1")
p4 <- ggplot(results.all,aes(x=gen,y=accuracy,colour=method)) + geom_line() +
  ylab("Selection Accuracy") + xlab("Generation") +
  scale_colour_brewer(name="",palette="Set1")
p5 <- ggplot(results.all,aes(x=gen,y=n.parent,colour=method)) + geom_line() +
  ylab("# Parents") + xlab("Generation") +
  scale_colour_brewer(name="",palette="Set1")
p6 <- ggplot(results.all,aes(x=gen,y=n.mate,colour=method)) + geom_line() +
  ylab("# Matings") + xlab("Generation") +
  scale_colour_brewer(name="",palette="Set1")

ggarrange(p1,p2,p3,p4,p5,p6,nrow=3,ncol=2,common.legend=T,legend = "right")

This short simulation illustrates some key points, which are discussed in greater depth in the publication. Compared to OCS, OMA has higher accuracy for predicting mate performance and sparser mating designs. OMA generated more gain in the short term, but long-term gains were similar.