SimPlus was created for testing COMA with AlphaSimR. COMA provides functions for optimum contribution selection (OCS) and optimum mate allocation (OMA). For OCS, the objective is to maximize the average GEBV of the parents, weighted by their contributions. For OMA, the objective is to maximize the average GPMP (genomic prediction of mate performance), weighted by the mate allocations.
The following code creates a founder population in mutation-drift equilibrium and sets the variance parameters for a trait with additive and dominance effects.
library(AlphaSimR)
founders <- runMacs2(nInd=500,nChr=10,segSites=NULL,Ne=100,
mutRate=1e-8, ploidy=2,
histGen=NULL,histNe=NULL,genLen=1)
SP <- SimParam$new(founders)
SP$setTrackRec(TRUE)
SP$addTraitADG(nQtlPerChr = 100, mean = 0, var = 1, varDD = 0.5,
meanDD = 0.5, useVarA = FALSE)
SP$setVarE(varE=2)
pop <- newPop(founders, simParam=SP)
The following code runs 20 generations of mass selection at 1% inbreeding, using phenotypes for genetic merit. The population is saved after 5 generations to compare the response with genomic selection.
library(SimPlus)
gp <- genParam(pop, simParam = SP)
total.gen <- 20
results <- data.frame(gen=1:total.gen, mu=numeric(total.gen),
genicVar=numeric(total.gen),
F.coeff=numeric(total.gen),
n.parent=numeric(total.gen),
n.mate=numeric(total.gen),
accuracy=numeric(total.gen))
results$mu[1] <- as.numeric(gp$mu)
results$genicVar[1] <- as.numeric(gp$genicVarG)
p.ref <- update_TP(pop, SP, geno.file="geno.csv.gz",
pheno.file="pheno.csv", ped.file="ped.csv",
gen.TP=0)
update_K(pop, SP, K.method="A", K.file="kinship.csv", ped.file="ped.csv")
for (gen in 1:total.gen) {
print(sub("X",gen,"Generation X"))
ans1 <- sim_OCS(pop, SP, n.progeny=500, dF=0.01, solver="MOSEK",
COMA.file="pheno.csv", K.file="kinship.csv")
#optional, measure accuracy
acc <- sim_accuracy(pop, SP, COMA.file="pheno.csv")
results$accuracy[gen] <- acc$ocs
ans2 <- sim_mate(pop, SP, matings=ans1$om, total.progeny=500, min.progeny=5)
results$n.parent[gen] <- ans2$n.parent
results$n.mate[gen] <- nrow(ans2$matings)
pop <- setPheno(ans2$progeny,simParam = SP) #AlphaSimR function
gp <- genParam(pop, simParam = SP)
results$mu[gen] <- as.numeric(gp$mu)
results$genicVar[gen] <- as.numeric(gp$genicVarG)
update_TP(pop, SP, geno.file="geno.csv.gz", pheno.file="pheno.csv",
ped.file="ped.csv", gen.TP=3)
results$F.coeff[gen] <- update_K(pop, SP, K.method="A", K.file="kinship.csv",
ped.file="ped.csv")
if (gen==5)
sim_save("gen5.rda",pop,SP,"geno.csv.gz","pheno.csv","ped.csv",p.ref=p.ref)
}
results.all <- data.frame(results,method="OCS-A-Pheno")
Even though there is no genomic selection in the above simulation,
the function update_TP
is still required to update the
phenotype and pedigree files for OCS. The argument gen.TP
controls the number of generations in the TP (including the candidates,
which are selected after phenotyping). It is set at 3 to anticipate the
simulation below, ensuring the full TP is available at the onset of GS
in generation 5.
To simulate GS requires another function, sim_StageWise
,
to predict the marker effects using the StageWise package. The
file with the predicted marker effects (COMA.file) is now used as input
for sim_OCS
instead of the phenotype file.
gen5 <- sim_load("gen5.rda","geno.csv.gz","pheno.csv","ped.csv")
pop <- gen5$pop
SP <- gen5$SP
results$F.coeff[5] <- update_K(pop, SP, K.method="A",
K.file="kinship.csv", ped.file="ped.csv")
for (gen in 6:total.gen) {
print(sub("X",gen,"Generation X"))
ans0 <- sim_StageWise("geno.csv.gz","pheno.csv",ploidy=2,
COMA.file="COMA.csv", gen.TP=3)
ans1 <- sim_OCS(pop, SP, n.progeny=500, dF=0.01, solver="MOSEK",
COMA.file="COMA.csv", K.file="kinship.csv")
acc <- sim_accuracy(pop, SP, COMA.file="COMA.csv", K.file="kinship.csv")
results$accuracy[gen] <- acc$ocs
ans2 <- sim_mate(pop, SP, matings=ans1$om, total.progeny=500, min.progeny=5)
results$n.parent[gen] <- ans2$n.parent
results$n.mate[gen] <- nrow(ans2$matings)
pop <- setPheno(ans2$progeny,simParam = SP)
gp <- genParam(pop, simParam = SP)
results$mu[gen] <- as.numeric(gp$mu)
results$genicVar[gen] <- as.numeric(gp$genicVarG)
update_TP(pop, SP, geno.file="geno.csv.gz", pheno.file="pheno.csv",
ped.file="ped.csv", gen.TP=3)
results$F.coeff[gen] <- update_K(pop, SP, K.method="A", K.file="kinship.csv",
ped.file="ped.csv")
}
results.all <- rbind(results.all, data.frame(results[5:20,], method="OCS-A-GEBV"))
Instead of pedigree kinship, genomic IBD (G.IBD) kinship can be used
to control inbreeding. To reduce computational time, not all segregating
sites are needed for this calculation. The parameter “ibd.loci” in the
function update_K
controls how many loci per chromosome to
use. The default is 100, and since the chromosomes are 100 cM, this
implies 1 marker per cM.
gen5 <- sim_load("gen5.rda","geno.csv.gz","pheno.csv","ped.csv")
pop <- gen5$pop
SP <- gen5$SP
results$F.coeff[5] <- update_K(pop, SP, K.method="G.IBD",
K.file="kinship.csv",
ped.file="ped.csv", ibd.loci=100, n.core=2)
for (gen in 6:total.gen) {
print(sub("X",gen,"Generation X"))
ans0 <- sim_StageWise("geno.csv.gz","pheno.csv",ploidy=2,
COMA.file="COMA.csv", gen.TP=3)
ans1 <- sim_OCS(pop, SP, n.progeny=500, dF=0.01, solver="MOSEK",
COMA.file="COMA.csv", K.file="kinship.csv")
acc <- sim_accuracy(pop, SP, COMA.file="COMA.csv", K.file="kinship.csv")
results$accuracy[gen] <- acc$ocs
ans2 <- sim_mate(pop, SP, matings=ans1$om, total.progeny=500, min.progeny=5)
results$n.parent[gen] <- ans2$n.parent
results$n.mate[gen] <- nrow(ans2$matings)
pop <- setPheno(ans2$progeny,simParam = SP)
gp <- genParam(pop, simParam = SP)
results$mu[gen] <- as.numeric(gp$mu)
results$genicVar[gen] <- as.numeric(gp$genicVarG)
update_TP(pop, SP, geno.file="geno.csv.gz", pheno.file="pheno.csv",
ped.file="ped.csv", gen.TP=3)
results$F.coeff[gen] <- update_K(pop, SP, K.method="G.IBD",
K.file="kinship.csv", ped.file="ped.csv",
ibd.loci=100, n.core=2)
}
results.all <- rbind(results.all, data.frame(results[5:20,], method="OCS-G.IBD-GEBV"))
Simulating OMA follows the same code as above but replaces
sim_OCS
with sim_OMA
, which has an additional
argument max.parent
. To limit the size of the computational
problem for OMA, OCS is first used to reduce the number of candidates to
max.parent
. The dF argument for sim_OMA
is a
vector of 2 numbers for the lower and upper bounds. In this case, the
numbers are identical to create an equality constraint.
gen5 <- sim_load("gen5.rda","geno.csv.gz","pheno.csv","ped.csv")
pop <- gen5$pop
SP <- gen5$SP
results$F.coeff[5] <- update_K(pop, SP, K.method="A",
K.file="kinship.csv", ped.file="ped.csv")
for (gen in 6:total.gen) {
print(sub("X",gen,"Generation X"))
ans0 <- sim_StageWise("geno.csv.gz","pheno.csv",ploidy=2,
COMA.file="COMA.csv", gen.TP=3)
ans1 <- sim_OMA(pop, SP, n.progeny=500, dF=c(0.01,0.01), solver="MOSEK",
COMA.file="COMA.csv", K.file="kinship.csv",
max.parent=200)
acc <- sim_accuracy(pop, SP, COMA.file="COMA.csv", K.file="kinship.csv")
results$accuracy[gen] <- acc$oma
ans2 <- sim_mate(pop, SP, matings=ans1$om, total.progeny=500, min.progeny=5)
results$n.parent[gen] <- ans2$n.parent
results$n.mate[gen] <- nrow(ans2$matings)
pop <- setPheno(ans2$progeny,simParam = SP)
gp <- genParam(pop, simParam = SP)
results$mu[gen] <- as.numeric(gp$mu)
results$genicVar[gen] <- as.numeric(gp$genicVarG)
update_TP(pop, SP, geno.file="geno.csv.gz", pheno.file="pheno.csv",
ped.file="ped.csv", gen.TP=3)
results$F.coeff[gen] <- update_K(pop, SP, K.method="A",
K.file="kinship.csv", ped.file="ped.csv")
}
results.all <- rbind(results.all, data.frame(results[5:20,], method="OMA-A-GPMP"))
The average inbreeding rate at time \(t\) follows
\[ \Delta F_t = 1-\left( \frac{1-F_t}{1-F_0} \right)^{1/t}\]
library(dplyr)
results.all$dF <- NA*numeric(nrow(results.all))
fdF <- function(gen,Ft,F0) {1-((1-Ft)/(1-F0))^(1/gen)}
results0 <- filter(results.all,gen==5)
methods <- unique(results0$method)
m <- length(methods)
for (i in 1:m) {
ix <- which(results.all$method==methods[i] & results.all$gen > 5)
results.all$dF[ix] <- 100*fdF(gen=1:15,
Ft=results.all$F.coeff[ix],
F0=results0$F.coeff[results0$method==methods[i]])
}
We can now plot the results.
library(ggplot2)
library(ggpubr)
p1 <- ggplot(results.all,aes(x=gen,y=mu,colour=method)) + geom_line() +
ylab("Genetic Gain") + xlab("Generation") +
scale_colour_brewer(name="",palette="Set1")
p2 <- ggplot(results.all,aes(x=gen,y=genicVar,colour=method)) + geom_line() +
ylab("Genic Variance") + xlab("Generation") +
scale_colour_brewer(name="",palette="Set1")
p3 <- ggplot(results.all,aes(x=gen,y=dF,colour=method)) + geom_line() +
ylab("Inbreeding Rate (%)") + xlab("Generation") + ylim(0,2) +
scale_colour_brewer(name="",palette="Set1")
p4 <- ggplot(results.all,aes(x=gen,y=accuracy,colour=method)) + geom_line() +
ylab("Selection Accuracy") + xlab("Generation") +
scale_colour_brewer(name="",palette="Set1")
p5 <- ggplot(results.all,aes(x=gen,y=n.parent,colour=method)) + geom_line() +
ylab("# Parents") + xlab("Generation") +
scale_colour_brewer(name="",palette="Set1")
p6 <- ggplot(results.all,aes(x=gen,y=n.mate,colour=method)) + geom_line() +
ylab("# Matings") + xlab("Generation") +
scale_colour_brewer(name="",palette="Set1")
ggarrange(p1,p2,p3,p4,p5,p6,nrow=3,ncol=2,common.legend=T,legend = "right")
This short simulation illustrates some key points, which are discussed in greater depth in the publication. Compared to OCS, OMA has higher accuracy for predicting mate performance and sparser mating designs. OMA generated more gain in the short term, but long-term gains were similar.