Site icon R-bloggers

Additive vs. dominance two-allele genetic model by DIC

[This article was first published on Gregor Gorjanc (gg), and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
Two-allele model has alleles A1 and A2 and genotypes A1A1, A1A2, and A2A2. This model can be fitted using only additive effect (restricted model) or additve+dominance effect. (full model) The later model has one parameter more than the former. I was using this model lately and got weird DIC results – the full model had less number of parameters. Bellow is a simulation (using R and BUGS), that shows expected DIC results.
## Needed packages and functions
library(package="e1071")
library(package="doBy")
library(package="R2WinBUGS")

rvalue <- function(n, p, value, mean=0, sdE=0)
{
  if(!is.matrix(value)) value <- as.matrix(value)
 
  ## Frequencies
  k <- length(p)
  q <- 1 - p
  P <- p * p
  Q <- q * q
  H <- 2 * p * q

  ## Allocate matrix of arbitrary values by loci
  x <- matrix(data=1, nrow=n, ncol=k)
  y <- matrix(data=1, nrow=n, ncol=k)

  ## Simulate arbitrary values
  for(i in 1:k) {
    x[, i] <- rdiscrete(n=n, probs=c(P[i], H[i], Q[i]))
    y[, i] <- value[x[, i], i]
  }
  x <- x - 1 ## convert 1:3 to 0:2, i.e., number of A1 alleles

  ## Sum all values
  y <- rowSums(y)

  ## Add mean and normal deviate (error)
  y <- y + rnorm(n=n, mean=mean, sd=sdE)

  return(as.data.frame(cbind(y, x)))
}

prepairData <- function(x, dominance=FALSE)
{
  ret <- list()
  ret$n <- nrow(x)
  ret$y <- x$y
  if(!dominance) {
    ret$x <- x$V2 - 1 ## scale to the mean
  } else {
    ret$x <- x$V2 + 1 ## BUGS does not allow zero indexing!
  }
  ret
}

### SIMULATE DATA
###-----------------------------------------------------------------------

## --- Purely additive ---

set.seed(seed=19791123)

## Model parameters
n <- 1000
mu <- 100
sigma <- 1
value <- c(-1, 0, 1)
p <- 0.5

## Simulate
podatkiA <- rvalue(n=n, p=p, value=value, mean=mu, sdE=sigma)

par(mfrow=c(2, 1))
hist(podatkiA$y, xlab="Phenotype")

plot(podatkiA$y ~ jitter(podatkiA$V2), xlab="Genotype", ylab="Phenotype")

tmpA1 <- lm(podatkiA$y ~ podatkiA$V2)
abline(coef(tmpA1), lwd="2", col="blue")
tmpA2 <- summaryBy(y ~ V2, data=podatkiA)
points(tmpA2$y.mean ~ tmpA2$V2, pch=19, col="red")

## --- Additive + Dominance ---

set.seed(seed=19791123)

## Model parameters
n <- 1000
mu <- 100
sigma <- 1
value <- c(-1, 1, -1)
p <- 0.5

## Simulate
podatkiD <- rvalue(n=n, p=p, value=value, mean=mu, sdE=sigma)

par(mfrow=c(2, 1))
hist(podatkiD$y, xlab="Phenotype")

plot(podatkiD$y ~ jitter(podatkiD$V2), xlab="Genotype", ylab="Phenotype")

tmpD1 <- lm(podatkiD$y ~ podatkiD$V2)
abline(coef(tmpD1), lwd="2", col="blue")
tmpD2 <- summaryBy(y ~ V2, data=podatkiD)
points(tmpD2$y.mean ~ tmpD2$V2, pch=19, col="red")

### ESTIMATE MODEL PARAMETERS AND DIC
###-----------------------------------------------------------------------

modelA <- function()
{
  ## --- Additive model (regression on gene content) ---
  ## Phenotypes
  for(i in 1:n) {
    y[i] ~ dnorm(mu[i], tau2)
    mu[i] <- a + b * x[i]
  }
  ## Location prior
  a ~ dnorm(0, 1.0E-6)  
  b ~ dnorm(0, 1.0E-6)
  ## Variance prior
  lsigma ~ dunif(-10, 10) ## sigma between 4.539993e-05 and 2.202647e+04
  tau2 <- pow(sigma, -2)
  sigma <- exp(lsigma)
}

modelD <- function()
{
  ## --- Additive + Dominance model (genotype as a factor) ---
  ## Phenotypes
  for(i in 1:n) {
    y[i] ~ dnorm(mu[i], tau2)
    mu[i] <- g[x[i]]
  }
  ## Location prior
  g[1] ~ dnorm(0, 1.0E-6)
  g[2] ~ dnorm(0, 1.0E-6)
  g[3] ~ dnorm(0, 1.0E-6)  
  ## Variance prior
  lsigma ~ dunif(-10, 10) ## sigma between 4.539993e-05 and 2.202647e+04
  tau2 <- pow(sigma, -2)
  sigma <- exp(lsigma)
}

tmpAA <- prepairData(x=podatkiA)
tmpAD <- prepairData(x=podatkiA, dominance=TRUE)
tmpDA <- prepairData(x=podatkiD)
tmpDD <- prepairData(x=podatkiD, dominance=TRUE)

initsA <- function() list(a=rnorm(n=1, mean=mu), b=rnorm(n=1), lsigma=0)
initsD <- function() list(g=rnorm(n=3, mean=mu),               lsigma=0)

(fitAA <- bugs(model=modelA, data=tmpAA, inits=initsA,
               n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
               bugs.seed=19791123, parameters=c("a", "b", "sigma"),
               bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
##            mean  sd   2.5%    25%  50%  75%  97.5% Rhat n.eff
## a          99.9 0.0   99.9   99.9  100  100  100.0    1  1500
## b           1.0 0.0    0.9    1.0    1    1    1.1    1  1500
## sigma       1.0 0.0    1.0    1.0    1    1    1.0    1   400
## deviance 2828.3 2.4 2826.0 2826.0 2828 2829 2834.0    1  1500
## pD = 2.9 and DIC = 2831.2

(fitAD <- bugs(model=modelD, data=tmpAD, inits=initsD,
               n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
               bugs.seed=19791123, parameters=c("g", "sigma"),
               bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
##            mean  sd   2.5%    25%    50%  75%  97.5% Rhat n.eff
## g[1]       98.9 0.1   98.8   98.9   98.9   99   99.1    1   660
## g[2]       99.9 0.0   99.8   99.9   99.9  100  100.0    1  1500
## g[3]      101.0 0.1  100.8  100.9  101.0  101  101.1    1  1500
## sigma       1.0 0.0    1.0    1.0    1.0    1    1.0    1  1300
## deviance 2829.2 2.9 2826.0 2827.0 2829.0 2831 2837.0    1  1500
## pD = 4.0 and DIC = 2833.2

## pD seem OK and DIC agrees with simulated data - it is worse
## for AD model for data following additive model

(fitDA <- bugs(model=modelA, data=tmpDA, inits=initsA,
               n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
               bugs.seed=19791123, parameters=c("a", "b", "sigma"),
               bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
##            mean  sd   2.5%    25%    50%    75%  97.5% Rhat n.eff
## a         100.0 0.0   99.9  100.0  100.0  100.0  100.1    1   570
## b           0.0 0.1   -0.1    0.0    0.0    0.1    0.2    1  1500
## sigma       1.4 0.0    1.3    1.4    1.4    1.4    1.5    1  1300
## deviance 3512.9 2.6 3510.0 3511.0 3512.0 3514.0 3520.0    1  1500
## pD = 3.0 and DIC = 3515.9

(fitDD <- bugs(model=modelD, data=tmpDD, inits=initsD,
               n.chains=3, n.burnin=5000, n.iter=10000, n.thin=10,
               bugs.seed=19791123, parameters=c("g", "sigma"),
               bugs.directory="C:/Programs/BUGS/WinBUGS14_orig"))
##            mean  sd   2.5%    25%    50%  75%  97.5% Rhat n.eff
## g[1]       98.9 0.1   98.8   98.9   99.0   99   99.1    1  1500
## g[2]      100.9 0.1  100.8  100.9  100.9  101  101.0    1  1500
## g[3]       99.0 0.1   98.8   98.9   99.0   99   99.1    1  1500
## sigma       1.0 0.0    1.0    1.0    1.0    1    1.0    1   530
## deviance 2829.2 2.9 2826.0 2827.0 2829.0 2831 2837.0    1  1500
## pD = 4.1 and DIC = 2833.3

## pD seems OK and DIC also agrees with the model --> dominance
## model is better for this dataset

To leave a comment for the author, please follow the link and comment on their blog: Gregor Gorjanc (gg).

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.