## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment  = "#>",
  fig.width  = 7,
  fig.height = 4
)

## ----simulate-----------------------------------------------------------------
library(twoPhaseGAS)

data.table::setDTthreads(1)

set.seed(42)
data <- DataGeneration_TPD(
  Beta0  = 2,    # intercept
  Beta1  = 0.5,  # genetic effect (G on Y)
  Disp = 1,      # error variance for default gaussian family
  N      = 3000, # phase 1 cohort size
  LD.r   = 0.75, # LD (r) between causal SNP G and GWAS SNP Z
  P_g    = 0.20, # MAF of G
  P_z    = 0.30  # MAF of Z
)
head(data)

## ----subsample----------------------------------------------------------------
set.seed(1)
n2   <- 500                              # phase 2 size
R    <- rep(0L, nrow(data))
R[sample(nrow(data), n2)] <- 1L         # 1 = selected for phase 2

data[R == 0, c("G1")]  <- NA # Make all phase 1 data complement G1 values  missing

cat("Phase 1 complement:", sum(R == 0), "individuals\n")
cat("Phase 2:           ", sum(R == 1), "individuals\n")

## ----null-model---------------------------------------------------------------
res_Ho <- twoPhaseSPML(
  formula = Y ~ Z,
  miscov  = ~ G1,
  auxvar  = ~ Z,
  data   = data
)

print(res_Ho)

## ----alt-model----------------------------------------------------------------
res_Ha <- twoPhaseSPML(
  formula = Y ~ Z + G1,
  miscov  = ~ G1,
  auxvar  = ~ Z,
  data   = data
)

print(res_Ha)

## ----access-------------------------------------------------------------------
# Regression coefficients
res_Ha$theta

# Log-likelihood
res_Ha$ll

# Estimated joint distribution of G1 and Z
head(res_Ha$qGZ)

# Wald statistic and degrees of freedom
cat("Wobs =", res_Ha$Wobs, "  df =", res_Ha$df, "\n")
cat("p-value =", pchisq(res_Ha$Wobs, df = res_Ha$df, lower.tail = FALSE), "\n")

## ----warm-start, eval = FALSE-------------------------------------------------
#  # Use null-model estimates as starting point for the alternative model
#  res_warm <- twoPhaseSPML(
#    formula     = Y ~ Z + G1,
#    miscov      = ~ G1,
#    auxvar      = ~ Z,
#    data       = data,
#    start.values = list(betas = res_Ho$theta,
#                        q     = res_Ho$qGZ)
#  )

## ----session------------------------------------------------------------------
sessionInfo()

