3.2 Calculation of Epistemic Uncertainty as a Function of Data

This section details how the graphs presented in Figure 2.7 and Figure 2.8 were created. The graphs illustrate how the epistemic uncertainty regarding the value of two different covariates behaves as a function of available observational data. Creation of simulated observational data sets, fitting of data to the proportional hazards model and creation of the graphs was all done using the language ’R’ (R Development Core Team 2011), and the corresponding survival library (Therneau and Splus-¿R port by Thomas Lumley 2011) in particular.

The following Rscript code was used to generate the graphs:

  #!/usr/bin/Rscript
  library(survival)
  
  n_components <- 1000
  n_hours_observed <- 45
  failure_rate <- 1/60
  a1 <- 0.05
  a2 <- 0.1
  
  print("Constructing data frame with simulated observations")
  ep_vs_data <- matrix(0,n_components*n_hours_observed,5)
  colnames(ep_vs_data) <- c(start’,stop’,z1’,z2’,failure’)
  row <- 0
  for (c in 1:n_components) {
      for (h in 1:n_hours_observed) {
       row <- row+1
       start <- h-1
   stop <- h
   z1 <- abs(rnorm(1,0,10))
   z2 <- runif(1,0,5)
   if (failure_rate*exp(a1*z1+a2*z2) >= runif(1)) {
      # failure occured
      ep_vs_data[row,] = c(start,stop,z1,z2,1)
      break
   } else {
      ep_vs_data[row,] = c(start,stop,z1,z2,0)
   }
      }
  }
  ep_vs_data <- as.data.frame(ep_vs_data)
  
  # Create estimated parameter variance as a function of the 
  # number of observations used to create the parameter estimate
  print("Estimating coefficients")
  step <- 100
  length <- floor(row/step)
  ep_vs_data_a1 <- matrix(0,length,2)
  ep_vs_data_a2 <- matrix(0,length,2)
  for (i in 1:length) {
      ep_vs_data_fit <- coxph(Surv(start,stop,failure) ~ z1 + z2,
                          data=ep_vs_data[1:(step*i),])
      attach(ep_vs_data_fit)
      ep_vs_data_a1[i,1] = coefficients[1]
      ep_vs_data_a1[i,2] = var[1,1]
      ep_vs_data_a2[i,1] = coefficients[2]
      ep_vs_data_a2[i,2] = var[2,2]
      detach()
  }
  
  # Plot estimated values for the two coefficients
  print("Plotting results")
  # a1
  png(filename="estimated_a1_values.png",width=800,height=600)
  plot((1:length)*step,ep_vs_data_a1[1:length,1],
       main="Coefficient a1",
       xlab="Hours of observation",
       ylab="Estimated value",
       xlim=c(0,length*step),ylim=c(0.0,0.1))
  # Add upper and lower confidence bounds
  lines((15:length)*step,qnorm(0.975,
                               ep_vs_data_a1[15:length,1],
                               sqrt(ep_vs_data_a1[15:length,2])))
  lines((15:length)*step,qnorm(1-0.975,
                               ep_vs_data_a1[15:length,1],
                               sqrt(ep_vs_data_a1[15:length,2])))
  dev.off()
  # a2
  png(filename="estimated_a2_values.png",width=800,height=600)
  plot((1:length)*step,ep_vs_data_a2[1:length,1],
       main="Coefficient a2",
       xlab="Hours of observation",
       ylab="Estimated value",
       xlim=c(0,length*step),ylim=c(-0.1,0.3))
  # Add upper and lower confidence bounds
  lines((15:length)*step,qnorm(0.975,
                               ep_vs_data_a2[15:length,1],
                               sqrt(ep_vs_data_a2[15:length,2])))
  lines((15:length)*step,qnorm(1-0.975,
                               ep_vs_data_a2[15:length,1],
                               sqrt(ep_vs_data_a2[15:length,2])))
  dev.off()
  
  summary(ep_vs_data_fit)
  print("done.")