Example on a simulated dataset • SHIR

We propose a simple example in which we apply SHIR to a generated dataset that mimicks data from M=4 sites. We show how to fit and derive summary data at each local site, and then aggregate the derived data.

Load the library:

library(SHIR)

Load the simulated dataset:

data(simu_data)

The variables X are the individual covariates matrices, Y is the individual response vector:

X_lst <- simu_data$X
Y_lst <- simu_data$Y

Number of sites:

M <- length(X_lst)

Vector for sample size of the local sites:

length_lst <- c()

Initialize the lists of the derived hessian matrices, gradient vectors and lasso estimators respectively:

I_lst <- vector('list', M)
U_lst <- vector('list', M)
beta_lst <- vector('list', M)

Locally fit and derive the individual data at site m:

system.time(
for (m in 1:M){
  Y <- Y_lst[[m]]
  X <- X_lst[[m]]

  local_summary_m <- Local_fit(Y, X, lambda_lst = 0.1 * c(4:20) * sqrt((log(p)) / n))

  I_lst[[m]] <- local_summary_m$hessian
  U_lst[[m]] <- local_summary_m$gradient
  beta_lst[[m]] <- local_summary_m$beta
  length_lst <- c(length_lst, length(Y))
}
)
#>    user  system elapsed 
#>   1.247   0.079   1.465

Aggregate the derived data:

SHIR_train <- SHIR_fit(I_lst, U_lst, length_lst,
                       lambda_lst = sqrt(n * log(p)) * 0.3 * c(4:30),
                       lambda_g_lst = c(0.75, 1), tune = 'BIC')

For convenience, one can also use the default parameters:

system.time(
invisible(
SHIR_train <- SHIR_fit(I_lst, U_lst, length_lst)
)
)
#> [1] "Finishing: 2.38%"
#> [1] "Finishing: 4.76%"
#> [1] "Finishing: 7.14%"
#> [1] "Finishing: 9.52%"
#> [1] "Finishing: 11.9%"
#> [1] "Finishing: 14.29%"
#> [1] "Finishing: 16.67%"
#> [1] "Finishing: 19.05%"
#> [1] "Finishing: 21.43%"
#> [1] "Finishing: 23.81%"
#> [1] "Finishing: 26.19%"
#> [1] "Finishing: 28.57%"
#> [1] "Finishing: 30.95%"
#> [1] "Finishing: 33.33%"
#> [1] "Finishing: 35.71%"
#> [1] "Finishing: 38.1%"
#> [1] "Finishing: 40.48%"
#> [1] "Finishing: 42.86%"
#> [1] "Finishing: 45.24%"
#> [1] "Finishing: 47.62%"
#> [1] "Finishing: 50%"
#> [1] "Finishing: 52.38%"
#> [1] "Finishing: 54.76%"
#> [1] "Finishing: 57.14%"
#> [1] "Finishing: 59.52%"
#> [1] "Finishing: 61.9%"
#> [1] "Finishing: 64.29%"
#> [1] "Finishing: 66.67%"
#> [1] "Finishing: 69.05%"
#> [1] "Finishing: 71.43%"
#> [1] "Finishing: 73.81%"
#> [1] "Finishing: 76.19%"
#> [1] "Finishing: 78.57%"
#> [1] "Finishing: 80.95%"
#> [1] "Finishing: 83.33%"
#> [1] "Finishing: 85.71%"
#> [1] "Finishing: 88.1%"
#> [1] "Finishing: 90.48%"
#> [1] "Finishing: 92.86%"
#> [1] "Finishing: 95.24%"
#> [1] "Finishing: 97.62%"
#> [1] "Finishing: 100%"
#>    user  system elapsed 
#> 180.364  33.887 240.297

Fitted beta. The m-th column loads the fitted beta for the m-th site, the first row displays the intercepts:

head(SHIR_train$min.beta)
#>             [,1]        [,2]        [,3]        [,4]
#> [1,] -0.01764867 -0.06554282  0.08749004  0.03461409
#> [2,]  0.22592580  0.22592580  0.22592580  0.22592580
#> [3,] -0.26696537 -0.26696537 -0.26696537 -0.26696537
#> [4,]  0.22921814  0.02390544  0.24047801  0.08244291
#> [5,] -0.09828718 -0.38953301 -0.10038749 -0.40982462
#> [6,]  0.28998698  0.13737553  0.31343117  0.15741986

To obtain the fitted mean effect mu:

head(rowMeans(SHIR_train$min.beta))
#> [1]  0.00972816  0.22592580 -0.26696537  0.14401112 -0.24950808  0.22455339

To obtain the fitted heterogeneous effect alpha (the m-th column loads alpha for the m-th site):

head(SHIR_train$min.beta - rowMeans(SHIR_train$min.beta))
#>             [,1]        [,2]       [,3]        [,4]
#> [1,] -0.02737683 -0.07527098 0.07776188  0.02488593
#> [2,]  0.00000000  0.00000000 0.00000000  0.00000000
#> [3,]  0.00000000  0.00000000 0.00000000  0.00000000
#> [4,]  0.08520702 -0.12010568 0.09646688 -0.06156822
#> [5,]  0.15122089 -0.14002494 0.14912059 -0.16031655
#> [6,]  0.06543360 -0.08717786 0.08887779 -0.06713353