penalized argument to loglik

helske · Sep 6, 2024 · f141899 · f141899
1 parent 7cb426b
commit f141899
Show file tree

Hide file tree

Showing 15 changed files with 149 additions and 88 deletions.
diff --git a/R/fit_mnhmm.R b/R/fit_mnhmm.R
@@ -29,6 +29,7 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, verbose, ...) {
     if (is.null(inits$emission_probs)) inits$emission_probs <- NULL
     if (is.null(inits$cluster_probs)) inits$cluster_probs <- NULL
   }
+  model_code <- stanmodels[[attr(model, "type")]]
   if (restarts > 0L) {
     if (threads > 1L) {
       plan(multisession, workers = threads)
@@ -50,7 +51,7 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, verbose, ...) {
       do.call(
         optimizing, 
         c(list(
-          stanmodels[[attr(model, "type")]], init = init,
+          model_code, init = init,
           data = list(
             N = model$n_sequences,
             T = model$sequence_lengths,
@@ -91,7 +92,7 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, verbose, ...) {
   out <-  do.call(
     optimizing, 
     c(list(
-      stanmodels[[attr(model, "type")]], 
+      model_code, 
       data = list(
         N = model$n_sequences,
         T = model$sequence_lengths,
@@ -116,12 +117,15 @@ fit_mnhmm <- function(model, inits, init_sd, restarts, threads, verbose, ...) {
     ), dots)
   )[c("par", "value", "return_code", "hessian")]
 
+  model$coefficients <- out$par[c("beta_i_raw", "beta_s_raw", "beta_o_raw")]
+  model$stan_model <- model_code
   model$estimation_results <- list(
-    parameters = out$par[lengths(out$par) > 0], 
     hessian = out$hessian,
-    logLik = out$value, 
+    penalized_loglik = out$value, 
+    loglik = out$par["log_lik"], 
+    penalty = out$par["prior"],
     return_code = out$return_code,
-    logLiks_of_restarts = if(restarts > 1L) logliks else NULL, 
+    plogliks_of_restarts = if(restarts > 1L) logliks else NULL, 
     return_codes_of_restarts = if(restarts > 1L) return_codes else NULL,
     stan_model = stanmodels[[attr(model, "type")]]
   )

diff --git a/R/fit_nhmm.R b/R/fit_nhmm.R
@@ -25,6 +25,7 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, verbose, ...) {
     if (is.null(inits$transition_probs)) inits$transition_probs <- NULL
     if (is.null(inits$emission_probs)) inits$emission_probs <- NULL
   }
+  model_code <- stanmodels[[attr(model, "type")]]
   if (restarts > 0L) {
     if (threads > 1L) {
       plan(multisession, workers = threads)
@@ -46,7 +47,7 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, verbose, ...) {
       do.call(
         optimizing, 
         c(list(
-          stanmodels[[attr(model, "type")]], init = init,
+          model_code, init = init,
           data = list(
             N = model$n_sequences,
             T = model$sequence_lengths,
@@ -84,7 +85,7 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, verbose, ...) {
   out <-  do.call(
     optimizing, 
     c(list(
-      stanmodels[[attr(model, "type")]], 
+      model_code, 
       data = list(
         N = model$n_sequences,
         T = model$sequence_lengths,
@@ -106,14 +107,16 @@ fit_nhmm <- function(model, inits, init_sd, restarts, threads, verbose, ...) {
     ), dots)
   )[c("par", "value", "return_code", "hessian")]
 
+  model$coefficients <- out$par[c("beta_i_raw", "beta_s_raw", "beta_o_raw")]
+  model$stan_model <- model_code
   model$estimation_results <- list(
-    parameters = out$par[lengths(out$par) > 0], 
     hessian = out$hessian,
-    logLik = out$value, 
+    penalized_loglik = out$value, 
+    loglik = out$par["log_lik"], 
+    penalty = out$par["prior"],
     return_code = out$return_code,
-    logLiks_of_restarts = if(restarts > 1L) logliks else NULL, 
-    return_codes_of_restarts = if(restarts > 1L) return_codes else NULL,
-    stan_model = stanmodels[[attr(model, "type")]]
+    plogliks_of_restarts = if(restarts > 1L) logliks else NULL, 
+    return_codes_of_restarts = if(restarts > 1L) return_codes else NULL
   )
   model
 }
diff --git a/R/forwardBackward.R b/R/forwardBackward.R
@@ -155,13 +155,13 @@ forward_backward.nhmm <- function(model, forward_only = FALSE,
   X_emission <- aperm(model$X_emission, c(3, 1, 2))
   obsArray <- create_obsArray(model)
   beta_i_raw <- stan_to_cpp_initial(
-    model$estimation_results$parameters$beta_i_raw
+    model$coefficientsbeta_i_raw
   )
   beta_s_raw <- stan_to_cpp_transition(
-    model$estimation_results$parameters$beta_s_raw
+    model$coefficientsbeta_s_raw
   )
   beta_o_raw <- stan_to_cpp_emission(
-    model$estimation_results$parameters$beta_o_raw,
+    model$coefficientsbeta_o_raw,
     1,
     model$n_channels > 1
   )
@@ -237,19 +237,19 @@ forward_backward.mnhmm <- function(model, forward_only = FALSE,
   S <- model$n_states
   D <- model$n_clusters
   beta_i_raw <- stan_to_cpp_initial(
-    model$estimation_results$parameters$beta_i_raw,
+    model$coefficientsbeta_i_raw,
     model$n_clusters
   )
   beta_s_raw <- stan_to_cpp_transition(
-    model$estimation_results$parameters$beta_s_raw,
+    model$coefficientsbeta_s_raw,
     model$n_clusters
   )
   beta_o_raw <- stan_to_cpp_emission(
-    model$estimation_results$parameters$beta_o_raw,
+    model$coefficientsbeta_o_raw,
     model$n_clusters,
     model$n_channels > 1
   )
-  theta_raw <- model$estimation_results$parameters$theta_raw
+  theta_raw <- model$coefficientstheta_raw
   out <- list()
   if (model$n_channels == 1) {
     out$forward_probs <- forward_mnhmm_singlechannel(

diff --git a/R/get_coefs.R b/R/get_coefs.R
@@ -18,9 +18,9 @@ coef.nhmm <- function(object, nsim = 0, probs = c(0.025, 0.5, 0.975), ...) {
   )
   S <- object$n_states
   M <- object$n_symbols
-  beta_i_raw <- c(object$estimation_results$parameters$beta_i_raw)
-  beta_s_raw <- c(object$estimation_results$parameters$beta_s_raw)
-  beta_o_raw <- c(object$estimation_results$parameters$beta_o_raw)
+  beta_i_raw <- c(object$coefficients$beta_i_raw)
+  beta_s_raw <- c(object$coefficients$beta_s_raw)
+  beta_o_raw <- c(object$coefficients$beta_o_raw)
   beta_i <- data.frame(
     state = object$state_names[-1],
     parameter = rep(object$coef_names_initial, each = S - 1),
@@ -93,10 +93,10 @@ coef.mnhmm <- function(object, nsim = 0, probs = c(0.025, 0.5, 0.975), ...) {
   S <- object$n_states
   M <- object$n_symbols
   D <- object$n_clusters
-  beta_i_raw <- c(object$estimation_results$parameters$beta_i_raw)
-  beta_s_raw <- c(object$estimation_results$parameters$beta_s_raw)
-  beta_o_raw <- c(object$estimation_results$parameters$beta_o_raw)
-  theta_raw <- c(object$estimation_results$parameters$theta_raw)
+  beta_i_raw <- c(object$coefficients$beta_i_raw)
+  beta_s_raw <- c(object$coefficients$beta_s_raw)
+  beta_o_raw <- c(object$coefficients$beta_o_raw)
+  theta_raw <- c(object$coefficients$theta_raw)
   beta_i <- data.frame(
     state = rep(object$state_names[-1], each = D),
     parameter = rep(object$coef_names_initial, each = (S - 1) * D),
@@ -172,4 +172,4 @@ coef.mnhmm <- function(object, nsim = 0, probs = c(0.025, 0.5, 0.975), ...) {
     beta_emission = beta_o,
     beta_cluster = theta
   )
-}
+}
diff --git a/R/get_probs.R b/R/get_probs.R
@@ -32,13 +32,13 @@ get_probs.nhmm <- function(model, newdata = NULL, nsim = 0,
   M <- model$n_symbols
   C <- model$n_channels
   beta_i_raw <- stan_to_cpp_initial(
-    model$estimation_results$parameters$beta_i_raw
+    model$coefficientsbeta_i_raw
   )
   beta_s_raw <- stan_to_cpp_transition(
-    model$estimation_results$parameters$beta_s_raw
+    model$coefficientsbeta_s_raw
   )
   beta_o_raw <- stan_to_cpp_emission(
-    model$estimation_results$parameters$beta_o_raw,
+    model$coefficientsbeta_o_raw,
     1,
     C > 1
   )
@@ -118,31 +118,31 @@ get_probs.mnhmm <- function(model, newdata = NULL, nsim = 0,
   X_transition <- aperm(model$X_transition, c(3, 1, 2))
   X_emission <- aperm(model$X_emission, c(3, 1, 2))
   X_cluster <- t(model$X_cluster)
-  theta_raw <- model$estimation_results$parameters$theta_raw
+  theta_raw <- model$coefficientstheta_raw
   initial_probs <- vector("list", D)
   transition_probs <- vector("list", D)
   emission_probs <- vector("list", D)
   for (d in seq_len(D)) {
     beta_i_raw <- stan_to_cpp_initial(
       matrix(
-        model$estimation_results$parameters$beta_i_raw[d, ,], 
+        model$coefficientsbeta_i_raw[d, ,], 
         S - 1, nrow(X_initial)
       )
     )
     beta_s_raw <- stan_to_cpp_transition(
       array(
-        model$estimation_results$parameters$beta_s_raw[d, , ,], 
+        model$coefficientsbeta_s_raw[d, , ,], 
         dim = c(S, S - 1, nrow(X_transition))
       )
     )
     beta_o_raw <- stan_to_cpp_emission(
       if (C == 1) {
         array(
-          model$estimation_results$parameters$beta_o_raw[d, , ,],
+          model$coefficientsbeta_o_raw[d, , ,],
           dim = c(S, M - 1, nrow(X_emission))
         )
       } else {
-        model$estimation_results$parameters$beta_o_raw[d, ]
+        model$coefficientsbeta_o_raw[d, ]
       },
       1,
       C > 1
@@ -224,4 +224,4 @@ get_probs.mnhmm <- function(model, newdata = NULL, nsim = 0,
     emission_probs = remove_voids(model, emission_probs),
     cluster_probs = cluster_probs
   )
-}
+}
diff --git a/R/hidden_paths.R b/R/hidden_paths.R
@@ -77,13 +77,13 @@ hidden_paths.nhmm <- function(model, respect_void = TRUE, ...) {
   X_emission <- aperm(model$X_emission, c(3, 1, 2))
   obsArray <- create_obsArray(model)
   beta_i_raw <- stan_to_cpp_initial(
-    model$estimation_results$parameters$beta_i_raw
+    model$coefficientsbeta_i_raw
   )
   beta_s_raw <- stan_to_cpp_transition(
-    model$estimation_results$parameters$beta_s_raw
+    model$coefficientsbeta_s_raw
   )
   beta_o_raw <- stan_to_cpp_emission(
-    model$estimation_results$parameters$beta_o_raw,
+    model$coefficientsbeta_o_raw,
     1,
     model$n_channels > 1
   )
@@ -114,19 +114,19 @@ hidden_paths.mnhmm <- function(model, respect_void = TRUE, ...) {
   S <- model$n_states
   D <- model$n_clusters
   beta_i_raw <- stan_to_cpp_initial(
-    model$estimation_results$parameters$beta_i_raw,
+    model$coefficientsbeta_i_raw,
     model$n_clusters
   )
   beta_s_raw <- stan_to_cpp_transition(
-    model$estimation_results$parameters$beta_s_raw,
+    model$coefficientsbeta_s_raw,
     model$n_clusters
   )
   beta_o_raw <- stan_to_cpp_emission(
-    model$estimation_results$parameters$beta_o_raw,
+    model$coefficientsbeta_o_raw,
     model$n_clusters,
     model$n_channels > 1
   )
-  theta_raw <- model$estimation_results$parameters$theta_raw
+  theta_raw <- model$coefficientstheta_raw
   if (model$n_channels == 1) {
     out <- viterbi_mnhmm_singlechannel(
       beta_i_raw, X_initial,

diff --git a/R/logLik.R b/R/logLik.R
@@ -1,7 +1,6 @@
-#' Log-likelihood of the Hidden Markov Model
+#' Log-likelihood of a Hidden Markov Model
 #'
-#'
-#' @param object Ahidden Markov model.
+#' @param object A hidden Markov model.
 #' @param partials Return a vector containing the individual contributions of 
 #' each sequence to the total log-likelihood. The default is `FALSE`, which 
 #' returns the sum of all log-likelihood components.
@@ -13,7 +12,7 @@
 #' @param ... Ignored.
 #' @return Log-likelihood of the hidden Markov model. This is an object of class
 #' `logLik` with attributes `nobs` and `df` inherited from the model object.
-#' @rdname logLik
+#' @rdname logLik_hmm
 #' @export
 logLik.hmm <- function(object, partials = FALSE, threads = 1, 
                        log_space = TRUE, ...) {
@@ -42,7 +41,7 @@ logLik.hmm <- function(object, partials = FALSE, threads = 1,
     df = df, nobs = nobs
   )
 }
-#' @rdname logLik
+#' @rdname logLik_hmm
 #' @export
 logLik.mhmm <- function(object, partials = FALSE, threads = 1, 
                         log_space = TRUE, ...) {
@@ -72,26 +71,57 @@ logLik.mhmm <- function(object, partials = FALSE, threads = 1,
     df = df, nobs = nobs
   )
 }
-#' @rdname logLik
+#' Log-likelihood of a Non-homogeneous Hidden Markov Model
+#'
+#' @param object A hidden Markov model.
+#' @param partials Return a vector containing the individual contributions of 
+#' each sequence to the total log-likelihood. The default is `FALSE`, which 
+#' returns the sum of all log-likelihood components.
+#' @param penalized if `TRUE` (the default), returns the final penalized 
+#' log-likelihood value from the optimization. If `FALSE`, returns the standard 
+#' log-likelihood of non-homogenous HMM. if `partials = TRUE`, this argument is
+#' set to `FALSE`.
+#' @param ... Ignored.
+#' @return (Penalized) log-likelihood of the hidden Markov model. This is an 
+#' object of class `logLik` with attributes `nobs` and `df` inherited from the 
+#' model object.
+#' @rdname logLik_nhmm
 #' @export
-logLik.nhmm <- function(object, partials = FALSE, ...) {
+#' @export
+logLik.nhmm <- function(object, partials = FALSE, penalized = TRUE, ...) {
   df <- attr(object, "df")
   nobs <- attr(object, "nobs")
-  out <- forward_backward(object, forward_only = TRUE)
-  ll <- apply(out$forward_probs[, object$length_of_sequences, ], 2, logSumExp)
+  if (partials) {
+    out <- forward_backward(object, forward_only = TRUE)
+    ll <- apply(out$forward_probs[, object$length_of_sequences, ], 2, logSumExp)
+  } else {
+    if (penalized) {
+      ll <- object$estimation_results$penalized_loglik
+    } else {
+      ll <- object$estimation_results$loglik
+    }
+  }
   structure(
     if (partials) ll else sum(ll), 
     class = "logLik", 
     df = df, nobs = nobs
   )
 }
-#' @rdname logLik
+#' @rdname logLik_nhmm
 #' @export
-logLik.mnhmm <- function(object, partials = FALSE, ...) {
+logLik.mnhmm <- function(object, partials = FALSE, penalized = TRUE,...) {
   df <- attr(object, "df")
   nobs <- attr(object, "nobs")
-  out <- forward_backward(object, forward_only = TRUE)
-  ll <- apply(out$forward_probs[, object$length_of_sequences, ], 2, logSumExp)
+  if (partials) {
+    out <- forward_backward(object, forward_only = TRUE)
+    ll <- apply(out$forward_probs[, object$length_of_sequences, ], 2, logSumExp)
+  } else {
+    if (penalized) {
+      ll <- object$estimation_results$penalized_loglik
+    } else {
+      ll <- object$estimation_results$loglik
+    }
+  }
   structure(
     if (partials) ll else sum(ll), 
     class = "logLik", 

diff --git a/R/most_probable_cluster.R b/R/most_probable_cluster.R
@@ -23,11 +23,7 @@ most_probable_cluster <- function(x, type = "viterbi", hp = NULL) {
     clusters <- numeric(x$n_sequences)
     for (i in seq_len(x$n_clusters)) {
       # Find matching cluster names from the first hidden state of each individual
-      if (length(unique(state_names)) == length(state_names)) {
-        idx <- which(hp[, 1] %in% x$state_names[[i]])
-      } else {
-        idx <- which(grepl(paste0(x$cluster_names[i], ": "), hp[, 1]))
-      }
+      idx <- which(grepl(paste0(x$cluster_names[i], ": "), hp[, 1]))
       clusters[idx] <- i
     }
   } else {