zdk123 · zdk123 · Nov 7, 2021 · Nov 7, 2021 · Dec 4, 2021 · Dec 4, 2021
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SpiecEasi
 Title: Sparse Inverse Covariance for Ecological Statistical Inference
-Version: 1.1.3
+Version: 2.0.1
 Authors@R: c(
     person("Zachary", "Kurtz", role = c("aut", "cre"), email="[email protected]"),
     person("Christian", "Mueller", role = "aut"),
@@ -10,7 +10,7 @@ Authors@R: c(
 Description: Estimate networks from the precision matrix of compositional microbial abundance data.
 Encoding: UTF-8
 Depends:
-   R (>= 3.6.0),
+   R (>= 4.0.0),
 Imports:
    stats,
    methods,
@@ -21,7 +21,8 @@ Imports:
    MASS,
    VGAM,
    Matrix (>= 1.5),
-   glmnet
+   glmnet,
+   latentcor (>= 1.2.0),
 Suggests:
    parallel,
    boot,

diff --git a/NAMESPACE b/NAMESPACE
@@ -34,13 +34,19 @@ export(getOptiCov)
 export(getRefit)
 export(getStability)
 export(get_comm_params)
+export(get_types)
 export(graph2prec)
+export(is.count)
+export(is.normalized)
+export(latentcor)
 export(make_graph)
+export(mclr)
 export(multi.spiec.easi)
 export(neff)
 export(norm_pseudo)
 export(norm_rdiric)
 export(norm_to_total)
+export(pclr)
 export(prec2cov)
 export(pval.sparccboot)
 export(qqdplot_comm)

diff --git a/R/SparseICov.R b/R/SparseICov.R
@@ -9,6 +9,7 @@
 #' @param npn perform Nonparanormal (npn) transformation before estimation?
 #' @param verbose print progress to standard out
 #' @param cov.output return the covariance matrix as well.
+#' @param types if cov.method is latentcor, the column types parameter can be supplied as a character vector or (if NULL) inferred
 #' @param ... further arguments to huge/estimation functions. See details.
 #' @details
 #' This is a wrapper function for sparse iCov estimations performed by glasso in the huge package.
@@ -42,15 +43,19 @@
 #'  image(as.matrix(est.log$path[[3]][1:5,1:5]))
 #'  image(as.matrix(est.clr$path[[3]][1:5,1:5]))
 #'  image(as.matrix(est.f$path[[3]][1:5,1:5]))
-sparseiCov <- function(data, method, npn=FALSE, verbose=FALSE, cov.output = TRUE, ...) {
+sparseiCov <- function(data, method, cov.fun='cor', npn=FALSE, verbose=FALSE, cov.output = TRUE, types=NULL, ...) {
 
   if (npn) data <- huge::huge.npn(data, verbose=verbose)
+  if (isSymmetric(data)) SigmaO <- data
+  else {
+    SigmaO <- .match.cov(cov.fun, data, types)
+  }
 
   args <- list(...)
   method <- switch(method, glasso = "glasso", mb = "mb", stop("Method not supported"))
 
   if (is.null(args$lambda.min.ratio)) args$lambda.min.ratio <- 1e-3
-  est <- do.call(huge::huge, c(args, list(x=data,
+  est <- do.call(huge::huge, c(args, list(x=SigmaO,
                                           method=method,
                                           verbose=verbose,
                                           cov.output = cov.output)))
@@ -63,6 +68,8 @@ sparseiCov <- function(data, method, npn=FALSE, verbose=FALSE, cov.output = TRUE
   #   est$data <- data
   #   est$sym  <- ifelse(!is.null(args$sym), args$sym, 'or')
   # }
+  est$data <- data
+  est$types <- types
   return(est)
 }
 
@@ -167,6 +174,3 @@ glm.neighborhood <- function(X, Y, lambda, link='binomial', ...) {
 #   }
 #   return(Bmat)
 # }
-
-dclr <- function(x) t(clr(apply(x, 1, norm_diric),2))
-dclrNPN <- function(x) huge::huge.npn(t(clr(apply(x, 1, norm_diric),2)), verbose=FALSE)
diff --git a/R/SparseLowRankICov.R b/R/SparseLowRankICov.R
@@ -5,7 +5,8 @@
 #' @param data the n x p data matrix
 #' @param npn flag to first fit nonparametric normal transform to the data
 #' @param verbose flag to turn on verbose output
-#' @param cor flag to use correlation matrix as the input (default: false - uses covariance)
+# DEPRECATED
+## #' @param cor flag to use correlation matrix as the input (default: false - uses covariance)
 #' @param ... arguments to override default algorithm settings (see details)
 #' @export
 #' @details
@@ -19,16 +20,19 @@
 #' The argument \code{nlambda} determines the number of penalties - somewhere between 10-100 is usually good, depending on how the values of empirical correlation are distributed.#' @export
 #'
 #' One of \code{beta} (penalty for the nuclear norm) or \code{r} (number of ranks) should be supplied or \code{r=2} is chosen by default.
-sparseLowRankiCov <- function(data, npn=FALSE, verbose=FALSE, cor=FALSE, ...) {
+sparseLowRankiCov <- function(data, cov.fun='cor', npn=FALSE, verbose=FALSE, cor=FALSE, types=NULL, ...) {
 ## TODO: make args to admm2 explicit
   args <- list(...)
   if (length(args$r) > 1 || length(args$beta) > 1)
     stop("Only single value allowed for \'r\' or \'beta\'")
 
   if (npn) data <- huge::huge.npn(data, verbose=verbose)
   if (isSymmetric(data)) SigmaO <- data
-  else SigmaO <- cov(data)
-  if (cor) SigmaO <- cov2cor(SigmaO)
+  else {
+    SigmaO <- .match.cov(cov.fun, data, types)
+  }
+  # else SigmaO <- cov(data)
+  # if (cor) SigmaO <- cov2cor(SigmaO)
 
   if (!is.null(args[[ "lambda.max" ]])) maxlam <- args$lambda.max
   else maxlam <- 1
@@ -75,7 +79,7 @@ sparseLowRankiCov <- function(data, npn=FALSE, verbose=FALSE, cor=FALSE, ...) {
     loglik[[i]] <- log(Matrix::det(R[z,z])) - sum(Matrix::diag(R[z,z] %*% SigmaO[z,z])) - (p-q)
 #    args$opts$eta <- max(.5, (p-(n-i))/p)
   }
-  list(icov=icov, path=path, resid=resid, lambda=lambda, loglik=loglik, data=data)
+  list(icov=icov, path=path, resid=resid, lambda=lambda, loglik=loglik, data=data, types=types)
 }
 
 #' @useDynLib SpiecEasi

diff --git a/R/correlations.R b/R/correlations.R
@@ -0,0 +1,19 @@
+#' @export
+latentcor <- function(data, types = NULL) {
+  ## remove custom 'count' types ## TODO: should this be done here??
+  types <- gsub("_count", "", types)
+  suppressMessages(
+    latentcor::latentcor(X = data, types = types, use.nearPD=FALSE, nu = 0, showplot = FALSE)$Rpointwise
+  )
+}
+
+#' @keywords internal
+.match.cov <- function(cov.fun, data, types) {
+  # match string or function's name
+  cov.fun <- rev(as.character(substitute(base::cov)))[1]
+  switch(cov.fun,
+         cor=cor(data),
+         cov=cov(data),
+         latentcor=SpiecEasi::latentcor(data, types=types),
+        stop("covariance function not recognized"))
+}
diff --git a/R/data_types.R b/R/data_types.R
@@ -0,0 +1,26 @@
+#' @export
+is.count <- function(x) {
+  stopifnot(is.vector(x))
+  if (!is.numeric(x) && !is.integer(x)) return(FALSE)
+  if (!isTRUE(all.equal(as.numeric(x), as.integer(x)))) return(FALSE)
+  if (any(x<0)) return(FALSE)
+  return(TRUE)
+}
+
+#' @export
+is.normalized <- function(X) {
+  # assuming n x p matrix
+  stopifnot(is.matrix(X) | is.data.frame(X))
+  isTRUE(all.equal(rowSums(X), rep(1L, nrow(X))))
+}
+
+
+#' @export
+get_types <- function(X, ...) {
+  types <- latentcor::get_types(X)
+  ## determine count and compositional types as a subset of truncated or continuous
+  tc_ind <- (types=="tru") | (types=="con")
+  is_count <- apply(X[,tc_ind], 2, is.count)
+  types[tc_ind][is_count] <- paste(types[tc_ind][is_count], "count", sep="_")
+  types
+}
diff --git a/R/normalization.R b/R/normalization.R
@@ -85,6 +85,71 @@ clr.data.frame <- function(x.f, mar=2, ...) {
     clr(as.matrix(x.f), mar, ...)
 }
 
+#' Centered log-ratio functions
+#' @param x input count data
+#' @param pseudo value of the pseudocount
+#' @param ... pass through arguments
+#' @rdname clr
+#' @export
+pclr <- function(x, mar=2, pseudo=1, ...) {
+    clr(x+pseudo, mar=mar, ...)
+}
+
+
+#' Modified central log ratio (mclr) transformation
+#'
+#' Introduced by GraceYoon/SPRING
+#'
+#' @param data raw count data or compositional data (n by p) does not matter.
+#' @param base exp(1) for natural log
+#' @param tol tolerance for checking zeros
+
+# For eps and atleast, users do not have to specify any values. Default should be enough.
+#' @param eps epsilon in eq (2) of the paper "Yoon, Gaynanova, M\"{u}ller (2019), Frontiers in Genetics". positive shifts to all non-zero compositions. Refer to the paper for more details. eps = absolute value of minimum of log ratio counts plus c.
+#' @param atleast default value is 1. Constant c which ensures all nonzero values to be strictly positive. default is 1.
+#'
+#'
+#' @return \code{mclr} returns a data matrix of the same dimension with input data matrix.
+#' @export
+## from GraceYoon/SPRING/src/R/helpers.R
+mclr <- function(data, base = exp(1), tol = 1e-16, eps = NULL, atleast = 1){
+  data <- as.matrix(data)
+  nzero <- (data >= tol)  # index for nonzero part
+  LOG <- ifelse(nzero, log(data, base), 0.0) # take log for only nonzero values. zeros stay as zeros.
+
+  # centralize by the log of "geometric mean of only nonzero part" # it should be calculated by each row.
+  if (nrow(data) > 1){
+    clrdata <- ifelse(nzero, LOG - rowMeans(LOG)/rowMeans(nzero), 0.0)
+  } else if (nrow(data) == 1){
+    clrdata <- ifelse(nzero, LOG - mean(LOG)/mean(nzero), 0.0)
+  }
+
+  if (is.null(eps)){
+    if(atleast < 0){
+      warning("atleast should be positive. The functions uses default value 1 instead.")
+      atleast = 1
+    }
+    if( min(clrdata) < 0 ){ # to find the smallest negative value and add 1 to shift all dataa larger than zero.
+      positivecst <- abs(min(clrdata)) + atleast # "atleast" has default 1.
+    }else{
+      positivecst <- 0
+    }
+    # positive shift
+    ADDpos <- ifelse(nzero, clrdata + positivecst, 0.0) ## make all non-zero values strictly positive.
+    return(ADDpos)
+  } else if(eps == 0){
+    ## no shift. clr transform applied to non-zero proportions only. without pseudo count.
+    return(clrdata)
+  } else if(eps > 0){
+    ## use user-defined eps for additional positive shift.
+    ADDpos <- ifelse(nzero, clrdata + eps, 0.0)
+    return(ADDpos)
+  } else {
+    stop("check your eps value for additional positive shift. Otherwise, leave it as NULL.")
+  }
+}
+
+
 #' Additive log-ratio functions
 #' @param x.f input data
 #' @param ... pass through arguments
@@ -131,6 +196,14 @@ alr.data.frame <- function(x.f, mar=2, ...) {
     alr(as.matrix(x.f), mar, ...)
 }
 
+# TODO: should these be exported
+#' @keywords internal
+dclr <- function(x) t(clr(apply(x, 1, norm_diric),2))
+
+#' @keywords internal
+dclrNPN <- function(x) huge::huge.npn(t(clr(apply(x, 1, norm_diric),2)), verbose=FALSE)
+
+
 #' @keywords internal
 triu <- function(x) x[upper.tri(x)]
 #' @keywords internal