Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add latentcor #219

Draft
wants to merge 10 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: SpiecEasi
Title: Sparse Inverse Covariance for Ecological Statistical Inference
Version: 1.1.3
Version: 2.0.1
Authors@R: c(
person("Zachary", "Kurtz", role = c("aut", "cre"), email="[email protected]"),
person("Christian", "Mueller", role = "aut"),
Expand All @@ -10,7 +10,7 @@ Authors@R: c(
Description: Estimate networks from the precision matrix of compositional microbial abundance data.
Encoding: UTF-8
Depends:
R (>= 3.6.0),
R (>= 4.0.0),
Imports:
stats,
methods,
Expand All @@ -21,7 +21,8 @@ Imports:
MASS,
VGAM,
Matrix (>= 1.5),
glmnet
glmnet,
latentcor (>= 1.2.0),
Suggests:
parallel,
boot,
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,19 @@ export(getOptiCov)
export(getRefit)
export(getStability)
export(get_comm_params)
export(get_types)
export(graph2prec)
export(is.count)
export(is.normalized)
export(latentcor)
export(make_graph)
export(mclr)
export(multi.spiec.easi)
export(neff)
export(norm_pseudo)
export(norm_rdiric)
export(norm_to_total)
export(pclr)
export(prec2cov)
export(pval.sparccboot)
export(qqdplot_comm)
Expand Down
14 changes: 9 additions & 5 deletions R/SparseICov.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' @param npn perform Nonparanormal (npn) transformation before estimation?
#' @param verbose print progress to standard out
#' @param cov.output return the covariance matrix as well.
#' @param types if cov.method is latentcor, the column types parameter can be supplied as a character vector or (if NULL) inferred
#' @param ... further arguments to huge/estimation functions. See details.
#' @details
#' This is a wrapper function for sparse iCov estimations performed by glasso in the huge package.
Expand Down Expand Up @@ -42,15 +43,19 @@
#' image(as.matrix(est.log$path[[3]][1:5,1:5]))
#' image(as.matrix(est.clr$path[[3]][1:5,1:5]))
#' image(as.matrix(est.f$path[[3]][1:5,1:5]))
sparseiCov <- function(data, method, npn=FALSE, verbose=FALSE, cov.output = TRUE, ...) {
sparseiCov <- function(data, method, cov.fun='cor', npn=FALSE, verbose=FALSE, cov.output = TRUE, types=NULL, ...) {

if (npn) data <- huge::huge.npn(data, verbose=verbose)
if (isSymmetric(data)) SigmaO <- data
else {
SigmaO <- .match.cov(cov.fun, data, types)
}

args <- list(...)
method <- switch(method, glasso = "glasso", mb = "mb", stop("Method not supported"))

if (is.null(args$lambda.min.ratio)) args$lambda.min.ratio <- 1e-3
est <- do.call(huge::huge, c(args, list(x=data,
est <- do.call(huge::huge, c(args, list(x=SigmaO,
method=method,
verbose=verbose,
cov.output = cov.output)))
Expand All @@ -63,6 +68,8 @@ sparseiCov <- function(data, method, npn=FALSE, verbose=FALSE, cov.output = TRUE
# est$data <- data
# est$sym <- ifelse(!is.null(args$sym), args$sym, 'or')
# }
est$data <- data
est$types <- types
return(est)
}

Expand Down Expand Up @@ -167,6 +174,3 @@ glm.neighborhood <- function(X, Y, lambda, link='binomial', ...) {
# }
# return(Bmat)
# }

dclr <- function(x) t(clr(apply(x, 1, norm_diric),2))
dclrNPN <- function(x) huge::huge.npn(t(clr(apply(x, 1, norm_diric),2)), verbose=FALSE)
14 changes: 9 additions & 5 deletions R/SparseLowRankICov.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
#' @param data the n x p data matrix
#' @param npn flag to first fit nonparametric normal transform to the data
#' @param verbose flag to turn on verbose output
#' @param cor flag to use correlation matrix as the input (default: false - uses covariance)
# DEPRECATED
## #' @param cor flag to use correlation matrix as the input (default: false - uses covariance)
#' @param ... arguments to override default algorithm settings (see details)
#' @export
#' @details
Expand All @@ -19,16 +20,19 @@
#' The argument \code{nlambda} determines the number of penalties - somewhere between 10-100 is usually good, depending on how the values of empirical correlation are distributed.#' @export
#'
#' One of \code{beta} (penalty for the nuclear norm) or \code{r} (number of ranks) should be supplied or \code{r=2} is chosen by default.
sparseLowRankiCov <- function(data, npn=FALSE, verbose=FALSE, cor=FALSE, ...) {
sparseLowRankiCov <- function(data, cov.fun='cor', npn=FALSE, verbose=FALSE, cor=FALSE, types=NULL, ...) {
## TODO: make args to admm2 explicit
args <- list(...)
if (length(args$r) > 1 || length(args$beta) > 1)
stop("Only single value allowed for \'r\' or \'beta\'")

if (npn) data <- huge::huge.npn(data, verbose=verbose)
if (isSymmetric(data)) SigmaO <- data
else SigmaO <- cov(data)
if (cor) SigmaO <- cov2cor(SigmaO)
else {
SigmaO <- .match.cov(cov.fun, data, types)
}
# else SigmaO <- cov(data)
# if (cor) SigmaO <- cov2cor(SigmaO)

if (!is.null(args[[ "lambda.max" ]])) maxlam <- args$lambda.max
else maxlam <- 1
Expand Down Expand Up @@ -75,7 +79,7 @@ sparseLowRankiCov <- function(data, npn=FALSE, verbose=FALSE, cor=FALSE, ...) {
loglik[[i]] <- log(Matrix::det(R[z,z])) - sum(Matrix::diag(R[z,z] %*% SigmaO[z,z])) - (p-q)
# args$opts$eta <- max(.5, (p-(n-i))/p)
}
list(icov=icov, path=path, resid=resid, lambda=lambda, loglik=loglik, data=data)
list(icov=icov, path=path, resid=resid, lambda=lambda, loglik=loglik, data=data, types=types)
}

#' @useDynLib SpiecEasi
Expand Down
19 changes: 19 additions & 0 deletions R/correlations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#' @export
latentcor <- function(data, types = NULL) {
## remove custom 'count' types ## TODO: should this be done here??
types <- gsub("_count", "", types)
suppressMessages(
latentcor::latentcor(X = data, types = types, use.nearPD=FALSE, nu = 0, showplot = FALSE)$Rpointwise
)
}

#' @keywords internal
.match.cov <- function(cov.fun, data, types) {
# match string or function's name
cov.fun <- rev(as.character(substitute(base::cov)))[1]
switch(cov.fun,
cor=cor(data),
cov=cov(data),
latentcor=SpiecEasi::latentcor(data, types=types),
stop("covariance function not recognized"))
}
26 changes: 26 additions & 0 deletions R/data_types.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#' @export
is.count <- function(x) {
stopifnot(is.vector(x))
if (!is.numeric(x) && !is.integer(x)) return(FALSE)
if (!isTRUE(all.equal(as.numeric(x), as.integer(x)))) return(FALSE)
if (any(x<0)) return(FALSE)
return(TRUE)
}

#' @export
is.normalized <- function(X) {
# assuming n x p matrix
stopifnot(is.matrix(X) | is.data.frame(X))
isTRUE(all.equal(rowSums(X), rep(1L, nrow(X))))
}


#' @export
get_types <- function(X, ...) {
types <- latentcor::get_types(X)
## determine count and compositional types as a subset of truncated or continuous
tc_ind <- (types=="tru") | (types=="con")
is_count <- apply(X[,tc_ind], 2, is.count)
types[tc_ind][is_count] <- paste(types[tc_ind][is_count], "count", sep="_")
types
}
73 changes: 73 additions & 0 deletions R/normalization.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,71 @@ clr.data.frame <- function(x.f, mar=2, ...) {
clr(as.matrix(x.f), mar, ...)
}

#' Centered log-ratio functions
#' @param x input count data
#' @param pseudo value of the pseudocount
#' @param ... pass through arguments
#' @rdname clr
#' @export
pclr <- function(x, mar=2, pseudo=1, ...) {
clr(x+pseudo, mar=mar, ...)
}


#' Modified central log ratio (mclr) transformation
#'
#' Introduced by GraceYoon/SPRING
#'
#' @param data raw count data or compositional data (n by p) does not matter.
#' @param base exp(1) for natural log
#' @param tol tolerance for checking zeros

# For eps and atleast, users do not have to specify any values. Default should be enough.
#' @param eps epsilon in eq (2) of the paper "Yoon, Gaynanova, M\"{u}ller (2019), Frontiers in Genetics". positive shifts to all non-zero compositions. Refer to the paper for more details. eps = absolute value of minimum of log ratio counts plus c.
#' @param atleast default value is 1. Constant c which ensures all nonzero values to be strictly positive. default is 1.
#'
#'
#' @return \code{mclr} returns a data matrix of the same dimension with input data matrix.
#' @export
## from GraceYoon/SPRING/src/R/helpers.R
mclr <- function(data, base = exp(1), tol = 1e-16, eps = NULL, atleast = 1){
data <- as.matrix(data)
nzero <- (data >= tol) # index for nonzero part
LOG <- ifelse(nzero, log(data, base), 0.0) # take log for only nonzero values. zeros stay as zeros.

# centralize by the log of "geometric mean of only nonzero part" # it should be calculated by each row.
if (nrow(data) > 1){
clrdata <- ifelse(nzero, LOG - rowMeans(LOG)/rowMeans(nzero), 0.0)
} else if (nrow(data) == 1){
clrdata <- ifelse(nzero, LOG - mean(LOG)/mean(nzero), 0.0)
}

if (is.null(eps)){
if(atleast < 0){
warning("atleast should be positive. The functions uses default value 1 instead.")
atleast = 1
}
if( min(clrdata) < 0 ){ # to find the smallest negative value and add 1 to shift all dataa larger than zero.
positivecst <- abs(min(clrdata)) + atleast # "atleast" has default 1.
}else{
positivecst <- 0
}
# positive shift
ADDpos <- ifelse(nzero, clrdata + positivecst, 0.0) ## make all non-zero values strictly positive.
return(ADDpos)
} else if(eps == 0){
## no shift. clr transform applied to non-zero proportions only. without pseudo count.
return(clrdata)
} else if(eps > 0){
## use user-defined eps for additional positive shift.
ADDpos <- ifelse(nzero, clrdata + eps, 0.0)
return(ADDpos)
} else {
stop("check your eps value for additional positive shift. Otherwise, leave it as NULL.")
}
}


#' Additive log-ratio functions
#' @param x.f input data
#' @param ... pass through arguments
Expand Down Expand Up @@ -131,6 +196,14 @@ alr.data.frame <- function(x.f, mar=2, ...) {
alr(as.matrix(x.f), mar, ...)
}

# TODO: should these be exported
#' @keywords internal
dclr <- function(x) t(clr(apply(x, 1, norm_diric),2))

#' @keywords internal
dclrNPN <- function(x) huge::huge.npn(t(clr(apply(x, 1, norm_diric),2)), verbose=FALSE)


#' @keywords internal
triu <- function(x) x[upper.tri(x)]
#' @keywords internal
Expand Down
Loading
Loading