diff --git a/DESCRIPTION b/DESCRIPTION index 2c56f1980..3ae038404 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Seurat -Version: 4.0.1 -Date: 2021-03-17 +Version: 4.0.2 +Date: 2021-05-20 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details. Authors@R: c( @@ -10,7 +10,7 @@ Authors@R: c( person(given = "Jeff", family = "Farrell", email = "jfarrell@g.harvard.edu", role = "ctb"), person(given = "Christoph", family = "Hafemeister", email = "chafemeister@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-6365-8254")), person(given = "Yuhan", family = "Hao", email = "yhao@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-1810-0822")), - person(given = "Paul", family = "Hoffman", email = "nygcSatijalab@nygenome.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7693-8957")), + person(given = "Paul", family = "Hoffman", email = "seurat@nygenome.org", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7693-8957")), person(given = "Jaison", family = "Jain", email = "jjain@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0002-9478-5018")), person(given = "Efthymia", family = "Papalexi", email = "epapalexi@nygenome.org", role = "ctb", comment = c(ORCID = "0000-0001-5898-694X")), person(given = "Patrick", family = "Roelli", email = "proelli@nygenome.org", role = "ctb"), diff --git a/NAMESPACE b/NAMESPACE index 4e32d9ac6..fad1dd5b0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -117,6 +117,7 @@ export("Project<-") export("SCTResults<-") export("Tool<-") export("VariableFeatures<-") +export(AddAzimuthResults) export(AddMetaData) export(AddModuleScore) export(AggregateExpression) @@ -360,6 +361,7 @@ importFrom(MASS,lda) importFrom(Matrix,as.matrix) importFrom(Matrix,colMeans) importFrom(Matrix,colSums) +importFrom(Matrix,crossprod) importFrom(Matrix,readMM) importFrom(Matrix,rowMeans) importFrom(Matrix,rowSums) @@ -492,6 +494,7 @@ importFrom(ggplot2,guide_legend) importFrom(ggplot2,guides) importFrom(ggplot2,labs) importFrom(ggplot2,layer) +importFrom(ggplot2,layer_scales) importFrom(ggplot2,margin) importFrom(ggplot2,position_dodge) importFrom(ggplot2,position_jitterdodge) diff --git a/NEWS.md b/NEWS.md index 382a2eb64..40d3c3910 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,28 @@ +# Seurat 4.0.2 (2021-05-20) +## Added +- New `AddAzimuthScores()` and `AddAzimuthResults()` functions +- Add `shuffle` parameter to `FeatureScatter()` ([#4280](https://github.com/satijalab/seurat/pull/4280)) +- Add `lsiproject` and `rpca` options for `FindTransferAnchors()` +- Add `rlsi` option for `FindIntegrationAnchors()` + +## Changes +- Preserve feature metadata when converting from `SingleCellExperiment` to `SeuratObject` class
([#4205](https://github.com/satijalab/seurat/issues/4205)) +- Preserve multiple assays when converting from `SingleCellExperiment` to `SeuratObject` class
([#3764](https://github.com/satijalab/seurat/issues/3764)) +- Fix passing of `score.thresh` parameter in `ScoreJackStraw()` ([#4268](https://github.com/satijalab/seurat/pull/4268)) +- Fix FC calculation in `FindMarkers()` for non-log transformed data
+- Add umap-learn version >= 0.5.0 compatibility for `RunUMAP()` +- Fix `DotPlot` to use `log1p` when `scale = FALSE`
+([#4298](https://github.com/satijalab/seurat/issues/4298)) +- Fix split and shuffled `DimPlot` +- Disallow `NULL` or any other length-0 vector for `ident.1` in `FindMarkers()` +- Fix range shift when labeling clusters on a GeomSpatial plot +- Fix `SpatialPlot` distortion for non-square images +- Fix future-related warnings in `FindIntegrationAnchors()` +- Fix `fc.name` parameter in `FindMarkers()` ([#4474](https://github.com/satijalab/seurat/issues/4474)) +- Deprecate `group.by` parameter in `PlotPerturbScore()` in favor of `mixscape.class` # Seurat 4.0.1 (2021-03-17) ## Added - Add direction option to `PlotClusterTree()` diff --git a/R/differential_expression.R b/R/differential_expression.R index 25a91e562..64f6b1228 100644 --- a/R/differential_expression.R +++ b/R/differential_expression.R @@ -813,6 +813,9 @@ FindMarkers.Seurat <- function( if (!is.null(x = assay) && !is.null(x = reduction)) { stop("Please only specify either assay or reduction.") } + if (length(x = ident.1) == 0) { + stop("At least 1 ident must be specified in `ident.1`") + } # select which data to use if (is.null(x = reduction)) { assay <- assay %||% DefaultAssay(object = object) @@ -836,6 +839,20 @@ FindMarkers.Seurat <- function( cells = c(cells$cells.1, cells$cells.2) ) } + # check normalization method + norm.command <- paste0("NormalizeData.", assay) + if (norm.command %in% Command(object = object) && is.null(x = reduction)) { + norm.method <- Command( + object = object, + command = norm.command, + value = "normalization.method" + ) + if (norm.method != "LogNormalize") { + mean.fxn <- function(x) { + return(log(x = rowMeans(x = x) + pseudocount.use, base = base)) + } + } + } de.results <- FindMarkers( object = data.use, slot = slot, @@ -856,6 +873,7 @@ FindMarkers.Seurat <- function( pseudocount.use = pseudocount.use, mean.fxn = mean.fxn, base = base, + fc.name = fc.name, ... ) return(de.results) @@ -1617,9 +1635,7 @@ MarkerTest <- function( # @param data.use Data to test # @param cells.1 Group 1 cells # @param cells.2 Group 2 cells -# @param latent.vars Confounding variables to adjust for in DE test. Default is -# "nUMI", which adjusts for cellular depth (i.e. cellular detection rate). For -# non-UMI based data, set to nGene instead. +# @param latent.vars Confounding variables to adjust for in DE test # @param verbose print output # @param \dots Additional parameters to zero-inflated regression (zlm) function # in MAST # # @return Returns a p-value ranked matrix of putative differentially expressed # genes. # #' @importFrom stats relevel -# -# @export -# -# @examples -# \dontrun{ -# data("pbmc_small") -# pbmc_small -# MASTDETest(pbmc_small, cells.1 = WhichCells(object = pbmc_small, idents = 1), -# cells.2 = WhichCells(object = pbmc_small, idents = 2)) -# } -# MASTDETest <- function( data.use, cells.1, diff --git a/R/dimensional_reduction.R b/R/dimensional_reduction.R index a91ce1923..2548289b2 100644 --- a/R/dimensional_reduction.R +++ b/R/dimensional_reduction.R @@ -1433,7 +1433,7 @@ RunUMAP.Graph <- function( reduction.key = 'UMAP_', ... ) { - CheckDots(...) + #CheckDots(...)
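+  # NOTE: CheckDots() is disabled above; the assumed rationale (not stated in
+  # the changelog) is that arguments in ... must now be forwarded to
+  # umap-learn, whose accepted argument set changed in 0.5.0 (see the version
+  # check added below), and strict dot-checking would reject them.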
if (umap.method != 'umap-learn') { warning( "Running UMAP on Graph objects is only supported using the umap-learn method", @@ -1465,7 +1465,7 @@ RunUMAP.Graph <- function( b <- b %||% ab.params[[2]] n.epochs <- n.epochs %||% 0L random.state <- sklearn$utils$check_random_state(seed = as.integer(x = seed.use)) - embeddings <- umap$umap_$simplicial_set_embedding( + umap.args <- list( data = data, graph = object, n_components = n.components, @@ -1481,6 +1481,18 @@ RunUMAP.Graph <- function( metric_kwds = metric.kwds, verbose = verbose ) + if (numeric_version(x = umap$pkg_resources$get_distribution("umap-learn")$version) >= + numeric_version(x = "0.5.0")) { + umap.args <- c(umap.args, list( + densmap = FALSE, + densmap_kwds = NULL, + output_dens = FALSE + )) + } + embeddings <- do.call(what = umap$umap_$simplicial_set_embedding, args = umap.args) + if (length(x = embeddings) == 2) { + embeddings <- embeddings[[1]] + } rownames(x = embeddings) <- colnames(x = data) colnames(x = embeddings) <- paste0("UMAP_", 1:n.components) # center the embeddings on zero @@ -1658,9 +1670,9 @@ RunUMAP.Seurat <- function( if (!inherits(x = object[[nn.name]], what = "Neighbor")) { stop( "Please specify a Neighbor object name, ", - "instead of the name of a ", - class(object[[nn.name]]), - " object", + "instead of the name of a ", + class(object[[nn.name]]), + " object", call. = FALSE ) } @@ -1669,9 +1681,9 @@ RunUMAP.Seurat <- function( if (!inherits(x = object[[graph]], what = "Graph")) { stop( "Please specify a Graph object name, ", - "instead of the name of a ", - class(object[[graph]]), - " object", + "instead of the name of a ", + class(object[[graph]]), + " object", call. = FALSE ) } @@ -1764,7 +1776,7 @@ ScoreJackStraw.DimReduc <- function(object, dims = 1:5, score.thresh = 1e-5, ... JS(object = object) <- ScoreJackStraw( object = JS(object = object), dims = dims, - score.thresh = 1e-5, + score.thresh = score.thresh, ... ) return(object) @@ -1792,6 +1804,7 @@ ScoreJackStraw.Seurat <- function( object[[reduction]] <- ScoreJackStraw( object = object[[reduction]], dims = dims, + score.thresh = score.thresh, ... 
) if (do.plot) { diff --git a/R/integration.R b/R/integration.R index 19195e145..cd330084d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -71,6 +71,7 @@ NULL #' \itemize{ #' \item{cca: Canonical correlation analysis} #' \item{rpca: Reciprocal PCA} +#' \item{rlsi: Reciprocal LSI} #' } #' @param l2.norm Perform L2 normalization on the CCA cell embeddings after #' dimensional reduction @@ -85,7 +86,7 @@ NULL #' annoy #' @param n.trees More trees gives higher precision when using annoy approximate #' nearest neighbor search -#' @param eps Error bound on the neighbor finding algorithm (from RANN) +#' @param eps Error bound on the neighbor finding algorithm (from RANN/Annoy) #' @param verbose Print progress bars and output #' #' @return Returns an \code{\link{AnchorSet}} object that can be used as input to @@ -135,7 +136,7 @@ FindIntegrationAnchors <- function( scale = TRUE, normalization.method = c("LogNormalize", "SCT"), sct.clip.range = NULL, - reduction = c("cca", "rpca"), + reduction = c("cca", "rpca", "rlsi"), l2.norm = TRUE, dims = 1:30, k.anchor = 5, @@ -152,6 +153,15 @@ FindIntegrationAnchors <- function( if (reduction == "rpca") { reduction <- "pca" } + if (reduction == "rlsi") { + reduction <- "lsi" + if (normalization.method == "SCT") { + warning("Requested normalization method 'SCT' is not applicable for LSI") + normalization.method <- "LogNormalize" + } + scale <- FALSE + k.filter <- NA + } my.lapply <- ifelse( test = verbose && nbrOfWorkers() == 1, yes = pblapply, @@ -180,8 +190,11 @@ FindIntegrationAnchors <- function( assay <- sapply(X = object.list, FUN = DefaultAssay) } object.list <- CheckDuplicateCellNames(object.list = object.list) - slot <- "data" + if (reduction == "lsi") { + all.rownames <- lapply(X = object.list, FUN = rownames) + anchor.features <- Reduce(f = intersect, x = all.rownames) + } if (normalization.method == "SCT") { slot <- "scale.data" scale <- FALSE @@ -238,10 +251,10 @@ FindIntegrationAnchors <- function( ) } nn.reduction <- reduction - # if using pca, only need to compute the internal neighborhood structure once + # if using pca or lsi, only need to compute the internal neighborhood structure once # for each dataset internal.neighbors <- list() - if (nn.reduction == "pca") { + if (nn.reduction %in% c("pca", "lsi")) { k.filter <- NA if (verbose) { message("Computing within dataset neighborhoods") @@ -285,145 +298,142 @@ FindIntegrationAnchors <- function( } } # determine all anchors - all.anchors <- my.lapply( - X = 1:nrow(x = combinations), - FUN = function(row) { - i <- combinations[row, 1] - j <- combinations[row, 2] - object.1 <- DietSeurat( - object = object.list[[i]], - assays = assay[i], - features = anchor.features, - counts = FALSE, - scale.data = TRUE, - dimreducs = reduction - ) - object.2 <- DietSeurat( - object = object.list[[j]], - assays = assay[j], - features = anchor.features, - counts = FALSE, - scale.data = TRUE, - dimreducs = reduction - ) - # suppress key duplication warning - suppressWarnings(object.1[["ToIntegrate"]] <- object.1[[assay[i]]]) - DefaultAssay(object = object.1) <- "ToIntegrate" - if (reduction %in% Reductions(object = object.1)) { - slot(object = object.1[[reduction]], name = "assay.used") <- "ToIntegrate" - } - object.1 <- DietSeurat(object = object.1, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction) - suppressWarnings(object.2[["ToIntegrate"]] <- object.2[[assay[j]]]) - DefaultAssay(object = object.2) <- "ToIntegrate" - if (reduction %in% Reductions(object = object.2)) { - slot(object = 
object.2[[reduction]], name = "assay.used") <- "ToIntegrate" - } - object.2 <- DietSeurat(object = object.2, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction) - object.pair <- switch( - EXPR = reduction, - 'cca' = { - object.pair <- RunCCA( - object1 = object.1, - object2 = object.2, - assay1 = "ToIntegrate", - assay2 = "ToIntegrate", - features = anchor.features, - num.cc = max(dims), - renormalize = FALSE, - rescale = FALSE, - verbose = verbose - ) - if (l2.norm){ - object.pair <- L2Dim(object = object.pair, reduction = reduction) - reduction <- paste0(reduction, ".l2") - nn.reduction <- reduction - } - reduction.2 <- character() - object.pair - }, - 'pca' = { - common.features <- intersect( - x = rownames(x = Loadings(object = object.1[["pca"]])), - y = rownames(x = Loadings(object = object.2[["pca"]])) - ) - common.features <- intersect( - x = common.features, - y = anchor.features - ) - object.pair <- merge(x = object.1, y = object.2, merge.data = TRUE) - projected.embeddings.1<- t(x = GetAssayData(object = object.1, slot = "scale.data")[common.features, ]) %*% - Loadings(object = object.2[["pca"]])[common.features, ] - object.pair[['projectedpca.1']] <- CreateDimReducObject( - embeddings = rbind(projected.embeddings.1, Embeddings(object = object.2[["pca"]])), - assay = DefaultAssay(object = object.1), - key = "projectedpca1_" - ) - projected.embeddings.2 <- t(x = GetAssayData(object = object.2, slot = "scale.data")[common.features, ]) %*% - Loadings(object = object.1[["pca"]])[common.features, ] - object.pair[['projectedpca.2']] <- CreateDimReducObject( - embeddings = rbind(projected.embeddings.2, Embeddings(object = object.1[["pca"]])), - assay = DefaultAssay(object = object.2), - key = "projectedpca2_" - ) - object.pair[["pca"]] <- CreateDimReducObject( - embeddings = rbind( - Embeddings(object = object.1[["pca"]]), - Embeddings(object = object.2[["pca"]])), - assay = DefaultAssay(object = object.1), - key = "pca_" - ) - reduction <- "projectedpca.1" - reduction.2 <- "projectedpca.2" - if (l2.norm){ - slot(object = object.pair[["projectedpca.1"]], name = "cell.embeddings") <- Sweep( - x = Embeddings(object = object.pair[["projectedpca.1"]]), - MARGIN = 2, - STATS = apply(X = Embeddings(object = object.pair[["projectedpca.1"]]), MARGIN = 2, FUN = sd), - FUN = "/" - ) - slot(object = object.pair[["projectedpca.2"]], name = "cell.embeddings") <- Sweep( - x = Embeddings(object = object.pair[["projectedpca.2"]]), - MARGIN = 2, - STATS = apply(X = Embeddings(object = object.pair[["projectedpca.2"]]), MARGIN = 2, FUN = sd), - FUN = "/" - ) - object.pair <- L2Dim(object = object.pair, reduction = "projectedpca.1") - object.pair <- L2Dim(object = object.pair, reduction = "projectedpca.2") - reduction <- paste0(reduction, ".l2") - reduction.2 <- paste0(reduction.2, ".l2") - } - object.pair - }, - stop("Invalid reduction parameter. 
Please choose either cca or rpca") - ) - internal.neighbors <- internal.neighbors[c(i, j)] - - anchors <- FindAnchors( - object.pair = object.pair, - assay = c("ToIntegrate", "ToIntegrate"), - slot = slot, - cells1 = colnames(x = object.1), - cells2 = colnames(x = object.2), - internal.neighbors = internal.neighbors, - reduction = reduction, - reduction.2 = reduction.2, - nn.reduction = nn.reduction, - dims = dims, - k.anchor = k.anchor, - k.filter = k.filter, - k.score = k.score, - max.features = max.features, - nn.method = nn.method, - n.trees = n.trees, - eps = eps, - verbose = verbose - ) - anchors[, 1] <- anchors[, 1] + offsets[i] - anchors[, 2] <- anchors[, 2] + offsets[j] - return(anchors) + anchoring.fxn <- function(row) { + i <- combinations[row, 1] + j <- combinations[row, 2] + object.1 <- DietSeurat( + object = object.list[[i]], + assays = assay[i], + features = anchor.features, + counts = FALSE, + scale.data = TRUE, + dimreducs = reduction + ) + object.2 <- DietSeurat( + object = object.list[[j]], + assays = assay[j], + features = anchor.features, + counts = FALSE, + scale.data = TRUE, + dimreducs = reduction + ) + # suppress key duplication warning + suppressWarnings(object.1[["ToIntegrate"]] <- object.1[[assay[i]]]) + DefaultAssay(object = object.1) <- "ToIntegrate" + if (reduction %in% Reductions(object = object.1)) { + slot(object = object.1[[reduction]], name = "assay.used") <- "ToIntegrate" } - ) + object.1 <- DietSeurat(object = object.1, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction) + suppressWarnings(object.2[["ToIntegrate"]] <- object.2[[assay[j]]]) + DefaultAssay(object = object.2) <- "ToIntegrate" + if (reduction %in% Reductions(object = object.2)) { + slot(object = object.2[[reduction]], name = "assay.used") <- "ToIntegrate" + } + object.2 <- DietSeurat(object = object.2, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction) + object.pair <- switch( + EXPR = reduction, + 'cca' = { + object.pair <- RunCCA( + object1 = object.1, + object2 = object.2, + assay1 = "ToIntegrate", + assay2 = "ToIntegrate", + features = anchor.features, + num.cc = max(dims), + renormalize = FALSE, + rescale = FALSE, + verbose = verbose + ) + if (l2.norm){ + object.pair <- L2Dim(object = object.pair, reduction = reduction) + reduction <- paste0(reduction, ".l2") + nn.reduction <- reduction + } + reduction.2 <- character() + object.pair + }, + 'pca' = { + object.pair <- ReciprocalProject( + object.1 = object.1, + object.2 = object.2, + reduction = 'pca', + projected.name = 'projectedpca', + features = anchor.features, + do.scale = FALSE, + do.center = FALSE, + slot = 'scale.data', + l2.norm = l2.norm, + verbose = verbose + ) + reduction <- "projectedpca.ref" + reduction.2 <- "projectedpca.query" + if (l2.norm) { + reduction <- paste0(reduction, ".l2") + reduction.2 <- paste0(reduction.2, ".l2") + } + object.pair + }, + 'lsi' = { + object.pair <- ReciprocalProject( + object.1 = object.1, + object.2 = object.2, + reduction = 'lsi', + projected.name = 'projectedlsi', + features = anchor.features, + do.center = TRUE, + do.scale = FALSE, + slot = 'data', + l2.norm = l2.norm, + verbose = verbose + ) + reduction <- "projectedlsi.ref" + reduction.2 <- "projectedlsi.query" + if (l2.norm) { + reduction <- paste0(reduction, ".l2") + reduction.2 <- paste0(reduction.2, ".l2") + } + object.pair + }, + stop("Invalid reduction parameter. 
Please choose one of cca, rpca, or rlsi") + ) + internal.neighbors <- internal.neighbors[c(i, j)] + anchors <- FindAnchors( + object.pair = object.pair, + assay = c("ToIntegrate", "ToIntegrate"), + slot = slot, + cells1 = colnames(x = object.1), + cells2 = colnames(x = object.2), + internal.neighbors = internal.neighbors, + reduction = reduction, + reduction.2 = reduction.2, + nn.reduction = nn.reduction, + dims = dims, + k.anchor = k.anchor, + k.filter = k.filter, + k.score = k.score, + max.features = max.features, + nn.method = nn.method, + n.trees = n.trees, + eps = eps, + verbose = verbose + ) + anchors[, 1] <- anchors[, 1] + offsets[i] + anchors[, 2] <- anchors[, 2] + offsets[j] + return(anchors) + } + if (nbrOfWorkers() == 1) { + all.anchors <- pblapply( + X = 1:nrow(x = combinations), + FUN = anchoring.fxn + ) + } else { + all.anchors <- future_lapply( + X = 1:nrow(x = combinations), + FUN = anchoring.fxn, + future.seed = TRUE + ) + } all.anchors <- do.call(what = 'rbind', args = all.anchors) all.anchors <- rbind(all.anchors, all.anchors[, c(2, 1, 3)]) all.anchors <- AddDatasetID(anchor.df = all.anchors, offsets = offsets, obj.lengths = objects.ncell) @@ -439,6 +449,119 @@ FindIntegrationAnchors <- function( return(anchor.set) } +# Merge datasets and perform reciprocal SVD projection, adding new dimreducs +# for each projection and the merged original SVDs. +# +# @param object.1 First Seurat object to merge +# @param object.2 Second Seurat object to merge +# @param reduction Name of DimReduc to use. Must be an SVD-based DimReduc (eg, PCA or LSI) +# so that the loadings can be used to project new embeddings. Must be present +# in both input objects, with a substantial overlap in the features used to construct +# the SVDs. +# @param dims Dimensions to use from the SVDs +# @param projected.name Name to store projected SVDs under (eg, "projectedpca") +# @param features Features to use. Will subset the SVD loadings to use these features +# before performing projection. Typically uses the anchor.features for integration. +# @param do.center Center projected values (subtract mean) +# @param do.scale Scale projected values (divide by SD) +# @param slot Name of slot to pull data from. 
Should be scale.data for PCA and data for LSI +# @param verbose Display messages +# @return Returns a merged Seurat object with two projected SVDs (object.1 -> object.2, object.2 -> object.1) +# and a merged SVD (needed for within-dataset neighbors) +ReciprocalProject <- function( + object.1, + object.2, + reduction, + dims, + projected.name, + features, + do.scale, + do.center, + slot, + l2.norm, + verbose = TRUE +) { + common.features <- intersect( + x = rownames(x = Loadings(object = object.1[[reduction]])), + y = rownames(x = Loadings(object = object.2[[reduction]])) + ) + common.features <- intersect( + x = common.features, + y = features + ) + object.pair <- merge(x = object.1, y = object.2, merge.data = TRUE) + data.1 <- GetAssayData( + object = object.1, + slot = slot + ) + data.2 <- GetAssayData( + object = object.2, + slot = slot + ) + + proj.1 <- ProjectSVD( + reduction = object.2[[reduction]], + data = data.1, + mode = reduction, + features = common.features, + do.scale = do.scale, + do.center = do.center, + use.original.stats = FALSE, + verbose = verbose + ) + proj.2 <- ProjectSVD( + reduction = object.1[[reduction]], + data = data.2, + mode = reduction, + features = common.features, + do.scale = do.scale, + do.center = do.center, + use.original.stats = FALSE, + verbose = verbose + ) + # object.1 is ref, and object.2 is query + reduction.dr.name.1 <- paste0(projected.name, ".ref") + reduction.dr.name.2 <- paste0(projected.name, ".query") + object.pair[[reduction.dr.name.1]] <- CreateDimReducObject( + embeddings = rbind(Embeddings(object = object.1[[reduction]]), proj.2)[,dims], + loadings = Loadings(object = object.1[[reduction]])[,dims], + assay = DefaultAssay(object = object.1), + key = paste0(projected.name, "ref_") + ) + object.pair[[reduction.dr.name.2]] <- CreateDimReducObject( + embeddings = rbind(proj.1, Embeddings(object = object.2[[reduction]]))[,dims], + loadings = Loadings(object = object.2[[reduction]])[,dims], + assay = DefaultAssay(object = object.2), + key = paste0(projected.name, "query_") + ) + object.pair[[reduction]] <- CreateDimReducObject( + embeddings = rbind( + Embeddings(object = object.1[[reduction]]), + Embeddings(object = object.2[[reduction]]))[,dims], + loadings = Loadings(object = object.1[[reduction]])[,dims], + assay = DefaultAssay(object = object.1), + key = paste0(projected.name, "_") + ) + if (l2.norm) { + slot(object = object.pair[[reduction.dr.name.1]], name = "cell.embeddings") <- Sweep( + x = Embeddings(object = object.pair[[reduction.dr.name.1]]), + MARGIN = 2, + STATS = apply(X = Embeddings(object = object.pair[[reduction.dr.name.1]]), MARGIN = 2, FUN = sd), + FUN = "/" + ) + slot(object = object.pair[[reduction.dr.name.2]], name = "cell.embeddings") <- Sweep( + x = Embeddings(object = object.pair[[reduction.dr.name.2]]), + MARGIN = 2, + STATS = apply(X = Embeddings(object = object.pair[[reduction.dr.name.2]]), MARGIN = 2, FUN = sd), + FUN = "/" + ) + + object.pair <- L2Dim(object = object.pair, reduction = reduction.dr.name.1) + object.pair <- L2Dim(object = object.pair, reduction = reduction.dr.name.2) + } + return(object.pair) +} + #' Find transfer anchors #' #' Find a set of anchors between a reference and query object. These @@ -458,8 +581,11 @@ FindIntegrationAnchors <- function( #' \code{project.query = TRUE}), using the \code{features} specified. The data #' from the other dataset is then projected onto this learned PCA structure. 
#' If \code{reduction = "cca"}, then CCA is performed on the reference and -#' query for this dimensional reduction step. If \code{l2.norm} is set to -#' \code{TRUE}, perform L2 normalization of the embedding vectors.} +#' query for this dimensional reduction step. If +#' \code{reduction = "lsiproject"}, the stored LSI dimension reduction in the +#' reference object is used to project the query dataset onto the reference. +#' If \code{l2.norm} is set to \code{TRUE}, perform L2 normalization of the +#' embedding vectors.} #' \item{Identify anchors between the reference and query - pairs of cells #' from each dataset that are contained within each other's neighborhoods #' (also known as mutual nearest neighbors).} @@ -488,6 +614,14 @@ FindIntegrationAnchors <- function( #' \itemize{ #' \item{pcaproject: Project the PCA from the reference onto the query. We #' recommend using PCA when reference and query datasets are from scRNA-seq} +#' \item{lsiproject: Project the LSI from the reference onto the query. We +#' recommend using LSI when reference and query datasets are from scATAC-seq. +#' This requires that LSI has been computed for the reference dataset, and the +#' same features (eg, peaks or genome bins) are present in both the reference +#' and query. See \code{\link[Signac]{RunTFIDF}} and +#' \code{\link[Signac]{RunSVD}}} +#' \item{rpca: Project the PCA from the reference onto the query, and the PCA +#' from the query onto the reference (reciprocal PCA projection).} #' \item{cca: Run a CCA on the reference and query } #' } #' @param reference.reduction Name of dimensional reduction to use from the @@ -502,6 +636,7 @@ FindIntegrationAnchors <- function( #' @param features Features to use for dimensional reduction. If not specified, #' set as variable features of the reference object which are also present in #' the query. +#' @param scale Scale query data. #' @param normalization.method Name of normalization method used: LogNormalize #' or SCT. #' @param recompute.residuals If using SCT as a normalization method, compute @@ -523,7 +658,7 @@ FindIntegrationAnchors <- function( #' @param n.trees More trees gives higher precision when using annoy approximate #' nearest neighbor search #' @param eps Error bound on the neighbor finding algorithm (from -#' \code{\link{RANN}}) +#' \code{\link{RANN}} or \code{\link{RcppAnnoy}}) #' @param approx.pca Use truncated singular value decomposition to approximate #' PCA #' @param mapping.score.k Compute and store nearest k query neighbors in the @@ -533,12 +668,19 @@ FindIntegrationAnchors <- function( #' @param verbose Print progress bars and output #' #' @return Returns an \code{AnchorSet} object that can be used as input to -#' \code{\link{TransferData}} +#' \code{\link{TransferData}}, \code{\link{IntegrateEmbeddings}} and +#' \code{\link{MapQuery}}. The dimension reduction used for finding anchors is +#' stored in the \code{AnchorSet} object and can be used for computing anchor +#' weights in downstream functions. Note that only the requested dimensions are +#' stored in the dimension reduction object in the \code{AnchorSet}. This means +#' that if \code{dims=2:20} is used, for example, the dimension of the stored +#' reduction is \code{1:19}. #' #' @references Stuart T, Butler A, et al. Comprehensive Integration of #' Single-Cell Data. Cell. 
2019;177:1888-1902 \doi{10.1016/j.cell.2019.05.031}; #' #' @export +#' @importFrom methods slot slot<- #' @concept integration #' @examples #' \dontrun{ @@ -582,6 +724,7 @@ FindTransferAnchors <- function( reference.reduction = NULL, project.query = FALSE, features = NULL, + scale = TRUE, npcs = 30, l2.norm = TRUE, dims = 1:30, @@ -609,6 +752,7 @@ reference.reduction = reference.reduction, project.query = project.query, features = features, + scale = scale, npcs = npcs, l2.norm = l2.norm, dims = dims, @@ -624,6 +768,7 @@ verbose = verbose ) projected <- ifelse(test = reduction == "pcaproject", yes = TRUE, no = FALSE) + reduction.2 <- character() feature.mean <- NULL if (normalization.method == "SCT") { # ensure all residuals required are computed @@ -670,6 +815,13 @@ features = features, scale.data = TRUE ) + # check assay in the reference.reduction + if (!is.null(reference.reduction) && + slot(object = reference[[reference.reduction]], name = "assay.used") != reference.assay) { + warning("reference assay is different from the assay.used in ", reference.reduction) + slot(object = reference[[reference.reduction]], name = "assay.used") <- reference.assay + } + reference <- DietSeurat( object = reference, assays = reference.assay, @@ -695,7 +847,12 @@ message("Performing PCA on the provided query using ", length(x = features), " features as input.") } if (normalization.method == "LogNormalize") { - query <- ScaleData(object = query, features = features, verbose = FALSE) + query <- ScaleData( + object = query, + features = features, + do.scale = scale, + verbose = FALSE + ) } query <- RunPCA( object = query, @@ -710,6 +867,7 @@ reference = query, reduction = reference.reduction, query = reference, + scale = scale, dims = dims, verbose = verbose ) @@ -722,7 +880,7 @@ message("Performing PCA on the provided reference using ", length(x = features), " features as input.") } if (normalization.method == "LogNormalize") { - reference <- ScaleData(object = reference, features = features, verbose = FALSE) + reference <- ScaleData(object = reference, features = features, do.scale = scale, verbose = FALSE) } reference <- RunPCA( object = reference, @@ -736,6 +894,7 @@ reference = reference, reduction = reference.reduction, query = query, + scale = scale, dims = dims, feature.mean = feature.mean, verbose = verbose @@ -756,11 +915,104 @@ colnames(x = orig.loadings) <- paste0("ProjectPC_", 1:ncol(x = orig.loadings)) Loadings(object = combined.ob[["pcaproject"]]) <- orig.loadings[, dims] } + # Use reciprocal PCA projection in anchor finding + if (reduction == "rpca") { + # Run PCA on reference and query + if (is.null(x = reference.reduction)) { + reference.reduction <- "pca" + if (verbose) { + message("Performing PCA on the provided reference using ", length(x = features), " features as input.") + } + if (normalization.method == "LogNormalize") { + reference <- ScaleData( + object = reference, + features = features, + do.scale = scale, + verbose = verbose + ) + } + reference <- RunPCA( + object = reference, + npcs = npcs, + verbose = FALSE, + features = features, + approx = approx.pca + ) + } + if (verbose) { + message("Performing PCA on the provided query using ", length(x = features), " features as input.") + } + if (normalization.method == 
"LogNormalize") { + query <- ScaleData( + object = query, + features = features, + do.scale = scale, + verbose = verbose + ) + } + query <- RunPCA( + object = query, + npcs = ncol(x = reference[[reference.reduction]]), + reduction.name = reference.reduction, + verbose = FALSE, + features = features, + approx = approx.pca + ) + combined.ob <- ReciprocalProject( + object.1 = reference, + object.2 = query, + reduction = reference.reduction, + dims = dims, + projected.name = reduction, + features = features, + do.scale = FALSE, + do.center = FALSE, + slot = 'scale.data', + l2.norm = l2.norm, + verbose = verbose + ) + # pcaproject is used as the weight.matrix in MapQuery + projected.pca <- ProjectCellEmbeddings( + reference = reference, + reduction = reference.reduction, + query = query, + scale = scale, + dims = dims, + feature.mean = feature.mean, + verbose = verbose + ) + orig.embeddings <- Embeddings(object = reference[[reference.reduction]])[, dims] + orig.loadings <- Loadings(object = reference[[reference.reduction]]) + + combined.pca <- CreateDimReducObject( + embeddings = as.matrix(x = rbind(orig.embeddings, projected.pca)), + key = "ProjectPC_", + assay = reference.assay + ) + combined.ob[["pcaproject"]] <- combined.pca + colnames(x = orig.loadings) <- paste0("ProjectPC_", 1:ncol(x = orig.loadings)) + Loadings(object = combined.ob[["pcaproject"]]) <- orig.loadings[, dims] + + if (l2.norm) { + # L2 norm is done on each projected PCA in ReciprocalProject, so turn it off here + # avoids later error as we now have two reductions (rpca.ref and rpca.query) + l2.norm <- FALSE + reduction <- "rpca.ref.l2" + reduction.2 <- "rpca.query.l2" + } else { + reduction <- "rpca.ref" + reduction.2 <- "rpca.query" + } + if (project.query) { + reduction <- gsub(".ref", ".query", reduction) + reduction.2 <- gsub(".query", ".ref", reduction.2) + } + } # Run CCA as dimension reduction to be used in anchor finding if (reduction == 'cca') { if (normalization.method == "LogNormalize") { - reference <- ScaleData(object = reference, features = features, verbose = FALSE) - query <- ScaleData(object = query, features = features, verbose = FALSE) + reference <- ScaleData(object = reference, features = features, do.scale = scale, verbose = FALSE) + query <- ScaleData(object = query, features = features, do.scale = scale, verbose = FALSE) } combined.ob <- RunCCA( object1 = reference, @@ -771,6 +1023,48 @@ FindTransferAnchors <- function( rescale = FALSE, verbose = verbose ) + slot(object = combined.ob[["cca"]], name = "cell.embeddings") <- Embeddings(combined.ob[["cca"]])[, dims] + slot(object = combined.ob[["cca"]], name = "feature.loadings") <- Loadings(combined.ob[["cca"]])[, dims] + slot(object = combined.ob[["cca"]], name = "feature.loadings.projected") <- Loadings(object = combined.ob[["cca"]], projected = TRUE)[, dims] + } + if (reduction == "lsiproject") { + if (project.query) { + projected.lsi <- ProjectSVD( + reduction = query[[reference.reduction]], + data = GetAssayData(object = reference, assay = reference.assay, slot = "data"), + mode = "lsi", + do.center = TRUE, + do.scale = FALSE, + use.original.stats = FALSE, + verbose = verbose + ) + orig.embeddings <- Embeddings(object = query[[reference.reduction]]) + orig.loadings <- Loadings(object = query[[reference.reduction]]) + } else { + projected.lsi <- ProjectSVD( + reduction = reference[[reference.reduction]], + data = GetAssayData(object = query, assay = query.assay, slot = "data"), + mode = "lsi", + do.center = TRUE, + do.scale = FALSE, + 
use.original.stats = FALSE, + verbose = verbose + ) + orig.embeddings <- Embeddings(object = reference[[reference.reduction]]) + orig.loadings <- Loadings(object = reference[[reference.reduction]]) + } + combined.lsi <- CreateDimReducObject( + embeddings = as.matrix(x = rbind(orig.embeddings, projected.lsi))[,dims], + key = "ProjectLSI_", + assay = reference.assay + ) + combined.ob <- merge( + x = DietSeurat(object = reference), + y = DietSeurat(object = query) + ) + combined.ob[["lsiproject"]] <- combined.lsi + colnames(x = orig.loadings) <- paste0("ProjectLSI_", 1:ncol(x = orig.loadings)) + Loadings(object = combined.ob[["lsiproject"]]) <- orig.loadings[,dims] } if (l2.norm) { combined.ob <- L2Dim(object = combined.ob, reduction = reduction) @@ -814,6 +1108,7 @@ FindTransferAnchors <- function( cells1 = colnames(x = reference), cells2 = colnames(x = query), reduction = reduction, + reduction.2 = reduction.2, internal.neighbors = precomputed.neighbors, dims = 1:length(x = dims), k.anchor = k.anchor, @@ -1564,7 +1859,11 @@ LocalStruct <- function( #' This is a convenience wrapper function around the following three functions #' that are often run together when mapping query data to a reference: #' \code{\link{TransferData}}, \code{\link{IntegrateEmbeddings}}, -#' \code{\link{ProjectUMAP}}. +#' \code{\link{ProjectUMAP}}. Note that by default, the \code{weight.reduction} +#' parameter for all functions will be set to the dimension reduction method +#' used in the \code{\link{FindTransferAnchors}} function call used to construct +#' the anchor object, and the \code{dims} parameter will be the same dimensions +#' used to find anchors. #' #' @inheritParams IntegrateEmbeddings #' @inheritParams TransferData @@ -1575,11 +1874,15 @@ LocalStruct <- function( #' \code{\link{IntegrateEmbeddings}} #' @param projectumap.args A named list of additional arguments to #' \code{\link{ProjectUMAP}} -#' @return Returns a modified query Seurat object containing new Assays -#' corresponding to the features transferred and/or their corresponding prediction -#' scores from TransferData. An integrated reduction from IntegrateEmbeddings. -#' And an projected umap reduction of the query cells projected into the -#' reference umap. 
+#' @return Returns a modified query Seurat object containing: +#' +#' \itemize{ +#'   \item{New Assays corresponding to the features transferred and/or their +#'   corresponding prediction scores from \code{\link{TransferData}}} +#'   \item{An integrated reduction from \code{\link{IntegrateEmbeddings}}} +#'   \item{A projected UMAP reduction of the query cells projected into the +#'   reference UMAP using \code{\link{ProjectUMAP}}} +#' } #' #' @export #' @concept integration @@ -1591,26 +1894,77 @@ MapQuery <- function( refdata = NULL, new.reduction.name = NULL, reference.reduction = NULL, + reference.dims = NULL, + query.dims = NULL, reduction.model = NULL, transferdata.args = list(), integrateembeddings.args = list(), projectumap.args = list(), verbose = TRUE ) { - reference.reduction <- reference.reduction %||% slot(object = anchorset, name = "command")$reference.reduction - new.reduction.name <- new.reduction.name %||% paste0("ref.", reference.reduction) + # determine anchor type + if (grepl(pattern = "pca", x = slot(object = anchorset, name = "command")$reduction)) { + anchor.reduction <- "pcaproject" + } else if (grepl(pattern = "cca", x = slot(object = anchorset, name = "command")$reduction)) { + anchor.reduction <- "cca" + ref.cca.embedding <- Embeddings( + slot(object = anchorset, name = "object.list")[[1]][["cca"]] + )[slot(object = anchorset, name = "reference.cells"), ] + rownames(x = ref.cca.embedding) <- gsub( + pattern = "_reference", + replacement = "", + x = rownames(x = ref.cca.embedding) + ) + query.cca.embedding <- Embeddings( + slot(object = anchorset, name = "object.list")[[1]][["cca"]] + )[slot(object = anchorset, name = "query.cells"), ] + rownames(x = query.cca.embedding) <- gsub( + pattern = "_query", + replacement = "", + x = rownames(x = query.cca.embedding) + ) + reference[["cca"]] <- CreateDimReducObject( + embeddings = ref.cca.embedding, + key = "CCA_", + assay = DefaultAssay(reference) + ) + query[["cca"]] <- CreateDimReducObject( + embeddings = query.cca.embedding, + key = "CCA_", + assay = DefaultAssay(query) + ) + reference.reduction <- new.reduction.name <- "cca" + reference.dims <- query.dims <- 1:ncol(x = ref.cca.embedding) + } else if (grepl(pattern = "lsi", x = slot(object = anchorset, name = "command")$reduction)) { + anchor.reduction <- "lsiproject" + } else { + stop("unknown type of anchors") + } + + reference.reduction <- reference.reduction %||% + slot(object = anchorset, name = "command")$reference.reduction %||% + anchor.reduction + new.reduction.name <- new.reduction.name %||% + paste0("ref.", reference.reduction) + + # checking TransferData parameters td.badargs <- names(x = transferdata.args)[!names(x = transferdata.args) %in% names(x = formals(fun = TransferData))] if (length(x = td.badargs) > 0) { warning("The following arguments in transferdata.args are not valid: ", paste(td.badargs, collapse = ", "), immediate. = TRUE, call. = FALSE) } transferdata.args <- transferdata.args[names(x = transferdata.args) %in% names(x = formals(fun = TransferData))] + transferdata.args$weight.reduction <- transferdata.args$weight.reduction %||% anchor.reduction + # checking IntegrateEmbeddings parameters ie.badargs <- names(x = integrateembeddings.args)[!names(x = integrateembeddings.args) %in% names(x = formals(fun = IntegrateEmbeddings.TransferAnchorSet))] if (length(x = ie.badargs) > 0) { warning("The following arguments in integrateembeddings.args are not valid: ", paste(ie.badargs, collapse = ", "), immediate. = TRUE, call. 
= FALSE) } - integrateembeddings.args <- integrateembeddings.args[names(x = integrateembeddings.args) %in% names(x = formals(fun = IntegrateEmbeddings))] + integrateembeddings.args <- integrateembeddings.args[names(x = integrateembeddings.args) %in% names(x = formals(fun = IntegrateEmbeddings.TransferAnchorSet))] + integrateembeddings.args$reductions <- integrateembeddings.args$reductions %||% anchor.reduction + integrateembeddings.args$weight.reduction <- integrateembeddings.args$weight.reduction %||% anchor.reduction + slot(object = query, name = "tools")$TransferData <- NULL reuse.weights.matrix <- FALSE if (!is.null(x = refdata)) { @@ -1626,26 +1980,32 @@ MapQuery <- function( ), transferdata.args ) ) - reuse.weights.matrix <- TRUE + if (transferdata.args$weight.reduction == integrateembeddings.args$weight.reduction) { + reuse.weights.matrix <- TRUE + } } - - query <- do.call( - what = IntegrateEmbeddings, - args = c(list( - anchorset = anchorset, - reference = reference, - query = query, - new.reduction.name = new.reduction.name, - reuse.weights.matrix = reuse.weights.matrix, - verbose = verbose + if (anchor.reduction != "cca"){ + query <- do.call( + what = IntegrateEmbeddings, + args = c(list( + anchorset = anchorset, + reference = reference, + query = query, + new.reduction.name = new.reduction.name, + reuse.weights.matrix = reuse.weights.matrix, + verbose = verbose ), integrateembeddings.args + ) ) - ) + } slot(object = query, name = "tools")$TransferData <- NULL - if (!is.null(x = reduction.model)) { - ref.dims <- slot(object = anchorset, name = "command")$dims - query.dims <- 1:ncol(x = slot(object = anchorset, name = "object.list")[[1]][["pcaproject"]]) + reference.dims <- reference.dims %||% slot(object = anchorset, name = "command")$dims + query.dims <- query.dims %||% 1:ncol(x = query[[new.reduction.name]]) + if (length(x = query.dims) != length(x = reference.dims)) { + message("Query and reference dimensions are not equal, proceeding with reference dimensions.") + query.dims <- reference.dims + } query <- do.call( what = ProjectUMAP, args = c(list( @@ -1653,7 +2013,7 @@ MapQuery <- function( query.reduction = new.reduction.name, query.dims = query.dims, reference = reference, - reference.dims = ref.dims, + reference.dims = reference.dims, reference.reduction = reference.reduction, reduction.model = reduction.model ), projectumap.args @@ -2373,6 +2733,7 @@ SelectIntegrationFeatures <- function( #' anchors. Options are: #' \itemize{ #' \item{pcaproject: Use the projected PCA used for anchor building} +#' \item{lsiproject: Use the projected LSI used for anchor building} #' \item{pca: Use an internal PCA on the query only} #' \item{cca: Use the CCA used for anchor building} #' \item{custom DimReduc: User provided \code{\link{DimReduc}} object @@ -2380,7 +2741,9 @@ SelectIntegrationFeatures <- function( #' } #' @param l2.norm Perform L2 normalization on the cell embeddings after #' dimensional reduction -#' @param dims Set of dimensions to use in the anchor weighting procedure +#' @param dims Set of dimensions to use in the anchor weighting procedure. If +#' NULL, the same dimensions that were used to find anchors will be used for +#' weighting. 
#' @param k.weight Number of neighbors to consider when weighting anchors #' @param sd.weight Controls the bandwidth of the Gaussian kernel for weighting #' @param eps Error bound on the neighbor finding algorithm (from @@ -2513,6 +2876,13 @@ TransferData <- function( combined.ob <- L2Dim(object = combined.ob, reduction = 'pca') } } + if (!inherits(x = weight.reduction, what = "DimReduc") && weight.reduction == "lsi") { + if (!("lsi" %in% Reductions(object = query))) { + stop("Requested lsi for weight.reduction, but lsi not stored in query object.") + } else { + weight.reduction <- query[["lsi"]] + } + } if (inherits(x = weight.reduction, what = "DimReduc")) { weight.reduction <- RenameCells( object = weight.reduction, @@ -3076,6 +3446,7 @@ FindAnchors <- function( eps = eps, verbose = verbose ) + object.pair <- FindAnchorPairs( object = object.pair, integration.name = "integrated", @@ -3305,8 +3676,8 @@ FindNN <- function( } if (length(x = reduction.2) > 0) { nnab <- NNHelper( - data = Embeddings(object = object[[reduction.2]])[cells2, ], - query = Embeddings(object = object[[reduction.2]])[cells1, ], + data = Embeddings(object = object[[reduction.2]])[cells2, nn.dims], + query = Embeddings(object = object[[reduction.2]])[cells1, nn.dims], k = k, method = nn.method, n.trees = n.trees, @@ -3314,8 +3685,8 @@ FindNN <- function( index = nn.idx2 ) nnba <- NNHelper( - data = Embeddings(object = object[[reduction]])[cells1, ], - query = Embeddings(object = object[[reduction]])[cells2, ], + data = Embeddings(object = object[[reduction]])[cells1, nn.dims], + query = Embeddings(object = object[[reduction]])[cells2, nn.dims], k = k, method = nn.method, n.trees = n.trees, @@ -3703,7 +4074,7 @@ PairwiseIntegrateReference <- function( if (length(x = object.list) == 2) { weight.reduction <- list(NULL, weight.reduction) } else if (inherits(x = weight.reduction, what = "character")) { - weight.reduction <- rep(x = weight.reduction, times = length(x = object.list)) + weight.reduction <- as.list(x = rep(x = weight.reduction, times = length(x = object.list))) } else { stop("Invalid input for weight.reduction. 
Please specify either the names of the dimension ", "reduction for each object in the list or provide DimReduc objects.") } @@ -3712,6 +4083,9 @@ if (length(x = weight.reduction) != length(x = object.list)) { stop("Please specify a dimension reduction for each object, or one dimension reduction to be used for all objects") } + if (inherits(x = weight.reduction, what = "character")) { + weight.reduction <- as.list(x = weight.reduction) + } available.reductions <- lapply(X = object.list, FUN = FilterObjects, classes.keep = 'DimReduc') for (ii in 1:length(x = weight.reduction)) { if (ii == 1 & is.null(x = weight.reduction[[ii]])) next @@ -3742,6 +4116,9 @@ PairwiseIntegrateReference <- function( y = object.list[reference.objects[2:length(x = reference.objects)]] )) names(x = object.list) <- as.character(-(1:length(x = object.list))) + if (!is.null(x = weight.reduction)) { + names(x = weight.reduction) <- names(x = object.list) + } if (verbose & (length(x = reference.objects) != length(x = object.list))) { message("Building integrated reference") } @@ -3753,6 +4130,12 @@ PairwiseIntegrateReference <- function( merge.pair <- rev(x = merge.pair) sample.tree[ii, ] <- as.numeric(merge.pair) } + if (!is.null(x = weight.reduction)) { + # extract the correct dimreduc objects, in the correct order + weight.pair <- weight.reduction[merge.pair] + } else { + weight.pair <- NULL + } object.1 <- DietSeurat( object = object.list[[merge.pair[1]]], assays = DefaultAssay(object = object.list[[merge.pair[1]]]), @@ -3893,25 +4276,29 @@ ParseRow <- function(clustering, i){ return(unlist(datasets)) } -ProjectCellEmbeddings <- function( +# Rescale query with the mean and SD from the reference, or with a known mean and SD +# +# @param reference A reference object +# @param query A query object +# @param features Features to scale +# @param scale Scale data (divide by SD) +# @return Returns a matrix containing the scaled query data +RescaleQuery <- function( reference, query, - reduction = "pca", reference.assay = NULL, query.assay = NULL, - dims = 1:50, - scale = TRUE, - verbose = TRUE, + features = NULL, feature.mean = NULL, - feature.sd = NULL + feature.sd = NULL, + scale = TRUE ) { - if (verbose) { - message("Projecting cell embeddings") - } reference.assay <- reference.assay %||% DefaultAssay(object = reference) query.assay <- query.assay %||% DefaultAssay(object = query) - features <- rownames(x = Loadings(object = reference[[reduction]])) - features <- intersect(x = features, y = rownames(x = query[[query.assay]])) + features <- features %||% intersect( + rownames(x = reference[[reference.assay]]), + rownames(x = query[[query.assay]]) + ) reference.data <- GetAssayData( object = reference, assay = reference.assay, @@ -3922,7 +4309,7 @@ ProjectCellEmbeddings <- function( slot = "data")[features, ] if (is.null(x = feature.mean)) { feature.mean <- rowMeans(x = reference.data) - if(scale){ + if (scale) { feature.sd <- sqrt( x = SparseRowVar2( mat = as(object = reference.data, Class = "dgCMatrix"), @@ -3951,11 +4338,130 @@ ProjectCellEmbeddings <- function( ) } dimnames(x = proj.data) <- store.names + return(proj.data) +} + +ProjectCellEmbeddings <- function( + reference, + query, + reduction = "pca", + reference.assay = NULL, + query.assay = NULL, + dims = 1:50, + scale = TRUE, + verbose = TRUE, + feature.mean = NULL, + feature.sd = NULL +) { + if (verbose) { + message("Projecting cell embeddings") + } + reference.assay <- reference.assay %||% DefaultAssay(object = reference) + query.assay <- 
query.assay %||% DefaultAssay(object = query) + features <- rownames(x = Loadings(object = reference[[reduction]])) + features <- intersect(x = features, y = rownames(x = query[[query.assay]])) + proj.data <- RescaleQuery( + reference = reference, + query = query, + features = features, + scale = scale, + feature.mean = feature.mean, + feature.sd = feature.sd + ) ref.feature.loadings <- Loadings(object = reference[[reduction]])[features, dims] proj.pca <- t(crossprod(x = ref.feature.loadings, y = proj.data)) return(proj.pca) } +# Project new data onto SVD (LSI or PCA) +# +# A = UΣV' (SVD) +# U' = V'A / Σ (LSI projection of new data A) +# +# Note that because in LSI we don't multiply by Σ to get the embeddings (they are just U), +# we need to divide by Σ in the projection to get the equivalent. We therefore need +# the singular values, which (in Signac RunLSI) we store in the DimReduc misc slot. +# +# @param reduction A \code{DimReduc} object containing the SVD dimension +# reduction. Assumes original irlba output is stored in the misc slot of the dimreduc. +# @param data A data matrix to project onto the SVD. Must contain the same +# features used to construct the original SVD. +# @param mode "pca" or "lsi". Determines if we divide projected values by singular values. +# @param features Features to use. If NULL, use all common features between +# the dimreduc and the data matrix. +# @param do.center Center the projected cell embeddings (subtract mean across cells) +# @param do.scale Scale the projected cell embeddings (divide by standard deviation across cells) +# @param use.original.stats When standardizing the vectors, use the mean and standard deviation +# of the original vectors from the SVD, rather than the mean and standard deviation of the +# projected vectors. +# @param dims A vector containing the dimensions to use in the projection. If NULL (default), +# project to all dimensions in the input SVD. 
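+# Worked sketch of the computation (a reading of the code below, not separate
+# upstream documentation): Loadings() returns V with features as rows and dims
+# as columns; for new data A (features x cells), crossprod(vt, data) computes
+# t(V) %*% A, a dims x cells matrix. With mode = "lsi", each row i is then
+# divided by the singular value d[i] from misc$d, and the result is returned
+# transposed so that cells are rows, comparable to the reference embeddings.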
+# @param verbose Display messages +# +# @return Returns a matrix +#' @importFrom Matrix crossprod +# @export +ProjectSVD <- function( + reduction, + data, + mode = "pca", + features = NULL, + do.center = FALSE, + do.scale = FALSE, + use.original.stats = FALSE, + dims = NULL, + verbose = TRUE +) { + vt <- Loadings(object = reduction) + dims <- dims %||% seq_len(length.out = ncol(x = vt)) + features <- features %||% rownames(x = vt) + features <- intersect(x = features, y = rownames(x = data)) + vt <- vt[features, dims] + data <- data[features, ] + if (verbose) { + message("Projecting new data onto SVD") + } + projected.u <- as.matrix(x = crossprod(x = vt, y = data)) + if (mode == "lsi") { + components <- slot(object = reduction, name = 'misc') + sigma <- components$d + projected.u <- projected.u / sigma[dims] + } + if (do.center) { + if (use.original.stats) { + components <- slot(object = reduction, name = 'misc') + if ("u" %in% names(x = components)) { + # preferentially use original irlba output stored in misc + # signac scales and centers embeddings by default + embed.mean <- apply(X = components$u, MARGIN = 2, FUN = mean) + } else { + # raw irlba output not stored, fall back to the reference embeddings + ref.emb <- Embeddings(object = reduction) + embed.mean <- apply(X = ref.emb, MARGIN = 2, FUN = mean) + } + } else { + # projected.u is transposed so use MARGIN = 1 + embed.mean <- apply(X = projected.u, MARGIN = 1, FUN = mean) + } + projected.u <- projected.u - embed.mean + } + if (do.scale) { + if (use.original.stats) { + components <- slot(object = reduction, name = 'misc') + if ("u" %in% names(x = components)) { + embed.sd <- apply(X = components$u, MARGIN = 2, FUN = sd) + } else { + ref.emb <- Embeddings(object = reduction) + embed.sd <- apply(X = ref.emb, MARGIN = 2, FUN = sd) + } + } else { + embed.sd <- apply(X = projected.u, MARGIN = 1, FUN = sd) + } + projected.u <- projected.u / embed.sd + } + return(t(x = projected.u)) +} + # Calculate position along a defined reference range for a given vector of # numerics. Will range from 0 to 1. # @@ -4108,6 +4614,7 @@ RunIntegration <- function( ) dims <- 1:ncol(x = dr.weights) } else { + # need to match order of objects dr <- weight.reduction[[2]] if (!all(cells2 %in% rownames(x = dr))) { stop("Query cells not present in supplied DimReduc object. Set weight.reduction to a DimReduc object containing the query cells.") @@ -4117,6 +4624,7 @@ RunIntegration <- function( } else { dr.weights <- query[[dr]] } + dims <- 1:ncol(x = dr.weights) } merged.obj <- FindWeights( object = merged.obj, @@ -4325,6 +4833,7 @@ ValidateParams_FindTransferAnchors <- function( reference.reduction, project.query, features, + scale, npcs, l2.norm, dims, @@ -4347,12 +4856,15 @@ ValidateParams_FindTransferAnchors <- function( ModifyParam(param = "reference", value = reference) DefaultAssay(object = query) <- query.assay ModifyParam(param = "query", value = query) + if (!is.logical(x = scale)) { + stop("Scale should be TRUE or FALSE") + } if (length(x = reference) > 1 | length(x = query) > 1) { stop("We currently only support transfer between a single query and reference", call. = FALSE) } - if (!reduction %in% c("pcaproject", "cca")) { - stop("Please select either pcaproject or cca for the reduction parameter.", + if (!reduction %in% c("pcaproject", "cca", "lsiproject", "rpca")) { + stop("Please select either pcaproject, rpca, cca, or lsiproject for the reduction parameter.", call. 
= FALSE) } if (reduction == "cca" && !is.null(x = reference.reduction)) { @@ -4366,6 +4878,9 @@ if (normalization.method == "SCT") { ModifyParam(param = "k.filter", value = NA) } + if (reduction == "lsiproject") { + ModifyParam(param = "k.filter", value = NA) + } if (!is.na(x = k.filter) && k.filter > ncol(x = query)) { warning("k.filter is larger than the number of cells present in the query.\n", "Continuing without anchor filtering.", @@ -4536,6 +5051,9 @@ } } } else { + if (reduction == "lsiproject") { + stop("Must supply a reference reduction if reduction='lsiproject'") + } mdim <- max(dims) if (npcs < mdim) { warning("npcs is smaller than the largest value requested by the dims ", @@ -4667,17 +5185,18 @@ ValidateParams_TransferData <- function( stop("None of the provided refdata elements are valid.", call. = FALSE) } ModifyParam(param = "refdata", value = refdata) + valid.weight.reduction <- c("pcaproject", "pca", "cca", "rpca.ref", "lsiproject", "lsi") if (!inherits(x = weight.reduction, "DimReduc")) { - if (!weight.reduction %in% c("pcaproject", "pca", "cca")) { - stop("Please provide one of pcaproject, pca, cca, or a custom DimReduc to ", + if (!weight.reduction %in% valid.weight.reduction) { + stop("Please provide one of ", paste(valid.weight.reduction, collapse = ", "), " or a custom DimReduc to ", "the weight.reduction parameter.", call. = FALSE) } - if (weight.reduction %in% c("pcaproject", "cca") && + if (weight.reduction %in% c("pcaproject", "cca", "rpca.ref", "lsiproject") && !weight.reduction %in% Reductions(object = combined.ob)) { stop("Specified weight.reduction (", weight.reduction, ") is not present ", "in the provided anchorset.", call. = FALSE) } - if (weight.reduction == "pca" && is.null(x = query)) { + if (weight.reduction %in% c("pca", "lsi") && is.null(x = query)) { stop("To use an internal PCA or LSI on the query for weight.reduction, ", "please provide the query object.", call. = FALSE) } @@ -4698,7 +5217,11 @@ } if (!is.null(x = query)) { - if (!isTRUE(x = all.equal(gsub(pattern = "_query", replacement = "", x = query.cells), Cells(x = query)))) { + if (!isTRUE(x = all.equal( + target = gsub(pattern = "_query", replacement = "", x = query.cells), + current = Cells(x = query), + check.attributes = FALSE) + )) { stop("Query object provided contains a different set of cells from the ", "query used to construct the AnchorSet provided.", call. 
= FALSE) } @@ -4753,7 +5276,7 @@ ValidateParams_IntegrateEmbeddings_IntegrationAnchors <- function( if (!is.null(x = weight.reduction)) { if (inherits(x = weight.reduction, what = "character")) { if (length(x = weight.reduction) == 1) { - weight.reduction <- rep(x = weight.reduction, times = nobs) + weight.reduction <- as.list(x = rep(x = weight.reduction, times = nobs)) } ModifyParam(param = 'weight.reduction', value = weight.reduction) for (i in 1:nobs) { @@ -4826,7 +5349,7 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( } query.cells <- slot(object = anchorset, name = "query.cells") query.cells <- gsub(pattern = "_query", replacement = "", x = query.cells) - if (!isTRUE(x = all.equal(target = query.cells, current = Cells(x = query)))) { + if (!isTRUE(x = all.equal(target = query.cells, current = Cells(x = query), check.attributes = FALSE))) { stop("The set of cells used as a query in the AnchorSet does not match ", "the set of cells provided in the query object.") } @@ -4862,26 +5385,52 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( ModifyParam(param = "dims.to.integrate", value = dims.to.integrate) if (isTRUE(x = reuse.weights.matrix)) { weights.matrix <- Tool(object = query, slot = "TransferData")$weights.matrix - if (nrow(x = weights.matrix) != nrow(x = slot(object = anchorset, name = "anchors"))) { + if (is.null(x = weights.matrix)) { + message("Requested to reuse weights matrix, but no weights found. Computing new weights.") + reuse.weights.matrix <- FALSE + } else if (nrow(x = weights.matrix) != nrow(x = slot(object = anchorset, name = "anchors"))) { stop("The number of anchors in the weights matrix stored in the query (", nrow(x = weights.matrix), ") doesn't match the number of anchors ", "in the anchorset (", nrow(x = slot(object = anchorset, name = "anchors")), ").", call. = FALSE) + } else { + ModifyParam(param = 'weights.matrix', value = weights.matrix) } - ModifyParam(param = 'weights.matrix', value = weights.matrix) - } else { + } + # check T/F again due to possible modification in above + if (isFALSE(x = reuse.weights.matrix)) { if (k.weight > ncol(x = query)) { stop("k.weight (", k.weight, ") is set larger than the number of cells in ", "the query object (", ncol(x = query), "). Please choose a smaller ", "k.weight.", call. = FALSE) } + if (inherits(x = weight.reduction, what = "list")) { + if (length(x = weight.reduction) > 2) { + stop("Supplied too many dimension reduction objects for weight.reduction. ", + "Should supply a single DimReduc object.") + } + if (length(x = weight.reduction) == 2) { + # take the second element as the dimreduc to use for query + weight.reduction <- weight.reduction[[2]] + } + } if (inherits(x = weight.reduction, what = "character")) { + if (length(x = weight.reduction) > 2) { + stop("Supplied too many dimension reduction names for weight.reduction. ", + "Should supply the name of a single DimReduc present in the query.") + } + if (length(x = weight.reduction) == 2) { + # take the second element as the dimreduc to use for query + weight.reduction <- weight.reduction[[2]] + } if (!weight.reduction %in% Reductions(object = query)) { stop("The weight.reduction ", weight.reduction, " is not present in the ", "query object.", call. 
= FALSE) } + ModifyParam(param = 'weight.reduction', value = list(NULL, query[[weight.reduction]])) } if (inherits(x = weight.reduction, what = "DimReduc")) { + weight.reduction <- RenameCells(object = weight.reduction, new.names = paste0(Cells(x = weight.reduction), "_query")) if (!isTRUE(all.equal( target = Cells(x = weight.reduction), current = Cells(x = query) @@ -4889,6 +5438,7 @@ ValidateParams_IntegrateEmbeddings_TransferAnchors <- function( stop("Cell names in the provided weight.reduction don't ", "match with the cell names in the query object.", call. = FALSE) } + ModifyParam(param = 'weight.reduction', value = list(NULL, weight.reduction)) } } } diff --git a/R/mixscape.R b/R/mixscape.R index f0a68eabc..3504563dd 100644 --- a/R/mixscape.R +++ b/R/mixscape.R @@ -796,7 +796,7 @@ RunMixscape <- function( message(" ", gene) } de.genes <- prtb_markers[[s]][[gene]] - dat <- GetAssayData(object = object[[assay]], slot = "data")[de.genes, all.cells] + dat <- GetAssayData(object = object[[assay]], slot = "data")[de.genes, all.cells, drop = FALSE] if (slot == "scale.data") { dat <- ScaleData(object = dat, features = de.genes, verbose = FALSE) } @@ -806,12 +806,12 @@ RunMixscape <- function( while (!converged && n.iter < iter.num) { Idents(object = object) <- new.class.name guide.cells <- intersect(x = WhichCells(object = object, idents = gene), y = cells.s) - vec <- rowMeans2(x = dat[, guide.cells]) - rowMeans2(x = dat[, nt.cells]) + vec <- rowMeans2(x = dat[, guide.cells, drop = FALSE]) - rowMeans2(x = dat[, nt.cells, drop = FALSE]) pvec <- apply(X = dat, MARGIN = 2, FUN = ProjectVec, v2 = vec) if (n.iter == 0){ #store pvec gv <- as.data.frame(x = pvec) - gv[, labels] <- "NT" + gv[, labels] <- nt.class.name gv[intersect(x = rownames(x = gv), y = guide.cells), labels] <- gene gv.list[[gene]][[s]] <- gv } @@ -871,6 +871,8 @@ RunMixscape <- function( #' @param balanced Plot an equal number of genes with both groups of cells. #' @param order.by.prob Order cells on heatmap based on their mixscape knockout #' probability from highest to lowest score. +#' @param group.by (Deprecated) Option to split densities based on mixscape +#' classification. Please use mixscape.class instead #' @param mixscape.class metadata column with mixscape classifications. #' @param prtb.type specify type of CRISPR perturbation expected for labeling #' mixscape classifications. Default is KO. @@ -891,9 +893,9 @@ MixscapeHeatmap <- function( ident.2 = NULL, balanced = TRUE, logfc.threshold = 0.25, - assay="RNA", + assay = "RNA", max.genes = 100, - test.use='wilcox', + test.use ='wilcox', max.cells.group = NULL, order.by.prob = TRUE, group.by = NULL, @@ -904,71 +906,73 @@ MixscapeHeatmap <- function( ... ) { + if (!is.null(x = group.by)) { + message("The group.by parameter is being deprecated. Please use ", + "mixscape.class instead. 
Setting mixscape.class = ", group.by,
+            " and continuing.")
+    mixscape.class <- group.by
+  }
   DefaultAssay(object = object) <- assay
-  if(is.numeric(max.genes)){
-    all.markers <- FindMarkers(object, ident.1 = ident.1, ident.2 = ident.2, only.pos = F, logfc.threshold = logfc.threshold,test.use = test.use)
-    if (balanced){
-      pos.markers <- all.markers[which(all.markers[,fc.name] > (logfc.threshold)),]
-      neg.markers <- all.markers[which(all.markers[,fc.name] < (-logfc.threshold)),]
-
-      if (length(rownames(subset(pos.markers, p_val < pval.cutoff))) < max.genes ){
-        marker.list=c(rownames(subset(pos.markers, p_val < pval.cutoff)))
-        if (length(rownames(subset(neg.markers, p_val < pval.cutoff))) < max.genes){
-          marker.list <- c(marker.list,rownames(subset(neg.markers, p_val < pval.cutoff)))
+  if (is.numeric(x = max.genes)) {
+    all.markers <- FindMarkers(
+      object = object,
+      ident.1 = ident.1,
+      ident.2 = ident.2,
+      only.pos = FALSE,
+      logfc.threshold = logfc.threshold,
+      test.use = test.use
+    )
+    if (balanced) {
+      pos.markers <- all.markers[which(x = all.markers[, fc.name] > (logfc.threshold)), ]
+      neg.markers <- all.markers[which(x = all.markers[, fc.name] < (-logfc.threshold)), ]
+      if (length(x = rownames(x = subset(x = pos.markers, p_val < pval.cutoff))) < max.genes) {
+        marker.list <- c(rownames(x = subset(x = pos.markers, p_val < pval.cutoff)))
+        if (length(x = rownames(x = subset(x = neg.markers, p_val < pval.cutoff))) < max.genes) {
+          marker.list <- c(marker.list, rownames(x = subset(x = neg.markers, p_val < pval.cutoff)))
         } else {
-          marker.list <- c(marker.list, rownames(subset(neg.markers, p_val < pval.cutoff))[1:max.genes])
+          marker.list <- c(marker.list, rownames(x = subset(x = neg.markers, p_val < pval.cutoff))[1:max.genes])
         }
       }
-    }
-    else{
-      pos.markers <- all.markers[which(all.markers[,fc.name] > (logfc.threshold)),]
-      if (length(rownames(subset(pos.markers, p_val < pval.cutoff))) < max.genes ){
-        marker.list <- c(rownames(subset(pos.markers, p_val < pval.cutoff)))
-      } else{
-        marker.list <- c(rownames(subset(pos.markers, p_val < pval.cutoff))[1:max.genes])
+    } else {
+      pos.markers <- all.markers[which(x = all.markers[, fc.name] > (logfc.threshold)),]
+      if (length(x = rownames(x = subset(x = pos.markers, p_val < pval.cutoff))) < max.genes) {
+        marker.list <- c(rownames(x = subset(x = pos.markers, p_val < pval.cutoff)))
+      } else {
+        marker.list <- c(rownames(x = subset(x = pos.markers, p_val < pval.cutoff))[1:max.genes])
       }
     }
-    if(is.null(max.cells.group)){
-      if(is.null(group.by)){
-        sub2 <- subset(object, idents = c(ident.1,ident.2))
-      }
-      else{
-        sub2 <- subset(object, idents = c(ident.1,ident.2))
-        Idents(sub2) <- group.by
-      }
+    if (is.null(x = max.cells.group)) {
+      if (is.null(x = group.by)) {
+        sub2 <- subset(x = object, idents = c(ident.1, ident.2))
+      } else {
+        sub2 <- subset(x = object, idents = c(ident.1, ident.2))
+        Idents(object = sub2) <- group.by
+      }
     }
-
-    else{
-      if (is.null(group.by)){
-        sub2 <- subset(object, idents = c(ident.1, ident.2), downsample = max.cells.group)
-
-      }
-      else{
-        sub <- subset(object, idents = c(ident.1,ident.2))
-        Idents(sub) <- group.by
-        sub2 <- subset(sub, downsample = max.cells.group)
-      }
-
+    else {
+      if (is.null(x = group.by)) {
+        sub2 <- subset(x = object, idents = c(ident.1, ident.2), downsample = max.cells.group)
+      } else {
+        sub <- subset(x = object, idents = c(ident.1, ident.2))
+        Idents(object = sub) <- group.by
+        sub2 <- subset(x = sub, downsample = max.cells.group)
+      }
     }
-
-    sub2 <- ScaleData(sub2, features = marker.list)
-
-    if(isTRUE(order.by.prob)){
-      p_ko <- sub2[[paste0(mixscape.class, "_p_", tolower(prtb.type) )]][,1, drop = FALSE]
-      ordered.cells <- rownames(p_ko)[order(p_ko[,1], decreasing = T)]
-      p <- DoHeatmap(sub2, features = marker.list, label = T, cells = ordered.cells ,...)
-    }
-
-    else{
-      p <- DoHeatmap(sub2, features = marker.list, label = T,cells = sample(Cells(sub)), ...)
+    sub2 <- ScaleData(object = sub2, features = marker.list, assay = assay)
+    if (isTRUE(x = order.by.prob)) {
+      p_ko <- sub2[[paste0(mixscape.class, "_p_", tolower(x = prtb.type))]][, 1, drop = FALSE]
+      ordered.cells <- rownames(x = p_ko)[order(p_ko[, 1], decreasing = TRUE)]
+      p <- DoHeatmap(object = sub2, features = marker.list, label = TRUE, cells = ordered.cells, assay = assay, ...)
+    } else {
+      p <- DoHeatmap(object = sub2, features = marker.list, label = TRUE, cells = sample(x = Cells(x = sub2)), assay = assay, ...)
     }
     return(p)
   }
@@ -981,14 +985,16 @@ MixscapeHeatmap <- function(
 #' function.
 #'
 #' @param object An object of class Seurat.
-#' @param target.gene.ident Class identity for cells sharing the same
-#' perturbation. Name should be the same as the one used to run mixscape.
-#' @param group.by Option to split densities based on mixscape classification.
-#' Default is set to NULL and plots cells by original class ID.
+#' @param target.gene.ident Target gene name to visualize perturbation scores for.
+#' @param target.gene.class Metadata column specifying all target gene names in the experiment.
+#' @param before.mixscape If TRUE, split densities based on the original target
+#' gene classification rather than the mixscape classification; default is FALSE.
 #' @param col Specify color of target gene class or knockout cell class. For
 #' control non-targeting and non-perturbed cells, colors are set to different
 #' shades of grey.
+#' @param mixscape.class Metadata column containing the mixscape classifications.
 #' @param prtb.type specify type of CRISPR perturbation expected for labeling mixscape classifications. Default is KO.
+#' @param split.by Metadata column to facet by; for datasets with more than one
+#' cell type, set this to visualize perturbation scores for each cell type
+#' separately.
 #' @return A ggplot object.
#' #' @importFrom stats median @@ -1000,38 +1006,52 @@ MixscapeHeatmap <- function( #' PlotPerturbScore <- function( object, + target.gene.class = "gene", target.gene.ident = NULL, - group.by = "mixscape_class", + mixscape.class = "mixscape_class", col = "orange2", + split.by = NULL, + before.mixscape = FALSE, prtb.type = "KO" -) { +){ + + if(is.null(target.gene.ident) == TRUE){ + message("Please provide name of target gene class to plot") + } prtb_score_list <- Tool(object = object, slot = "RunMixscape")[[target.gene.ident]] - plot_list <- list() - Idents(object = object) <- group.by - for (i in 1:length(x = prtb_score_list)) { - prtb_score <- prtb_score_list[[i]] - prtb_score[, 2] <- as.factor(x = prtb_score[, 2]) - gd <- setdiff(x = unique(x = prtb_score[, "gene"]), y = target.gene.ident) - colnames(x = prtb_score)[2] <- "gene" - if (is.null(x = group.by)) { - cols <- setNames( - object = c("grey49", col), - nm = c(gd, target.gene.ident) - ) - p <- ggplot(data = prtb_score, mapping = aes_string(x = "pvec", color = "gene")) + - geom_density() + theme_classic() - top_r <- ggplot_build(p)$layout$panel_params[[1]]$y.range[2] - prtb_score$y.jitter <- prtb_score$pvec - prtb_score$y.jitter[prtb_score[, "gene"] == gd] <- runif( - n = prtb_score$y.jitter[prtb_score[, "gene"] == gd], - min = 0.001, - max = top_r / 10 - ) - prtb_score$y.jitter[prtb_score[,"gene"] == target.gene.ident] <- runif( - n = prtb_score$y.jitter[prtb_score[, "gene"] == target.gene.ident], - min = -top_r / 10, - max = 0 - ) + + for (nm in names(prtb_score_list)){ + prtb_score_list[[nm]]['name'] <- nm + } + prtb_score <- do.call(rbind, prtb_score_list) + prtb_score[, 2] <- as.factor(x = prtb_score[, 2]) + gd <- setdiff(x = unique(x = prtb_score[, target.gene.class]), y = target.gene.ident) + colnames(x = prtb_score)[2] <- "gene" + prtb_score$cell.bc <- sapply(rownames(prtb_score), FUN = function(x) strsplit(x, split = "[.]")[[1]][2]) + + if (isTRUE(x = before.mixscape)) { + cols <- setNames( + object = c("grey49", col), + nm = c(gd, target.gene.ident) + ) + + p <- ggplot(data = prtb_score, mapping = aes_string(x = "pvec", color = "gene")) + + geom_density() + theme_classic() + top_r <- ggplot_build(p)$layout$panel_params[[1]]$y.range[2] + prtb_score$y.jitter <- prtb_score$pvec + prtb_score$y.jitter[prtb_score[, "gene"] == gd] <- runif( + n = prtb_score$y.jitter[prtb_score[, "gene"] == gd], + min = 0.001, + max = top_r / 10 + ) + prtb_score$y.jitter[prtb_score[,"gene"] == target.gene.ident] <- runif( + n = prtb_score$y.jitter[prtb_score[, "gene"] == target.gene.ident], + min = -top_r / 10, + max = 0 + ) + + if(is.null(split.by)==FALSE) { + prtb_score$split <- as.character(object[[split.by]][prtb_score$cell.bc,1]) p2 <- p + scale_color_manual(values = cols, drop = FALSE) + geom_density(size = 1.5) + geom_point(data = prtb_score, aes_string(x = "pvec", y = "y.jitter"), size = 0.1) + @@ -1039,62 +1059,86 @@ PlotPerturbScore <- function( ylab("Cell density") + xlab("perturbation score") + theme(legend.key.size = unit(1, "cm"), legend.text = element_text(colour = "black", size = 14), - legend.title = element_blank() - ) - } else { - cols <- setNames( - object = c("grey49", "grey79", col), - nm = c(gd, paste0(target.gene.ident, " NP"), paste(target.gene.ident, prtb.type, sep = " ")) - ) - #add mixscape classifications - prtb_score$mix <- as.character(x = prtb_score[, "gene"]) - classes <- unique(x = object[[group.by]][, 1]) - #define KO and NP cells - ko.cells <- WhichCells(object = object, idents = paste(target.gene.ident, 
prtb.type, sep = " ")) - np.cells <- WhichCells(object = object, idents = paste0(target.gene.ident, " NP")) - prtb_score[np.cells, "mix"] <- paste0(target.gene.ident, " NP") - prtb_score[ko.cells, "mix"] <- paste(target.gene.ident, paste(prtb.type), sep = " ") - prtb_score[, "mix"] <- as.factor(x = prtb_score[,"mix"]) - p <- ggplot(data = prtb_score, aes_string(x = "pvec", color = "mix")) + - geom_density() + theme_classic() - top_r <- ggplot_build(p)$layout$panel_params[[1]]$y.range[2] - prtb_score$y.jitter <- prtb_score$pvec - gd2 <- setdiff( - x = unique(x = prtb_score[, "mix"]), - y = c(paste0(target.gene.ident, " NP"), paste(target.gene.ident, prtb.type, sep = " ")) - ) - prtb_score$y.jitter[prtb_score[, "mix"] == gd2] <- runif( - n = prtb_score$y.jitter[prtb_score[, "mix"] == gd2], - min = 0.001, - max = top_r / 10 - ) - prtb_score$y.jitter[prtb_score$mix == paste(target.gene.ident, prtb.type, sep = " ")] <- runif( - n = prtb_score$y.jitter[prtb_score[, "mix"] == paste(target.gene.ident, prtb.type, sep = " ")], - min = -top_r / 10, - max = 0 - ) - prtb_score$y.jitter[prtb_score$mix == paste0(target.gene.ident, " NP")] <- runif( - n = prtb_score$y.jitter[prtb_score[, "mix"] == paste0(target.gene.ident, " NP")], - min = -top_r / 10, - max = 0 - ) + legend.title = element_blank(), plot.title = element_text(size = 16, face = "bold"))+ + facet_wrap(vars(split)) + } + + else{ p2 <- p + scale_color_manual(values = cols, drop = FALSE) + geom_density(size = 1.5) + geom_point(data = prtb_score, aes_string(x = "pvec", y = "y.jitter"), size = 0.1) + theme(axis.text = element_text(size = 18), axis.title = element_text(size = 20)) + ylab("Cell density") + xlab("perturbation score") + theme(legend.key.size = unit(1, "cm"), - legend.text = element_text(colour ="black", size = 14), - legend.title = element_blank() - ) + legend.text = element_text(colour = "black", size = 14), + legend.title = element_blank(), plot.title = element_text(size = 16, face = "bold")) } - plot_list[[i]] <- p2 } - if (length(x = plot_list) == 1) { - return(plot_list[[1]]) - } else { - return(plot_list) + + + else { + cols <- setNames( + object = c("grey49", "grey79", col), + nm = c(gd, paste0(target.gene.ident, " NP"), paste(target.gene.ident, prtb.type, sep = " ")) + ) + #add mixscape identities + prtb_score$mix <- object[[mixscape.class]][prtb_score$cell.bc,] + + p <- ggplot(data = prtb_score, aes_string(x = "pvec", color = "mix")) + + geom_density() + theme_classic() + + top_r <- ggplot_build(p)$layout$panel_params[[1]]$y.range[2] + prtb_score$y.jitter <- prtb_score$pvec + gd2 <- setdiff( + x = unique(x = prtb_score[, "mix"]), + y = c(paste0(target.gene.ident, " NP"), paste(target.gene.ident, prtb.type, sep = " ")) + ) + prtb_score$y.jitter[prtb_score[, "mix"] == gd2] <- runif( + n = prtb_score$y.jitter[prtb_score[, "mix"] == gd2], + min = 0.001, + max = top_r / 10 + ) + prtb_score$y.jitter[prtb_score$mix == paste(target.gene.ident, prtb.type, sep = " ")] <- runif( + n = prtb_score$y.jitter[prtb_score[, "mix"] == paste(target.gene.ident, prtb.type, sep = " ")], + min = -top_r / 10, + max = 0 + ) + prtb_score$y.jitter[prtb_score$mix == paste0(target.gene.ident, " NP")] <- runif( + n = prtb_score$y.jitter[prtb_score[, "mix"] == paste0(target.gene.ident, " NP")], + min = -top_r / 10, + max = 0 + ) + prtb_score[, "mix"] <- as.factor(x = prtb_score[,"mix"]) + + if(is.null(split.by) == FALSE){ + prtb_score$split <- as.character(object[[split.by]][prtb_score$cell.bc,1]) + p2 <- ggplot(data = prtb_score, aes_string(x = "pvec", 
color = "mix")) + + scale_color_manual(values = cols, drop = FALSE) + + geom_density(size = 1.5) + + geom_point(aes_string(x = "pvec", y = "y.jitter"), size = 0.1) + + theme_classic() + + theme(axis.text = element_text(size = 18), axis.title = element_text(size = 20)) + + ylab("Cell density") + xlab("perturbation score") + + theme(legend.key.size = unit(1, "cm"), + legend.text = element_text(colour ="black", size = 14), + legend.title = element_blank(), + plot.title = element_text(size = 16, face = "bold"))+ + facet_wrap(vars(split)) + } + else{ + p2 <- p + scale_color_manual(values = cols, drop = FALSE) + + geom_density(size = 1.5) + + geom_point(data = prtb_score, aes_string(x = "pvec", y = "y.jitter"), size = 0.1) + + theme(axis.text = element_text(size = 18), axis.title = element_text(size = 20)) + + ylab("Cell density") + xlab("perturbation score") + + theme(legend.key.size = unit(1, "cm"), + legend.text = element_text(colour ="black", size = 14), + legend.title = element_blank(), + plot.title = element_text(size = 16, face = "bold")) + } + } + return(p2) } #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Internal @@ -1215,18 +1259,24 @@ GetMissingPerturb <- function(object, assay, features, verbose = TRUE) { # @return returns matrix of perturbation differences # #' @importFrom matrixStats rowMeans2 +#' @importFrom Matrix sparseMatrix colSums #' PerturbDiff <- function(object, assay, slot, all_cells, nt_cells, features, neighbors, verbose) { nt_data <- as.matrix(x = expm1(x = GetAssayData(object = object, assay = assay, slot = slot)[features, nt_cells, drop = FALSE])) mysapply <- ifelse(test = verbose, yes = pbsapply, no = sapply) - new_expr <- mysapply(X = all_cells, FUN = function(i) { - index <- Indices(object = neighbors)[i, ] - nt_cells20 <- nt_cells[index] - avg_nt <- rowMeans2(x = nt_data[, nt_cells20, drop = FALSE]) - avg_nt <- as.matrix(x = avg_nt) - colnames(x = avg_nt) <- i - return(avg_nt) - }) + # new_expr <- mysapply(X = all_cells, FUN = function(i) { + # index <- Indices(object = neighbors)[i, ] + # nt_cells20 <- nt_cells[index] + # avg_nt <- rowMeans2(x = nt_data[, nt_cells20, drop = FALSE]) + # avg_nt <- as.matrix(x = avg_nt) + # colnames(x = avg_nt) <- i + # return(avg_nt) + # }) + idx <- Indices(object = neighbors)[all_cells,] + model.matrix <- sparseMatrix(i = as.vector(idx), j = rep(1:nrow(x = idx), times = ncol(x = idx)), x = 1, dims = c(length(x = nt_cells), nrow(x = idx))) + model.matrix <- model.matrix/rep(colSums(model.matrix), each = nrow(x = model.matrix)) + new_expr <- nt_data %*% model.matrix + new_expr <- matrix(data = new_expr, nrow = length(x = features)) new_expr <- log1p(x = new_expr) rownames(x = new_expr) <- rownames(x = nt_data) diff --git a/R/objects.R b/R/objects.R index f79dcb24f..cd5d8fe40 100644 --- a/R/objects.R +++ b/R/objects.R @@ -1014,7 +1014,7 @@ as.Seurat.CellDataSet <- function( #' set to \code{NULL} if only normalized data are present #' @param data name of the SingleCellExperiment assay to slot as \code{data}. #' Set to NULL if only counts are present -#' @param assay Name to store expression matrices as +#' @param assay Name of assays to convert; set to \code{NULL} for all assays to be converted #' @param project Project name for new Seurat object #' #' @rdname as.Seurat @@ -1026,94 +1026,137 @@ as.Seurat.SingleCellExperiment <- function( x, counts = 'counts', data = 'logcounts', - assay = 'RNA', + assay = NULL, project = 'SingleCellExperiment', ... ) { CheckDots(...) 
if (!PackageCheck('SingleCellExperiment', error = FALSE)) {
    stop(
-      "Please install SingleCellExperiment from Bioconductor before converting to a SingeCellExperiment object",
+      "Please install SingleCellExperiment from Bioconductor before converting to a SingleCellExperiment object.",
+      "\nhttps://bioconductor.org/packages/SingleCellExperiment/",
      call. = FALSE
    )
  }
  meta.data <- as.data.frame(x = SummarizedExperiment::colData(x = x))
-  # Pull expression matrices
-  mats <- list(counts = counts, data = data)
-  mats <- Filter(f = Negate(f = is.null), x = mats)
-  if (length(x = mats) == 0) {
-    stop("Cannot pass 'NULL' to both 'counts' and 'data'")
-  }
-  for (m in 1:length(x = mats)) {
-    # if (is.null(x = mats[[m]])) next
-    mats[[m]] <- tryCatch(
-      expr = SummarizedExperiment::assay(x = x, i = mats[[m]]),
-      error = function(e) {
-        stop("No data in provided assay - ", mats[[m]], call. = FALSE)
-      }
-    )
-    # if cell names are NULL, fill with cell_X
-    if (is.null(x = colnames(x = mats[[m]]))) {
-      warning(
-        "The column names of the",
-        names(x = mats)[m],
-        " matrix is NULL. Setting cell names to cell_columnidx (e.g 'cell_1').",
-        call. = FALSE,
-        immediate. = TRUE
-      )
-      cell.names <- paste0("cell_", 1:ncol(x = mats[[m]]))
-      colnames(x = mats[[m]]) <- cell.names
-      rownames(x = meta.data) <- cell.names
+  if (!is.null(SingleCellExperiment::altExpNames(x = x))) {
+    assayn <- assay %||% SingleCellExperiment::altExpNames(x = x)
+    if (!all(assay %in% SingleCellExperiment::altExpNames(x = x))) {
+      stop("One or more of the assays you are trying to convert are not present in the SingleCellExperiment object")
    }
-  }
-  assays <- if (is.null(x = mats$counts)) {
-    list(CreateAssayObject(data = mats$data))
-  } else if (is.null(x = mats$data)) {
-    list(CreateAssayObject(counts = mats$counts))
+    assayn <- c("originalexp", assayn)
  } else {
-    a <- CreateAssayObject(counts = mats$counts)
-    a <- SetAssayData(object = a, slot = 'data', new.data = mats$data)
-    list(a)
-  }
-  names(x = assays) <- assay
-  Key(object = assays[[assay]]) <- paste0(tolower(x = assay), '_')
-  # Create the Seurat object
-  object <- new(
-    Class = 'Seurat',
-    assays = assays,
-    meta.data = meta.data,
-    version = packageVersion(pkg = 'Seurat'),
-    project.name = project
-  )
-  DefaultAssay(object = object) <- assay
-  Idents(object = object) <- project
-  # Get DimReduc information
-  if (length(x = SingleCellExperiment::reducedDimNames(x = x)) > 0) {
-    for (dr in SingleCellExperiment::reducedDimNames(x = x)) {
-      embeddings <- SingleCellExperiment::reducedDim(x = x, type = dr)
-      if (is.null(x = rownames(x = embeddings))) {
-        rownames(x = embeddings) <- cell.names
-      }
-      key <- gsub(
-        pattern = "[[:digit:]]",
-        replacement = "_",
-        x = colnames(x = SingleCellExperiment::reducedDim(x = x, type = dr))[1]
+    assayn <- "originalexp"
+  }
+  for (assay in assayn) {
+    if (assay != "originalexp") {
+      x <- SingleCellExperiment::swapAltExp(x = x, name = assay, saved = NULL)
+    }
+    # Pull matrices
+    mats <- list(counts = counts, data = data)
+    mats <- Filter(f = Negate(f = is.null), x = mats)
+    if (length(x = mats) == 0) {
+      stop("Cannot pass 'NULL' to both 'counts' and 'data'")
+    }
+    for (m in 1:length(x = mats)) {
+      mats[[m]] <- tryCatch(
+        expr = SummarizedExperiment::assay(x = x, i = mats[[m]]),
+        error = function(e) {
+          stop("No data in provided assay - ", mats[[m]], call.
= FALSE) + } ) - if (length(x = key) == 0) { - key <- paste0(dr, "_") + # if cell names are NULL, fill with cell_X + if (is.null(x = colnames(x = mats[[m]]))) { + warning( + "The column names of the", + names(x = mats)[m], + " matrix is NULL. Setting cell names to cell_columnidx (e.g 'cell_1').", + call. = FALSE, + immediate. = TRUE + ) + cell.names <- paste0("cell_", 1:ncol(x = mats[[m]])) + colnames(x = mats[[m]]) <- cell.names + rownames(x = meta.data) <- cell.names } - colnames(x = embeddings) <- paste0(key, 1:ncol(x = embeddings)) - object[[dr]] <- CreateDimReducObject( - embeddings = embeddings, - key = key, - assay = DefaultAssay(object = object) + } + assays <- if (is.null(x = mats$counts)) { + list(CreateAssayObject(data = mats$data)) + } else if (is.null(x = mats$data)) { + list(CreateAssayObject(counts = mats$counts)) + } else { + a <- CreateAssayObject(counts = mats$counts) + a <- SetAssayData(object = a, slot = 'data', new.data = mats$data) + list(a) + } + names(x = assays) <- assay + Key(object = assays[[assay]]) <- paste0(tolower(x = assay), '_') + # Create the Seurat object + if (!exists(x = "object")) { + object <- CreateSeuratObject( + counts = assays[[assay]], + Class = 'Seurat', + assay = assay, + meta.data = meta.data, + version = packageVersion(pkg = 'Seurat'), + project.name = project ) + } else { + object[[assay]] <- assays[[assay]] + } + DefaultAssay(object = object) <- assay + # add feature level meta data + md <- SingleCellExperiment::rowData(x = x) + # replace underscores + rownames(x = md) <- gsub(pattern = "_", replacement = "-", x = rownames(x = md)) + md <- as.data.frame(x = md) + # ensure order same as data + md <- md[rownames(x = object[[assay]]), ] + object[[assay]] <- AddMetaData( + object = object[[assay]], + metadata = md + ) + Idents(object = object) <- project + # Get DimReduc information, add underscores if needed and pull from different alt EXP + if (length(x = SingleCellExperiment::reducedDimNames(x = x)) > 0) { + for (dr in SingleCellExperiment::reducedDimNames(x = x)) { + embeddings <- SingleCellExperiment::reducedDim(x = x, type = dr) + if (is.null(x = rownames(x = embeddings))) { + rownames(x = embeddings) <- cell.names + } + if(!grepl('_$', + gsub(pattern = "[[:digit:]]", + replacement = "_", + x = colnames(x = SingleCellExperiment::reducedDim(x = x, type = dr))[1] + ))){ + key <- gsub( + pattern = "[[:digit:]]", + replacement = "_", + x = colnames(x = SingleCellExperiment::reducedDim(x = x, type = dr))[1] + ) + } else + { + key <- gsub( + pattern = "[[:digit:]]", + replacement = "", + x = colnames(x = SingleCellExperiment::reducedDim(x = x, type = dr))[1] + ) + } + if (length(x = key) == 0) { + key <- paste0(dr, "_") + } + colnames(x = embeddings) <- paste0(key, 1:ncol(x = embeddings)) + object[[dr]] <- CreateDimReducObject( + embeddings = embeddings, + key = key, + assay = DefaultAssay(object = object) + ) + } } } return(object) } -#' @param assay Assay to convert +#' @param assay Assays to convert #' #' @rdname as.SingleCellExperiment #' @concept objects @@ -1125,20 +1168,41 @@ as.SingleCellExperiment.Seurat <- function(x, assay = NULL, ...) 
{ if (!PackageCheck('SingleCellExperiment', error = FALSE)) { stop("Please install SingleCellExperiment from Bioconductor before converting to a SingeCellExperiment object") } - assay <- assay %||% DefaultAssay(object = x) + assay <- assay %||% Assays(object = x) + if (!all(assay %in% Assays(object = x))) { + stop("One or more of the assays you are trying to convert is not in the Seurat object") + } + experiments <- list() + for (assayn in assay){ assays = list( - counts = GetAssayData(object = x, assay = assay, slot = "counts"), - logcounts = GetAssayData(object = x, assay = assay, slot = "data") - ) - assays <- assays[sapply(X = assays, FUN = nrow) != 0] - sce <- SingleCellExperiment::SingleCellExperiment(assays = assays) + counts = GetAssayData(object = x, assay = assayn, slot = "counts"), + logcounts = GetAssayData(object = x, assay = assayn, slot = "data")) + assays <- assays[sapply(X = assays, FUN = nrow) != 0] + sume <- SummarizedExperiment::SummarizedExperiment(assays = assays) + experiments[[assayn]] <- sume + } + # create one single cell experiment + sce <- as(object = experiments[[1]], Class = "SingleCellExperiment") + sce <- SingleCellExperiment::SingleCellExperiment(sce, altExps = experiments) + orig.exp.name <- names(x = experiments[1]) + sce <- SingleCellExperiment::swapAltExp(x = sce, name = orig.exp.name, saved = NULL) metadata <- x[[]] metadata$ident <- Idents(object = x) - SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(metadata) - SummarizedExperiment::rowData(sce) <- S4Vectors::DataFrame(x[[assay]][[]]) + SummarizedExperiment::colData(x = sce) <- S4Vectors::DataFrame(metadata) + for (assayn in assay){ + sce <- SingleCellExperiment::swapAltExp(x = sce, name = assayn, saved = orig.exp.name) + SummarizedExperiment::rowData(x = sce) <- S4Vectors::DataFrame(x[[assayn]][[]]) + sce <- SingleCellExperiment::swapAltExp(x = sce, name = orig.exp.name, saved = assayn) + } for (dr in FilterObjects(object = x, classes.keep = "DimReduc")) { - SingleCellExperiment::reducedDim(sce, toupper(x = dr)) <- Embeddings(object = x[[dr]]) + assay.used <- DefaultAssay(object = x[[dr]]) + if (assay.used %in% SingleCellExperiment::altExpNames(x = sce)) { + sce <- SingleCellExperiment::swapAltExp(x = sce, name = assay.used, saved = orig.exp.name) + SingleCellExperiment::reducedDim(x = sce, toupper(x = dr)) <- Embeddings(object = x[[dr]]) + sce <- SingleCellExperiment::swapAltExp(x = sce, name = orig.exp.name, saved = assay.used) + } } + sce <- SingleCellExperiment::swapAltExp(x = sce, name = orig.exp.name, saved = NULL) return(sce) } diff --git a/R/utilities.R b/R/utilities.R index 287f07019..85509c821 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -6,6 +6,98 @@ NULL # Functions #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#' Add Azimuth Results +#' +#' Add mapping and prediction scores, UMAP embeddings, and imputed assay (if +#' available) +#' from Azimuth to an existing or new \code{\link[SeuratObject]{Seurat}} object +#' +#' @param object A \code{\link[SeuratObject]{Seurat}} object +#' @param filename Path to Azimuth mapping scores file +#' +#' @return \code{object} with Azimuth results added +#' +#' @examples +#' \dontrun{ +#' object <- AddAzimuthResults(object, filename = "azimuth_results.Rds") +#' } +#' +#' @export +AddAzimuthResults <- function(object = NULL, filename) { + if (is.null(x = filename)) { + stop("No Azimuth results provided.") + } + azimuth_results <- readRDS(file = filename) + if (!is.list(x = azimuth_results) || 
any(!(c('umap', 'pred.df') %in% names(x = azimuth_results)))) {
+    stop("Expected the following format for azimuth_results:
+      `list(umap = <umap>, pred.df = <pred.df>[, impADT = <impADT>])`")
+  }
+
+  if (is.null(x = object)) {
+    message("No existing Seurat object provided. Creating new one.")
+    object <- CreateSeuratObject(
+      counts = matrix(
+        nrow = 1,
+        ncol = nrow(x = azimuth_results$umap),
+        dimnames = list(
+          row.names = 'Dummy.feature',
+          col.names = rownames(x = azimuth_results$umap))
+      ),
+      assay = 'Dummy'
+    )
+  } else {
+    overlap.cells <- intersect(
+      x = Cells(x = object),
+      y = rownames(x = azimuth_results$umap)
+    )
+    if (!(all(overlap.cells %in% Cells(x = object)))) {
+      stop("Cells in the object do not match cells in the downloaded results")
+    } else if (length(x = overlap.cells) < length(x = Cells(x = object))) {
+      warning(paste0("Subsetting out ", length(x = Cells(x = object)) - length(x = overlap.cells),
+                     " cells that are absent from the downloaded results (perhaps filtered by Azimuth)"))
+      object <- subset(x = object, cells = overlap.cells)
+    }
+  }
+
+  azimuth_results$pred.df$cell <- NULL
+  object <- AddMetaData(object = object, metadata = azimuth_results$pred.df)
+  object[['umap.proj']] <- azimuth_results$umap
+  if ('impADT' %in% names(x = azimuth_results)) {
+    object[['impADT']] <- azimuth_results$impADT
+    if ('Dummy' %in% Assays(object = object)) {
+      DefaultAssay(object = object) <- 'impADT'
+      object[['Dummy']] <- NULL
+    }
+  }
+  return(object)
+}
+
+#' Add Azimuth Scores
+#'
+#' Add mapping and prediction scores from Azimuth to a
+#' \code{\link[SeuratObject]{Seurat}} object
+#'
+#' @param object A \code{\link[SeuratObject]{Seurat}} object
+#' @param filename Path to Azimuth mapping scores file
+#'
+#' @return \code{object} with the mapping scores added
+#'
+#' @examples
+#' \dontrun{
+#' object <- AddAzimuthScores(object, filename = "azimuth_pred.tsv")
+#' }
+#'
+AddAzimuthScores <- function(object, filename) {
+  if (!file.exists(filename)) {
+    stop("Cannot find Azimuth scores file ", filename, call.
= FALSE) + } + object <- AddMetaData( + object = object, + metadata = read.delim(file = filename, row.names = 1) + ) + return(object) +} + #' Calculate module scores for feature expression programs in single cells #' #' Calculate the average expression levels of each program (cluster) on single @@ -1726,13 +1818,14 @@ CheckDuplicateCellNames <- function(object.list, verbose = TRUE, stop = FALSE) { # Create an empty dummy assay to replace existing assay -# +#' @importFrom Matrix sparseMatrix CreateDummyAssay <- function(assay) { - cm <- as.sparse(x = matrix( - data = 0, - nrow = nrow(x = assay), - ncol = ncol(x = assay) - )) + cm <- sparseMatrix( + i = {}, + j = {}, + dims = c(nrow(x = assay), ncol(x = assay)) + ) + cm <- as(object = cm, Class = "dgCMatrix") rownames(x = cm) <- rownames(x = assay) colnames(x = cm) <- colnames(x = assay) # TODO: restore once check.matrix is in SeuratObject diff --git a/R/visualization.R b/R/visualization.R index 25d74c015..7e363c0b6 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -806,10 +806,7 @@ DimPlot <- function( } reduction <- reduction %||% DefaultDimReduc(object = object) cells <- cells %||% colnames(x = object) - if (isTRUE(x = shuffle)) { - set.seed(seed = seed) - cells <- sample(x = cells) - } + data <- Embeddings(object = object[[reduction]])[cells, dims] data <- as.data.frame(x = data) dims <- paste0(Key(object = object[[reduction]]), dims) @@ -829,6 +826,10 @@ DimPlot <- function( if (!is.null(x = split.by)) { data[, split.by] <- object[[split.by, drop = TRUE]] } + if (isTRUE(x = shuffle)) { + set.seed(seed = seed) + data <- data[sample(x = 1:nrow(x = data)), ] + } plots <- lapply( X = group.by, FUN = function(x) { @@ -1854,6 +1855,9 @@ CellScatter <- function( #' be metrics, PC scores, etc. - anything that can be retreived with FetchData #' @param feature2 Second feature to plot. #' @param cells Cells to include on the scatter plot. +#' @param shuffle Whether to randomly shuffle the order of points. This can be +#' useful for crowded plots if points of interest are being buried. (default is FALSE) +#' @param seed Sets the seed if randomly shuffling the order of points. #' @param group.by Name of one or more metadata columns to group (color) cells by #' (for example, orig.ident); pass 'ident' to group by identity class #' @param cols Colors to use for identity class plotting. 
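As a usage sketch for the new `shuffle` and `seed` arguments documented in the hunk above (assuming the `pbmc_small` demo object bundled with Seurat; not part of this patch):

```r
library(Seurat)
data("pbmc_small")
# Plot points in random order so no identity class systematically buries another;
# a fixed seed keeps the shuffled ordering reproducible across calls
FeatureScatter(
  object = pbmc_small,
  feature1 = "nCount_RNA",
  feature2 = "nFeature_RNA",
  shuffle = TRUE,
  seed = 42
)
```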
@@ -1887,6 +1891,8 @@ FeatureScatter <- function( feature1, feature2, cells = NULL, + shuffle = FALSE, + seed = 1, group.by = NULL, cols = NULL, pt.size = 1, @@ -1899,6 +1905,10 @@ FeatureScatter <- function( raster = NULL ) { cells <- cells %||% colnames(x = object) + if (isTRUE(x = shuffle)) { + set.seed(seed = seed) + cells <- sample(x = cells) + } object[['ident']] <- Idents(object = object) group.by <- group.by %||% 'ident' data <- FetchData( @@ -3512,7 +3522,7 @@ DotPlot <- function( data.use <- scale(x = data.use) data.use <- MinMax(data = data.use, min = col.min, max = col.max) } else { - data.use <- log(x = data.use) + data.use <- log1p(x = data.use) } return(data.use) } @@ -4643,7 +4653,7 @@ Intensity <- function(color) { #' #' @importFrom stats median na.omit #' @importFrom ggrepel geom_text_repel geom_label_repel -#' @importFrom ggplot2 aes_string geom_text geom_label +#' @importFrom ggplot2 aes_string geom_text geom_label layer_scales #' @importFrom RANN nn2 #' #' @export @@ -4684,6 +4694,8 @@ LabelClusters <- function( } pb <- ggplot_build(plot = plot) if (geom == 'GeomSpatial') { + xrange.save <- layer_scales(plot = plot)$x$range$range + yrange.save <- layer_scales(plot = plot)$y$range$range data[, xynames["y"]] = max(data[, xynames["y"]]) - data[, xynames["y"]] + min(data[, xynames["y"]]) if (!pb$plot$plot_env$crop) { y.transform <- c(0, nrow(x = pb$plot$plot_env$image)) - pb$layout$panel_params[[1]]$y.range @@ -4761,6 +4773,10 @@ LabelClusters <- function( ... ) } + # restore old axis ranges + if (geom == 'GeomSpatial') { + plot <- suppressMessages(expr = plot + coord_fixed(xlim = xrange.save, ylim = yrange.save)) + } return(plot) } @@ -5854,8 +5870,8 @@ GeomSpatial <- ggproto( if (!crop) { y.transform <- c(0, nrow(x = image)) - panel_scales$y.range data$y <- data$y + sum(y.transform) - panel_scales$x$continuous_range <- c(0, nrow(x = image)) - panel_scales$y$continuous_range <- c(0, ncol(x = image)) + panel_scales$x$continuous_range <- c(0, ncol(x = image)) + panel_scales$y$continuous_range <- c(0, nrow(x = image)) panel_scales$y.range <- c(0, nrow(x = image)) panel_scales$x.range <- c(0, ncol(x = image)) } @@ -7541,7 +7557,7 @@ SingleSpatialPlot <- function( image.alpha = image.alpha, crop = crop, stroke = stroke - ) + coord_fixed() + ) + coord_fixed() + theme(aspect.ratio = 1) }, 'interactive' = { plot + geom_spatial_interactive( @@ -7585,7 +7601,7 @@ SingleSpatialPlot <- function( } plot <- plot + scale } - plot <- plot + theme_void() + plot <- plot + NoAxes() + theme(panel.background = element_blank()) return(plot) } diff --git a/cran-comments.md b/cran-comments.md index eed9624bd..955ac6563 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,4 +1,4 @@ -# Seurat v4.0.1 +# Seurat v4.0.2 ## Test environments * local Ubuntu 18.04.4 install, R 4.0.3 @@ -14,8 +14,8 @@ There were no ERRORs, WARNINGs, or NOTEs ## Downstream dependencies -There are no packages that depend on Seurat. 
+There is one package that depends on Seurat: tidyseurat; this update does not impact its functionality.

-There are six packages that imports Seurat: CDSeq, DUBStepR, scMappR, Signac, SignacX, and SoupX; this update does not impact their functionality
+There are seven packages that import Seurat: CDSeq, DUBStepR, rPanglaoDB, scMappR, Signac, SignacX, and SoupX; this update does not impact their functionality.

-There are eleven packages that suggest Seurat: BisqueRNA, clustree, conos, DIscBIO, dyngen, nanny, rliger, Rmagic, singleCellHaystack, treefit, VAM; this update does not impact their functionality.
+There are nine packages that suggest Seurat: BisqueRNA, ClustAssess, clustree, conos, DIscBIO, dyngen, rliger, Rmagic, VAM; this update does not impact their functionality.

diff --git a/index.md b/index.md
index 32e268d55..82fc4d196 100644
--- a/index.md
+++ b/index.md
@@ -15,7 +15,7 @@ We are excited to release Seurat v4.0! This update brings the following new feat
 * **Rapid mapping of query datasets to references.** We introduce Azimuth, a workflow to leverage high-quality reference datasets to rapidly map new scRNA-seq datasets (queries). For example, you can map any scRNA-seq dataset of human PBMC onto our reference, automating the process of visualization, clustering annotation, and differential expression. Azimuth can be run within Seurat, or using a standalone web application that requires no installation or programming experience.
   - Vignette: [Mapping scRNA-seq queries onto reference datasets](articles/multimodal_reference_mapping.html)
-  - Web app: [Automated mapping, visualization, and annotation of scRNA-seq datasets from human PBMC]("https://azimuth.hubmapconsortium.org/")
+  - Web app: [Automated mapping, visualization, and annotation of scRNA-seq datasets from human PBMC](https://azimuth.hubmapconsortium.org/)

 Additional speed and usability updates: We have made minor changes in v4, primarily to improve the performance of Seurat v4 on large datasets. These changes substantially improve the speed and memory requirements, but do not adversely impact downstream results. We provide a detailed description of key changes [here](articles/v4_changes.html). Users who wish to fully reproduce existing results can continue to do so by continuing to install Seurat v3.
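For orientation, a minimal sketch of how the new Azimuth helpers defined in R/utilities.R above are meant to be called, mirroring their `@examples`; the file names are placeholders for downloads from the Azimuth web app, and `object` is an existing Seurat object:

```r
library(Seurat)
# Attach full Azimuth output (projected UMAP, prediction metadata, and the
# imputed ADT assay when present) to an existing object; passing object = NULL
# builds a new object around a dummy assay instead
object <- AddAzimuthResults(object, filename = "azimuth_results.Rds")
# Or add just the per-cell mapping/prediction scores from the TSV download
object <- AddAzimuthScores(object, filename = "azimuth_pred.tsv")
```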
diff --git a/man/AddAzimuthResults.Rd b/man/AddAzimuthResults.Rd new file mode 100644 index 000000000..2044e87b2 --- /dev/null +++ b/man/AddAzimuthResults.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{AddAzimuthResults} +\alias{AddAzimuthResults} +\title{Add Azimuth Results} +\usage{ +AddAzimuthResults(object = NULL, filename) +} +\arguments{ +\item{object}{A \code{\link[SeuratObject]{Seurat}} object} + +\item{filename}{Path to Azimuth mapping scores file} +} +\value{ +\code{object} with Azimuth results added +} +\description{ +Add mapping and prediction scores, UMAP embeddings, and imputed assay (if +available) +from Azimuth to an existing or new \code{\link[SeuratObject]{Seurat}} object +} +\examples{ +\dontrun{ +object <- AddAzimuthResults(object, filename = "azimuth_results.Rds") +} + +} diff --git a/man/AddAzimuthScores.Rd b/man/AddAzimuthScores.Rd new file mode 100644 index 000000000..a49350531 --- /dev/null +++ b/man/AddAzimuthScores.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{AddAzimuthScores} +\alias{AddAzimuthScores} +\title{Add Azimuth Scores} +\usage{ +AddAzimuthScores(object, filename) +} +\arguments{ +\item{object}{A \code{\link[SeuratObject]{Seurat}} object} + +\item{filename}{Path to Azimuth mapping scores file} +} +\value{ +\code{object} with the mapping scores added +} +\description{ +Add mapping and prediction scores from Azimuth to a +\code{\link[SeuratObject]{Seurat}} object +} +\examples{ +\dontrun{ +object <- AddAzimuthScores(object, filename = "azimuth_pred.tsv") +} + +} diff --git a/man/FeatureScatter.Rd b/man/FeatureScatter.Rd index c28125081..ef6ac6eb9 100644 --- a/man/FeatureScatter.Rd +++ b/man/FeatureScatter.Rd @@ -10,6 +10,8 @@ FeatureScatter( feature1, feature2, cells = NULL, + shuffle = FALSE, + seed = 1, group.by = NULL, cols = NULL, pt.size = 1, @@ -32,6 +34,11 @@ be metrics, PC scores, etc. - anything that can be retreived with FetchData} \item{cells}{Cells to include on the scatter plot.} +\item{shuffle}{Whether to randomly shuffle the order of points. This can be +useful for crowded plots if points of interest are being buried. 
(default is FALSE)} + +\item{seed}{Sets the seed if randomly shuffling the order of points.} + \item{group.by}{Name of one or more metadata columns to group (color) cells by (for example, orig.ident); pass 'ident' to group by identity class} diff --git a/man/FindIntegrationAnchors.Rd b/man/FindIntegrationAnchors.Rd index 4addc5aaf..62495f03c 100644 --- a/man/FindIntegrationAnchors.Rd +++ b/man/FindIntegrationAnchors.Rd @@ -12,7 +12,7 @@ FindIntegrationAnchors( scale = TRUE, normalization.method = c("LogNormalize", "SCT"), sct.clip.range = NULL, - reduction = c("cca", "rpca"), + reduction = c("cca", "rpca", "rlsi"), l2.norm = TRUE, dims = 1:30, k.anchor = 5, @@ -63,6 +63,7 @@ be one of: \itemize{ \item{cca: Canonical correlation analysis} \item{rpca: Reciprocal PCA} + \item{rlsi: Reciprocal LSI} }} \item{l2.norm}{Perform L2 normalization on the CCA cell embeddings after @@ -86,7 +87,7 @@ annoy} \item{n.trees}{More trees gives higher precision when using annoy approximate nearest neighbor search} -\item{eps}{Error bound on the neighbor finding algorithm (from RANN)} +\item{eps}{Error bound on the neighbor finding algorithm (from RANN/Annoy)} \item{verbose}{Print progress bars and output} } diff --git a/man/FindTransferAnchors.Rd b/man/FindTransferAnchors.Rd index 43e7ae9f2..398a55a03 100644 --- a/man/FindTransferAnchors.Rd +++ b/man/FindTransferAnchors.Rd @@ -16,6 +16,7 @@ FindTransferAnchors( reference.reduction = NULL, project.query = FALSE, features = NULL, + scale = TRUE, npcs = 30, l2.norm = TRUE, dims = 1:30, @@ -54,6 +55,14 @@ Options are: \itemize{ \item{pcaproject: Project the PCA from the reference onto the query. We recommend using PCA when reference and query datasets are from scRNA-seq} + \item{lsiproject: Project the LSI from the reference onto the query. We + recommend using LSI when reference and query datasets are from scATAC-seq. + This requires that LSI has been computed for the reference dataset, and the + same features (eg, peaks or genome bins) are present in both the reference + and query. See \code{\link[Signac]{RunTFIDF}} and + \code{\link[Signac]{RunSVD}}} + \item{rpca: Project the PCA from the reference onto the query, and the PCA + from the query onto the reference (reciprocal PCA projection).} \item{cca: Run a CCA on the reference and query } }} @@ -72,6 +81,8 @@ query object that are alos present in the reference.} set as variable features of the reference object which are also present in the query.} +\item{scale}{Scale query data.} + \item{npcs}{Number of PCs to compute on reference if reference.reduction is not provided.} @@ -98,7 +109,7 @@ annoy} nearest neighbor search} \item{eps}{Error bound on the neighbor finding algorithm (from -\code{\link{RANN}})} +\code{\link{RANN}} or \code{\link{RcppAnnoy}})} \item{approx.pca}{Use truncated singular value decomposition to approximate PCA} @@ -112,7 +123,13 @@ neighbor calculations to make the mapping score function more efficient.} } \value{ Returns an \code{AnchorSet} object that can be used as input to -\code{\link{TransferData}} +\code{\link{TransferData}}, \code{\link{IntegrateEmbeddings}} and +\code{\link{MapQuery}}. The dimension reduction used for finding anchors is +stored in the \code{AnchorSet} object and can be used for computing anchor +weights in downstream functions. Note that only the requested dimensions are +stored in the dimension reduction object in the \code{AnchorSet}. This means +that if \code{dims=2:20} is used, for example, the dimension of the stored +reduction is \code{1:19}. 
} \description{ Find a set of anchors between a reference and query object. These @@ -133,8 +150,11 @@ description of the methodology, please see Stuart, Butler, et al Cell 2019. \code{project.query = TRUE}), using the \code{features} specified. The data from the other dataset is then projected onto this learned PCA structure. If \code{reduction = "cca"}, then CCA is performed on the reference and - query for this dimensional reduction step. If \code{l2.norm} is set to - \code{TRUE}, perform L2 normalization of the embedding vectors.} + query for this dimensional reduction step. If + \code{reduction = "lsiproject"}, the stored LSI dimension reduction in the + reference object is used to project the query dataset onto the reference. + If \code{l2.norm} is set to \code{TRUE}, perform L2 normalization of the + embedding vectors.} \item{Identify anchors between the reference and query - pairs of cells from each dataset that are contained within each other's neighborhoods (also known as mutual nearest neighbors).} diff --git a/man/MapQuery.Rd b/man/MapQuery.Rd index 57b1df83c..7ca0a68ce 100644 --- a/man/MapQuery.Rd +++ b/man/MapQuery.Rd @@ -11,6 +11,8 @@ MapQuery( refdata = NULL, new.reduction.name = NULL, reference.reduction = NULL, + reference.dims = NULL, + query.dims = NULL, reduction.model = NULL, transferdata.args = list(), integrateembeddings.args = list(), @@ -42,6 +44,10 @@ MapQuery( \item{reference.reduction}{Name of reduction to use from the reference for neighbor finding} +\item{reference.dims}{Dimensions (columns) to use from reference} + +\item{query.dims}{Dimensions (columns) to use from query} + \item{reduction.model}{\code{DimReduc} object that contains the umap model} \item{transferdata.args}{A named list of additional arguments to @@ -56,16 +62,24 @@ neighbor finding} \item{verbose}{Print progress bars and output} } \value{ -Returns a modified query Seurat object containing new Assays -corresponding to the features transferred and/or their corresponding prediction -scores from TransferData. An integrated reduction from IntegrateEmbeddings. -And an projected umap reduction of the query cells projected into the -reference umap. +Returns a modified query Seurat object containing: + +\itemize{ + \item{New Assays corresponding to the features transferred and/or their + corresponding prediction scores from \code{\link{TransferData}}} + \item{An integrated reduction from \code{\link{IntegrateEmbeddings}}} + \item{A projected UMAP reduction of the query cells projected into the + reference UMAP using \code{\link{ProjectUMAP}}} +} } \description{ This is a convenience wrapper function around the following three functions that are often run together when mapping query data to a reference: \code{\link{TransferData}}, \code{\link{IntegrateEmbeddings}}, -\code{\link{ProjectUMAP}}. +\code{\link{ProjectUMAP}}. Note that by default, the \code{weight.reduction} +parameter for all functions will be set to the dimension reduction method +used in the \code{\link{FindTransferAnchors}} function call used to construct +the anchor object, and the \code{dims} parameter will be the same dimensions +used to find anchors. 
}
\concept{integration}

diff --git a/man/MixscapeHeatmap.Rd b/man/MixscapeHeatmap.Rd
index ca81071b3..fabae8b35 100644
--- a/man/MixscapeHeatmap.Rd
+++ b/man/MixscapeHeatmap.Rd
@@ -90,7 +90,8 @@ Increasing logfc.threshold speeds up the function, but can miss weaker signals.}
 \item{order.by.prob}{Order cells on heatmap based on their mixscape knockout
 probability from highest to lowest score.}

-\item{group.by}{Regroup cells into a different identity class prior to performing differential expression (see example)}
+\item{group.by}{(Deprecated) Option to split densities based on mixscape
+classification. Please use \code{mixscape.class} instead.}

 \item{mixscape.class}{metadata column with mixscape classifications.}

diff --git a/man/PlotPerturbScore.Rd b/man/PlotPerturbScore.Rd
index da338c33e..bdf62286e 100644
--- a/man/PlotPerturbScore.Rd
+++ b/man/PlotPerturbScore.Rd
@@ -6,25 +6,33 @@
 \usage{
 PlotPerturbScore(
   object,
+  target.gene.class = "gene",
   target.gene.ident = NULL,
-  group.by = "mixscape_class",
+  mixscape.class = "mixscape_class",
   col = "orange2",
+  split.by = NULL,
+  before.mixscape = FALSE,
   prtb.type = "KO"
 )
}
\arguments{
 \item{object}{An object of class Seurat.}

-\item{target.gene.ident}{Class identity for cells sharing the same
-perturbation. Name should be the same as the one used to run mixscape.}
+\item{target.gene.class}{Metadata column specifying all target gene names in the experiment.}

-\item{group.by}{Option to split densities based on mixscape classification.
-Default is set to NULL and plots cells by original class ID.}
+\item{target.gene.ident}{Target gene name to visualize perturbation scores for.}
+
+\item{mixscape.class}{Metadata column containing the mixscape classifications.}

 \item{col}{Specify color of target gene class or knockout cell class. For
 control non-targeting and non-perturbed cells, colors are set to different
 shades of grey.}

+\item{split.by}{Metadata column to facet by; for datasets with more than one
+cell type, set this to visualize perturbation scores for each cell type
+separately.}
+
+\item{before.mixscape}{If TRUE, split densities based on the original target
+gene classification rather than the mixscape classification; default is FALSE.}
+
 \item{prtb.type}{specify type of CRISPR perturbation expected for labeling
mixscape classifications. Default is KO.}
}
\value{

diff --git a/man/TransferData.Rd b/man/TransferData.Rd
index d0f74eb3a..6bbe17fcb 100644
--- a/man/TransferData.Rd
+++ b/man/TransferData.Rd
@@ -46,6 +46,7 @@ TransferData(
 anchors. Options are:
 \itemize{
   \item{pcaproject: Use the projected PCA used for anchor building}
+  \item{lsiproject: Use the projected LSI used for anchor building}
   \item{pca: Use an internal PCA on the query only}
   \item{cca: Use the CCA used for anchor building}
   \item{custom DimReduc: User provided \code{\link{DimReduc}} object
@@ -55,7 +56,9 @@ anchors. Options are:
 \item{l2.norm}{Perform L2 normalization on the cell embeddings after
 dimensional reduction}

-\item{dims}{Set of dimensions to use in the anchor weighting procedure}
+\item{dims}{Set of dimensions to use in the anchor weighting procedure.
If +NULL, the same dimensions that were used to find anchors will be used for +weighting.} \item{k.weight}{Number of neighbors to consider when weighting anchors} diff --git a/man/as.Seurat.Rd b/man/as.Seurat.Rd index 65afef659..58ffd4e64 100644 --- a/man/as.Seurat.Rd +++ b/man/as.Seurat.Rd @@ -11,7 +11,7 @@ x, counts = "counts", data = "logcounts", - assay = "RNA", + assay = NULL, project = "SingleCellExperiment", ... ) @@ -21,7 +21,7 @@ \item{slot}{Slot to store expression data as} -\item{assay}{Name to store expression matrices as} +\item{assay}{Name of assays to convert; set to \code{NULL} for all assays to be converted} \item{verbose}{Show progress updates} diff --git a/man/as.SingleCellExperiment.Rd b/man/as.SingleCellExperiment.Rd index 46bd4fafb..04dc908a9 100644 --- a/man/as.SingleCellExperiment.Rd +++ b/man/as.SingleCellExperiment.Rd @@ -14,7 +14,7 @@ as.SingleCellExperiment(x, ...) \item{...}{Arguments passed to other methods} -\item{assay}{Assay to convert} +\item{assay}{Assays to convert} } \description{ Convert objects to SingleCellExperiment objects diff --git a/tests/testthat/test_data_manipulation.R b/tests/testthat/test_data_manipulation.R index 6363c7003..17a715f4f 100644 --- a/tests/testthat/test_data_manipulation.R +++ b/tests/testthat/test_data_manipulation.R @@ -91,23 +91,23 @@ test_that("Fast implementation of row scaling returns expected values", { expect_true(max(mat.clipped, na.rm = T) >= 0.2) }) -mat <- as(object = matrix(rnorm(1000), nrow = 10, ncol = 10), Class = "dgCMatrix") +mat <- as(object = matrix(rnorm(100), nrow = 10, ncol = 10), Class = "dgCMatrix") test_that("Row scaling with known stats works", { mat.rowmeans <- rowMeans(x = mat) mat.sd <- apply(X = mat, MARGIN = 1, FUN = sd) expect_equal( - t(scale(t(as.matrix(mat)), center = mat.rowmeans, scale = mat.sd)), + t(scale(t(as.matrix(mat)), center = mat.rowmeans, scale = mat.sd)), FastSparseRowScaleWithKnownStats(mat = mat, mu = mat.rowmeans, sigma = mat.sd, scale = TRUE, center = TRUE, scale_max = 10, display_progress = FALSE), check.attributes = FALSE ) expect_equal( - t(scale(t(as.matrix(mat)), center = FALSE, scale = mat.sd)), + t(scale(t(as.matrix(mat)), center = FALSE, scale = mat.sd)), FastSparseRowScaleWithKnownStats(mat = mat, mu = mat.rowmeans, sigma = mat.sd, scale = TRUE, center = FALSE, scale_max = 10, display_progress = FALSE), check.attributes = FALSE ) expect_equal( - t(scale(t(as.matrix(mat)), center = mat.rowmeans, scale = FALSE)), + t(scale(t(as.matrix(mat)), center = mat.rowmeans, scale = FALSE)), FastSparseRowScaleWithKnownStats(mat = mat, mu = mat.rowmeans, sigma = mat.sd, scale = FALSE, center = TRUE, scale_max = 10, display_progress = FALSE), check.attributes = FALSE ) diff --git a/vignettes/mixscape_vignette.Rmd b/vignettes/mixscape_vignette.Rmd index 64566e00c..4edf7fe62 100644 --- a/vignettes/mixscape_vignette.Rmd +++ b/vignettes/mixscape_vignette.Rmd @@ -265,7 +265,7 @@ To ensure mixscape is assigning the correct perturbation status to cells we can # Explore the perturbation scores of cells. PlotPerturbScore(object = eccite, target.gene.ident = "IFNGR2", - group.by = "mixscape_class", + mixscape.class = "mixscape_class", col = "coral2") +labs(fill = "mixscape class") # Inspect the posterior probability values in NP and KO cells. 
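A sketch of the updated `PlotPerturbScore()` interface exercised in the vignette chunk above, showing the new arguments; the `eccite` object comes from the vignette, while the `cell_type` metadata column is illustrative:

```r
# Color cells by their mixscape classification (the default behavior)
PlotPerturbScore(
  object = eccite,
  target.gene.ident = "IFNGR2",
  mixscape.class = "mixscape_class",
  col = "coral2"
)
# Facet the density plots by cell type and use the pre-mixscape target gene labels
PlotPerturbScore(
  object = eccite,
  target.gene.ident = "IFNGR2",
  before.mixscape = TRUE,
  split.by = "cell_type",
  col = "coral2"
)
```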
diff --git a/vignettes/pbmc3k_tutorial.Rmd b/vignettes/pbmc3k_tutorial.Rmd index 4cd571e3a..bca5952de 100644 --- a/vignettes/pbmc3k_tutorial.Rmd +++ b/vignettes/pbmc3k_tutorial.Rmd @@ -302,9 +302,9 @@ Seurat can help you find markers that define clusters via differential expressio The `min.pct` argument requires a feature to be detected at a minimum percentage in either of the two groups of cells, and the thresh.test argument requires a feature to be differentially expressed (on average) by some amount between the two groups. You can set both of these to 0, but with a dramatic increase in time - since this will test a large number of features that are unlikely to be highly discriminatory. As another option to speed up these computations, `max.cells.per.ident` can be set. This will downsample each identity class to have no more cells than whatever this is set to. While there is generally going to be a loss in power, the speed increases can be significant and the most highly differentially expressed features will likely still rise to the top. ```{r markers1, fig.height=8, fig.width=15} -# find all markers of cluster 1 -cluster1.markers <- FindMarkers(pbmc, ident.1 = 2, min.pct = 0.25) -head(cluster1.markers, n = 5) +# find all markers of cluster 2 +cluster2.markers <- FindMarkers(pbmc, ident.1 = 2, min.pct = 0.25) +head(cluster2.markers, n = 5) # find all markers distinguishing cluster 5 from clusters 0 and 3 cluster5.markers <- FindMarkers(pbmc, ident.1 = 5, ident.2 = c(0, 3), min.pct = 0.25) head(cluster5.markers, n = 5) @@ -316,7 +316,7 @@ pbmc.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC) Seurat has several tests for differential expression which can be set with the test.use parameter (see our [DE vignette](de_vignette.html) for details). For example, the ROC test returns the 'classification power' for any individual marker (ranging from 0 - random, to 1 - perfect). ```{r markersroc, fig.height=8, fig.width=15} -cluster1.markers <- FindMarkers(pbmc, ident.1 = 0, logfc.threshold = 0.25, test.use = "roc", only.pos = TRUE) +cluster0.markers <- FindMarkers(pbmc, ident.1 = 0, logfc.threshold = 0.25, test.use = "roc", only.pos = TRUE) ``` We include several tools for visualizing marker expression. `VlnPlot()` (shows expression probability distributions across clusters), and `FeaturePlot()` (visualizes feature expression on a tSNE or PCA plot) are our most commonly used visualizations. We also suggest exploring `RidgePlot()`, `CellScatter()`, and `DotPlot()` as additional methods to view your dataset.
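A sketch of the `max.cells.per.ident` option described in the tutorial paragraph above, assuming the tutorial's `pbmc` object; downsampling each identity class trades some statistical power for speed:

```r
# Test at most 200 cells per identity class; the most strongly differentially
# expressed features will typically still rank near the top
cluster2.markers.fast <- FindMarkers(
  object = pbmc,
  ident.1 = 2,
  min.pct = 0.25,
  max.cells.per.ident = 200
)
head(cluster2.markers.fast, n = 5)
```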