From 7fa89d8814ff7ffa2f52522a941fbb605d767f89 Mon Sep 17 00:00:00 2001
From: Nick-Eagles <nick.eagles@libd.org>
Date: Tue, 21 Nov 2023 16:53:18 -0500
Subject: [PATCH] Use the full gene set. Also use the normal categorical
 display of pathology groups since a Samui bug was fixed. Addresses most
 of #84

---
 code/16_samui/01-SPG_images.py | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/code/16_samui/01-SPG_images.py b/code/16_samui/01-SPG_images.py
index e01d4b57..62a3edd5 100644
--- a/code/16_samui/01-SPG_images.py
+++ b/code/16_samui/01-SPG_images.py
@@ -128,25 +128,10 @@
 #   any duplicated cases
 gene_df = gene_df.loc[: , ~gene_df.columns.duplicated()].copy()
 
-#   Samui seems to break when using > ~ 5,000 genes. Take just the genes where
-#   at least 10% of spots have nonzero counts
-gene_df = gene_df.loc[:, np.sum(gene_df > 0, axis = 0) > (gene_df.shape[0] * 0.1)].copy()
-
 assert default_gene in gene_df.columns, "Default gene not in AnnData"
 
 print('Using {} genes as features.'.format(gene_df.shape[1]))
 
-################################################################################
-#   Split 'path_groups' column into binary columns for each of its values
-################################################################################
-
-#   Circumvent a Samui bug (https://github.com/chaichontat/samui/issues/84);
-#   turn the categorical column 'path_groups' into several numeric columns with
-#   just values of 0 and 1
-path_df = pd.DataFrame()
-for path_group in path_groups:
-    path_df[path_group] = (spe.obs['path_groups'] == path_group).astype(int)
-
 ################################################################################
 #   Use the Samui API to create the importable directory for this sample
 ################################################################################
@@ -167,7 +152,7 @@
 )
 
 #   Add gene expression results (multiple columns) as a feature
-this_sample.add_csv_feature(
+this_sample.add_chunked_feature(
     gene_df, name = "Genes", coordName = "coords", dataType = "quantitative"
 )
 
@@ -178,9 +163,10 @@
 )
 
 #   Add pathology groups
+path_df = pd.DataFrame({'path_group': spe.obs['path_groups']})
 this_sample.add_csv_feature(
     path_df, name = "Pathology Group", coordName = "coords",
-    dataType = "quantitative"
+    dataType = "categorical"
 )
 
 this_sample.set_default_feature(group = "Genes", feature = default_gene)