Skip to content

Commit

Permalink
Remove whitespace in bulk RNA expression (#337)
Browse files Browse the repository at this point in the history
* remove whitespace in bulk RNA annotation

* reformat

* add details of random whitespace issue
  • Loading branch information
wxicu authored Aug 11, 2023
1 parent bbdf99b commit 35e5c69
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion pertpy/tools/_metadata/_cell_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ def __init__(self):
zip_file.extract("rnaseq_read_count_20220624.csv", path=settings.cachedir)

self.bulk_rna_sanger = pd.read_csv(bulk_rna_sanger_file_path, skiprows=[2, 3], header=[0, 1], index_col=[0, 1])

# issue: read count values contain random whitespace; it is unclear what it is supposed to mean
# solution: remove the whitespace and convert to int until DepMap updates the metadata
self.bulk_rna_sanger = self.bulk_rna_sanger.applymap(
lambda x: int(x.replace(" ", "")) if isinstance(x, str) else x
)
self.bulk_rna_sanger = self.bulk_rna_sanger.T
self.bulk_rna_sanger.index = self.bulk_rna_sanger.index.droplevel("model_id")
self.bulk_rna_sanger.columns = self.bulk_rna_sanger.columns.droplevel("gene_id")
Expand Down Expand Up @@ -396,7 +402,8 @@ def annotate_bulk_rna_expression(
sanger_rna_exp.index = adata.obs.index
adata.obsm["bulk_rna_expression_sanger"] = sanger_rna_exp
else:
ccle_expression = self.bulk_rna_broad.reindex(adata.obs[query_id])
broad_rna_exp = self.bulk_rna_broad[self.bulk_rna_broad.index.isin(adata.obs[query_id])]
ccle_expression = broad_rna_exp.reindex(adata.obs[query_id])
ccle_expression.index = adata.obs.index
adata.obsm["bulk_rna_expression_broad"] = ccle_expression

Expand Down

0 comments on commit 35e5c69

Please sign in to comment.