Merge pull request #37 from legumeinfo/develop

Added hierarchical categories for expression data: bigWig tracks are now added to JBrowse with `--category expression,<project_id>`.
legumeinfo · Nov 12, 2024 · 090e92f · 090e92f
2 parents 9c3b2e0 + bdee230
commit 090e92f
Showing 1 changed file with 17 additions and 50 deletions.
diff --git a/scripts/process_collections.py b/scripts/process_collections.py
@@ -198,33 +198,13 @@ def process_collections(self, cmds_only, mode):
 
                     if mode == "jbrowse":  # for jbrowse
                         if url.endswith("bw"):
-
-                            # print("\nCollection_type:dsfile: \n: ",self.files[collection_type][dsfile])
-
                             bw_name = self.files[collection_type][dsfile].get(
                                 "name", None
                             )
                             bw_id = bw_name.split(".")[-2:]
                             project_id = ".".join(bw_name.split(".")[1:-2])
+                            cmd = f"jbrowse add-track {url} --name {bw_id[0]} --assemblyNames {parent[0]} --category expression,{project_id} --out {os.path.abspath(self.out_dir)} --force"
 
-                            # cmd = f'jbrowse add-track {bw_name} --assemblyNames {name} --out {os.path.abspath(self.out_dir)} --load copy --force'
-                            cmd = f"jbrowse add-track {url} --name {bw_id[0]} --assemblyNames {parent[0]} --out {os.path.abspath(self.out_dir)} --force"
-                            #cmd = f"jbrowse add-track {self.from_github}/{genus}/{species}/expression/{project_id}/{bw_name} --name {bw_id[0]} --assemblyNames {parent[0]} --load copy --out {os.path.abspath(self.out_dir)} --force"
-                        """
-                        dis_name = self.files[collection_type][dsfile].get(
-                            "name", None
-                        )
-                        print("disname is: ", dis_name)
-                        # path = (f'{self.from_github}/{genus}/{species}/expression/')
-                        if os.path.exists(f'{self.from_github}/{genus}/{species}/expression/'):
-                            print("Directory exists!")
-                            for fname in os.listdir(f'{self.from_github}/{genus}/{species}/expression/'):
-                                # looks at for bigwigs of matching strain:
-                                if fname.endswith(f'{dis_name}.bw'):
-                                    print("\nFound big wig: ", fname, "\n")
-                                    cmd = f'jbrowse add-track {self.from_github}/{genus}/{species}/expression/{fname} --assemblyNames {name} --out {os.path.abspath(self.out_dir)} --force'
-                        else: continue
-                        """
                     elif mode == "blast":  # for blast
                         continue  # Not blastable at the moment
 
@@ -238,23 +218,6 @@ def process_collections(self, cmds_only, mode):
                     cmd, shell=True, executable="/bin/bash"
                 ):  # execute cmd and check exit value = 0
                     logger.error(f"Non-zero exit value: {cmd}")
-                """
-                if os.path.exists(f'{self.from_github}/{genus}/{species}/expression/'):
-                    dis_name = self.files[collection_type][dsfile].get("name", None)
-                    for fname in os.listdir(f'{self.from_github}/{genus}/{species}/expression/'):
-                        
-
-                        if fname.endswith(f'{dis_name}.bw'):
-
-                            print("\nBigwig file found: ", fname, "\n")
-                                # captures display name to match with potential bigwig files
-
-                            cmd2 = f'jbrowse add-track {self.from_github}/{genus}/{species}/expression/{fname} --assemblyNames {name} --load copy --out {os.path.abspath(self.out_dir)} --force'
-                            if subprocess.check_call(
-                                cmd2, shell=True, executable="/bin/bash"
-                            ): 
-                                logger.error(f'Couldn\'t add bigwig')
-                """
 
     def populate_jbrowse2(self, out_dir, cmds_only=False):
         """Populate jbrowse2 config object from collected objects"""
@@ -425,7 +388,7 @@ def add_collections(self, collection_type, genus, species):
                     ]
                 }
                 linear_url = f"{self.jbrowse_url}/?config=config.json&session=spec-{linear_session}"  # build the URL for the resource
-                
+
                 linear_data = {
                     "name": f"JBrowse2 {lookup}",
                     "URL": str(linear_url).replace(
@@ -615,7 +578,7 @@ def add_collections(self, collection_type, genus, species):
                                     )  # add data for later writing in resources
             ###
             elif collection_type == "expression":  # add parent expr files
-                ref=""
+                ref = ""
                 # Synteny after the new changes. Parent is a tuple with both genome_main files
                 checksum_url = (
                     f"{self.datastore_url}{collection_dir}CHECKSUM.{parts[1]}.md5"
@@ -663,8 +626,8 @@ def add_collections(self, collection_type, genus, species):
                                 }
                                 logger.debug(self.files[collection_type][bw_lookup])
 
-                                #url =  f"{self.datastore_url}{collection_dir}{parts[0]}.{parts[1]}.genome_main.fna.gz"  # genome_main in datastore_url
-                                url = self.files['genomes'][parent]['url']
+                                # url =  f"{self.datastore_url}{collection_dir}{parts[0]}.{parts[1]}.genome_main.fna.gz"  # genome_main in datastore_url
+                                url = self.files["genomes"][parent]["url"]
                                 fai_url = f"{url}.fai"  # get fai file for jbrowse session construction
                                 fai_response = self.get_remote(
                                     fai_url
@@ -678,16 +641,17 @@ def add_collections(self, collection_type, genus, species):
                                     logger.error(f"No fai file for: {url}")
                                     sys.exit(1)
 
-
                                 linear_session = {  # LinearGenomeView object for JBrowse2
                                     "views": [
                                         {
                                             "assembly": parent,
-                                            #sequence is currently hardcoded, don't know how "ref" works for genomes
+                                            # sequence is currently hardcoded, don't know how "ref" works for genomes
                                             "loc": f"{ref}:1-{stop}",  # JBrowse2 does not allow null loc
                                             "type": "LinearGenomeView",
-                                            "tracks": [".".join(bw_lookup.split(".")[:-1])]
-                                            #["glyma.Wm82.gnm6.ann1.expr.mixed.Kour_Boone_2014.Clark_defective"]
+                                            "tracks": [
+                                                ".".join(bw_lookup.split(".")[:-1])
+                                            ],
+                                            # ["glyma.Wm82.gnm6.ann1.expr.mixed.Kour_Boone_2014.Clark_defective"]
                                             #                                                " gff3tabix_genes " ,
                                             #                                                " volvox_filtered_vcf " ,
                                             #                                                " volvox_microarray " ,
@@ -697,7 +661,7 @@ def add_collections(self, collection_type, genus, species):
                                     ]
                                 }
                                 linear_url = f"{self.jbrowse_url}/?config=config.json&session=spec-{linear_session}"  # build the URL for the resource
-                                
+
                                 linear_data = {
                                     "name": f"JBrowse2 {parent}",
                                     "URL": str(linear_url).replace(
@@ -710,9 +674,12 @@ def add_collections(self, collection_type, genus, species):
                                     self.infraspecies_resources[strain_lookup] = (
                                         []
                                     )  # initialize infraspecies list within species
-                                if self.jbrowse_url:  # dont add data if no jbrowse url set
-                                    self.infraspecies_resources[strain_lookup].append(linear_data)
-
+                                if (
+                                    self.jbrowse_url
+                                ):  # dont add data if no jbrowse url set
+                                    self.infraspecies_resources[strain_lookup].append(
+                                        linear_data
+                                    )
                                 logger.debug(f"linear data for bw: {linear_data} \n")
             ###