From 8c91b6dadda5b1738ee119c18fc7c050f8ec45f4 Mon Sep 17 00:00:00 2001
From: elavelle <elavelle@ncgr.org>
Date: Tue, 12 Nov 2024 09:12:32 -0700
Subject: [PATCH 1/4] added jbrowse2 hierarchy for bigwigs

---
 scripts/process_collections.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/process_collections.py b/scripts/process_collections.py
index 0937d36..9f72c5d 100644
--- a/scripts/process_collections.py
+++ b/scripts/process_collections.py
@@ -208,7 +208,7 @@ def process_collections(self, cmds_only, mode):
                             project_id = ".".join(bw_name.split(".")[1:-2])
 
                             # cmd = f'jbrowse add-track {bw_name} --assemblyNames {name} --out {os.path.abspath(self.out_dir)} --load copy --force'
-                            cmd = f"jbrowse add-track {url} --name {bw_id[0]} --assemblyNames {parent[0]} --out {os.path.abspath(self.out_dir)} --force"
+                            cmd = f"jbrowse add-track {url} --name {bw_id[0]} --assemblyNames {parent[0]} --category expression,{project_id} --out {os.path.abspath(self.out_dir)} --force"
                             #cmd = f"jbrowse add-track {self.from_github}/{genus}/{species}/expression/{project_id}/{bw_name} --name {bw_id[0]} --assemblyNames {parent[0]} --load copy --out {os.path.abspath(self.out_dir)} --force"
                         """
                         dis_name = self.files[collection_type][dsfile].get(
@@ -705,14 +705,14 @@ def add_collections(self, collection_type, genus, species):
                                     ),  # url encode for .yml file and Jekyll linking
                                     "description": "JBrowse2 Linear Genome View",
                                 }  # the object that will be written into the .yml file
-
+                                print(f"linear data: {linear_data}")
                                 if strain_lookup not in self.infraspecies_resources:
                                     self.infraspecies_resources[strain_lookup] = (
                                         []
                                     )  # initialize infraspecies list within species
                                 if self.jbrowse_url:  # dont add data if no jbrowse url set
                                     self.infraspecies_resources[strain_lookup].append(linear_data)
-                    
+
                                 logger.debug(f"linear data for bw: {linear_data} \n")
             ###
 

From b0f69bcfb3362b46eff85e49c93666dfe65014bf Mon Sep 17 00:00:00 2001
From: elavelle <elavelle@ncgr.org>
Date: Tue, 12 Nov 2024 09:16:39 -0700
Subject: [PATCH 2/4] commented out a debug line

---
 scripts/process_collections.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/process_collections.py b/scripts/process_collections.py
index 9f72c5d..1c11eff 100644
--- a/scripts/process_collections.py
+++ b/scripts/process_collections.py
@@ -705,14 +705,13 @@ def add_collections(self, collection_type, genus, species):
                                     ),  # url encode for .yml file and Jekyll linking
                                     "description": "JBrowse2 Linear Genome View",
                                 }  # the object that will be written into the .yml file
-                                print(f"linear data: {linear_data}")
+                                # print(f"linear data: {linear_data}")
                                 if strain_lookup not in self.infraspecies_resources:
                                     self.infraspecies_resources[strain_lookup] = (
                                         []
                                     )  # initialize infraspecies list within species
                                 if self.jbrowse_url:  # dont add data if no jbrowse url set
                                     self.infraspecies_resources[strain_lookup].append(linear_data)
-
                                 logger.debug(f"linear data for bw: {linear_data} \n")
             ###
 

From 04c5e2642f8db29c8e4ce1c483470d7ec9df08f0 Mon Sep 17 00:00:00 2001
From: elavelle <elavelle@ncgr.org>
Date: Tue, 12 Nov 2024 11:02:09 -0700
Subject: [PATCH 3/4] removed commented-out bigwig code and debug prints #36

---
 scripts/process_collections.py | 41 ++--------------------------------
 1 file changed, 2 insertions(+), 39 deletions(-)

diff --git a/scripts/process_collections.py b/scripts/process_collections.py
index 1c11eff..2fd0611 100644
--- a/scripts/process_collections.py
+++ b/scripts/process_collections.py
@@ -198,33 +198,13 @@ def process_collections(self, cmds_only, mode):
 
                     if mode == "jbrowse":  # for jbrowse
                         if url.endswith("bw"):
-
-                            # print("\nCollection_type:dsfile: \n: ",self.files[collection_type][dsfile])
-
                             bw_name = self.files[collection_type][dsfile].get(
                                 "name", None
                             )
                             bw_id = bw_name.split(".")[-2:]
                             project_id = ".".join(bw_name.split(".")[1:-2])
-
-                            # cmd = f'jbrowse add-track {bw_name} --assemblyNames {name} --out {os.path.abspath(self.out_dir)} --load copy --force'
                             cmd = f"jbrowse add-track {url} --name {bw_id[0]} --assemblyNames {parent[0]} --category expression,{project_id} --out {os.path.abspath(self.out_dir)} --force"
-                            #cmd = f"jbrowse add-track {self.from_github}/{genus}/{species}/expression/{project_id}/{bw_name} --name {bw_id[0]} --assemblyNames {parent[0]} --load copy --out {os.path.abspath(self.out_dir)} --force"
-                        """
-                        dis_name = self.files[collection_type][dsfile].get(
-                            "name", None
-                        )
-                        print("disname is: ", dis_name)
-                        # path = (f'{self.from_github}/{genus}/{species}/expression/')
-                        if os.path.exists(f'{self.from_github}/{genus}/{species}/expression/'):
-                            print("Directory exists!")
-                            for fname in os.listdir(f'{self.from_github}/{genus}/{species}/expression/'):
-                                # looks at for bigwigs of matching strain:
-                                if fname.endswith(f'{dis_name}.bw'):
-                                    print("\nFound big wig: ", fname, "\n")
-                                    cmd = f'jbrowse add-track {self.from_github}/{genus}/{species}/expression/{fname} --assemblyNames {name} --out {os.path.abspath(self.out_dir)} --force'
-                        else: continue
-                        """
+
                     elif mode == "blast":  # for blast
                         continue  # Not blastable at the moment
 
@@ -238,23 +218,6 @@ def process_collections(self, cmds_only, mode):
                     cmd, shell=True, executable="/bin/bash"
                 ):  # execute cmd and check exit value = 0
                     logger.error(f"Non-zero exit value: {cmd}")
-                """
-                if os.path.exists(f'{self.from_github}/{genus}/{species}/expression/'):
-                    dis_name = self.files[collection_type][dsfile].get("name", None)
-                    for fname in os.listdir(f'{self.from_github}/{genus}/{species}/expression/'):
-                        
-
-                        if fname.endswith(f'{dis_name}.bw'):
-
-                            print("\nBigwig file found: ", fname, "\n")
-                                # captures display name to match with potential bigwig files
-
-                            cmd2 = f'jbrowse add-track {self.from_github}/{genus}/{species}/expression/{fname} --assemblyNames {name} --load copy --out {os.path.abspath(self.out_dir)} --force'
-                            if subprocess.check_call(
-                                cmd2, shell=True, executable="/bin/bash"
-                            ): 
-                                logger.error(f'Couldn\'t add bigwig')
-                """
 
     def populate_jbrowse2(self, out_dir, cmds_only=False):
         """Populate jbrowse2 config object from collected objects"""
@@ -705,7 +668,7 @@ def add_collections(self, collection_type, genus, species):
                                     ),  # url encode for .yml file and Jekyll linking
                                     "description": "JBrowse2 Linear Genome View",
                                 }  # the object that will be written into the .yml file
-                                # print(f"linear data: {linear_data}")
+
                                 if strain_lookup not in self.infraspecies_resources:
                                     self.infraspecies_resources[strain_lookup] = (
                                         []

From bdee230ab1369305beb3415e11726051573bfd6f Mon Sep 17 00:00:00 2001
From: elavelle <el239@nau.edu>
Date: Tue, 12 Nov 2024 11:09:54 -0700
Subject: [PATCH 4/4] format with black #36

---
 scripts/process_collections.py | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/scripts/process_collections.py b/scripts/process_collections.py
index 2fd0611..b19d54d 100644
--- a/scripts/process_collections.py
+++ b/scripts/process_collections.py
@@ -388,7 +388,7 @@ def add_collections(self, collection_type, genus, species):
                     ]
                 }
                 linear_url = f"{self.jbrowse_url}/?config=config.json&session=spec-{linear_session}"  # build the URL for the resource
-                
+
                 linear_data = {
                     "name": f"JBrowse2 {lookup}",
                     "URL": str(linear_url).replace(
@@ -578,7 +578,7 @@ def add_collections(self, collection_type, genus, species):
                                     )  # add data for later writing in resources
             ###
             elif collection_type == "expression":  # add parent expr files
-                ref=""
+                ref = ""
                 # Synteny after the new changes. Parent is a tuple with both genome_main files
                 checksum_url = (
                     f"{self.datastore_url}{collection_dir}CHECKSUM.{parts[1]}.md5"
@@ -626,8 +626,8 @@ def add_collections(self, collection_type, genus, species):
                                 }
                                 logger.debug(self.files[collection_type][bw_lookup])
 
-                                #url =  f"{self.datastore_url}{collection_dir}{parts[0]}.{parts[1]}.genome_main.fna.gz"  # genome_main in datastore_url
-                                url = self.files['genomes'][parent]['url']
+                                # url =  f"{self.datastore_url}{collection_dir}{parts[0]}.{parts[1]}.genome_main.fna.gz"  # genome_main in datastore_url
+                                url = self.files["genomes"][parent]["url"]
                                 fai_url = f"{url}.fai"  # get fai file for jbrowse session construction
                                 fai_response = self.get_remote(
                                     fai_url
@@ -641,16 +641,17 @@ def add_collections(self, collection_type, genus, species):
                                     logger.error(f"No fai file for: {url}")
                                     sys.exit(1)
 
-
                                 linear_session = {  # LinearGenomeView object for JBrowse2
                                     "views": [
                                         {
                                             "assembly": parent,
-                                            #sequence is currently hardcoded, don't know how "ref" works for genomes
+                                            # sequence is currently hardcoded, don't know how "ref" works for genomes
                                             "loc": f"{ref}:1-{stop}",  # JBrowse2 does not allow null loc
                                             "type": "LinearGenomeView",
-                                            "tracks": [".".join(bw_lookup.split(".")[:-1])]
-                                            #["glyma.Wm82.gnm6.ann1.expr.mixed.Kour_Boone_2014.Clark_defective"]
+                                            "tracks": [
+                                                ".".join(bw_lookup.split(".")[:-1])
+                                            ],
+                                            # ["glyma.Wm82.gnm6.ann1.expr.mixed.Kour_Boone_2014.Clark_defective"]
                                             #                                                " gff3tabix_genes " ,
                                             #                                                " volvox_filtered_vcf " ,
                                             #                                                " volvox_microarray " ,
@@ -660,7 +661,7 @@ def add_collections(self, collection_type, genus, species):
                                     ]
                                 }
                                 linear_url = f"{self.jbrowse_url}/?config=config.json&session=spec-{linear_session}"  # build the URL for the resource
-                                
+
                                 linear_data = {
                                     "name": f"JBrowse2 {parent}",
                                     "URL": str(linear_url).replace(
@@ -673,8 +674,12 @@ def add_collections(self, collection_type, genus, species):
                                     self.infraspecies_resources[strain_lookup] = (
                                         []
                                     )  # initialize infraspecies list within species
-                                if self.jbrowse_url:  # dont add data if no jbrowse url set
-                                    self.infraspecies_resources[strain_lookup].append(linear_data)
+                                if (
+                                    self.jbrowse_url
+                                ):  # dont add data if no jbrowse url set
+                                    self.infraspecies_resources[strain_lookup].append(
+                                        linear_data
+                                    )
                                 logger.debug(f"linear data for bw: {linear_data} \n")
             ###