From 8c91b6dadda5b1738ee119c18fc7c050f8ec45f4 Mon Sep 17 00:00:00 2001 From: elavelle Date: Tue, 12 Nov 2024 09:12:32 -0700 Subject: [PATCH 1/4] added jbrowse2 hierarchy for bigwigs --- scripts/process_collections.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/process_collections.py b/scripts/process_collections.py index 0937d36..9f72c5d 100644 --- a/scripts/process_collections.py +++ b/scripts/process_collections.py @@ -208,7 +208,7 @@ def process_collections(self, cmds_only, mode): project_id = ".".join(bw_name.split(".")[1:-2]) # cmd = f'jbrowse add-track {bw_name} --assemblyNames {name} --out {os.path.abspath(self.out_dir)} --load copy --force' - cmd = f"jbrowse add-track {url} --name {bw_id[0]} --assemblyNames {parent[0]} --out {os.path.abspath(self.out_dir)} --force" + cmd = f"jbrowse add-track {url} --name {bw_id[0]} --assemblyNames {parent[0]} --category expression,{project_id} --out {os.path.abspath(self.out_dir)} --force" #cmd = f"jbrowse add-track {self.from_github}/{genus}/{species}/expression/{project_id}/{bw_name} --name {bw_id[0]} --assemblyNames {parent[0]} --load copy --out {os.path.abspath(self.out_dir)} --force" """ dis_name = self.files[collection_type][dsfile].get( @@ -705,14 +705,14 @@ def add_collections(self, collection_type, genus, species): ), # url encode for .yml file and Jekyll linking "description": "JBrowse2 Linear Genome View", } # the object that will be written into the .yml file - + print(f"linear data: {linear_data}") if strain_lookup not in self.infraspecies_resources: self.infraspecies_resources[strain_lookup] = ( [] ) # initialize infraspecies list within species if self.jbrowse_url: # dont add data if no jbrowse url set self.infraspecies_resources[strain_lookup].append(linear_data) - + logger.debug(f"linear data for bw: {linear_data} \n") ### From b0f69bcfb3362b46eff85e49c93666dfe65014bf Mon Sep 17 00:00:00 2001 From: elavelle Date: Tue, 12 Nov 2024 09:16:39 -0700 Subject: [PATCH 2/4] commented out a debug line --- scripts/process_collections.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/process_collections.py b/scripts/process_collections.py index 9f72c5d..1c11eff 100644 --- a/scripts/process_collections.py +++ b/scripts/process_collections.py @@ -705,14 +705,13 @@ def add_collections(self, collection_type, genus, species): ), # url encode for .yml file and Jekyll linking "description": "JBrowse2 Linear Genome View", } # the object that will be written into the .yml file - print(f"linear data: {linear_data}") + # print(f"linear data: {linear_data}") if strain_lookup not in self.infraspecies_resources: self.infraspecies_resources[strain_lookup] = ( [] ) # initialize infraspecies list within species if self.jbrowse_url: # dont add data if no jbrowse url set self.infraspecies_resources[strain_lookup].append(linear_data) - logger.debug(f"linear data for bw: {linear_data} \n") ### From 04c5e2642f8db29c8e4ce1c483470d7ec9df08f0 Mon Sep 17 00:00:00 2001 From: elavelle Date: Tue, 12 Nov 2024 11:02:09 -0700 Subject: [PATCH 3/4] added hierarchical categories #36 --- scripts/process_collections.py | 41 ++-------------------------------- 1 file changed, 2 insertions(+), 39 deletions(-) diff --git a/scripts/process_collections.py b/scripts/process_collections.py index 1c11eff..2fd0611 100644 --- a/scripts/process_collections.py +++ b/scripts/process_collections.py @@ -198,33 +198,13 @@ def process_collections(self, cmds_only, mode): if mode == "jbrowse": # for jbrowse if url.endswith("bw"): - - # print("\nCollection_type:dsfile: \n: ",self.files[collection_type][dsfile]) - bw_name = self.files[collection_type][dsfile].get( "name", None ) bw_id = bw_name.split(".")[-2:] project_id = ".".join(bw_name.split(".")[1:-2]) - - # cmd = f'jbrowse add-track {bw_name} --assemblyNames {name} --out {os.path.abspath(self.out_dir)} --load copy --force' cmd = f"jbrowse add-track {url} --name {bw_id[0]} --assemblyNames {parent[0]} --category expression,{project_id} --out {os.path.abspath(self.out_dir)} --force" - #cmd = f"jbrowse add-track {self.from_github}/{genus}/{species}/expression/{project_id}/{bw_name} --name {bw_id[0]} --assemblyNames {parent[0]} --load copy --out {os.path.abspath(self.out_dir)} --force" - """ - dis_name = self.files[collection_type][dsfile].get( - "name", None - ) - print("disname is: ", dis_name) - # path = (f'{self.from_github}/{genus}/{species}/expression/') - if os.path.exists(f'{self.from_github}/{genus}/{species}/expression/'): - print("Directory exists!") - for fname in os.listdir(f'{self.from_github}/{genus}/{species}/expression/'): - # looks at for bigwigs of matching strain: - if fname.endswith(f'{dis_name}.bw'): - print("\nFound big wig: ", fname, "\n") - cmd = f'jbrowse add-track {self.from_github}/{genus}/{species}/expression/{fname} --assemblyNames {name} --out {os.path.abspath(self.out_dir)} --force' - else: continue - """ + elif mode == "blast": # for blast continue # Not blastable at the moment @@ -238,23 +218,6 @@ def process_collections(self, cmds_only, mode): cmd, shell=True, executable="/bin/bash" ): # execute cmd and check exit value = 0 logger.error(f"Non-zero exit value: {cmd}") - """ - if os.path.exists(f'{self.from_github}/{genus}/{species}/expression/'): - dis_name = self.files[collection_type][dsfile].get("name", None) - for fname in os.listdir(f'{self.from_github}/{genus}/{species}/expression/'): - - - if fname.endswith(f'{dis_name}.bw'): - - print("\nBigwig file found: ", fname, "\n") - # captures display name to match with potential bigwig files - - cmd2 = f'jbrowse add-track {self.from_github}/{genus}/{species}/expression/{fname} --assemblyNames {name} --load copy --out {os.path.abspath(self.out_dir)} --force' - if subprocess.check_call( - cmd2, shell=True, executable="/bin/bash" - ): - logger.error(f'Couldn\'t add bigwig') - """ def populate_jbrowse2(self, out_dir, cmds_only=False): """Populate jbrowse2 config object from collected objects""" @@ -705,7 +668,7 @@ def add_collections(self, collection_type, genus, species): ), # url encode for .yml file and Jekyll linking "description": "JBrowse2 Linear Genome View", } # the object that will be written into the .yml file - # print(f"linear data: {linear_data}") + if strain_lookup not in self.infraspecies_resources: self.infraspecies_resources[strain_lookup] = ( [] From bdee230ab1369305beb3415e11726051573bfd6f Mon Sep 17 00:00:00 2001 From: elavelle Date: Tue, 12 Nov 2024 11:09:54 -0700 Subject: [PATCH 4/4] format with black #36 --- scripts/process_collections.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/scripts/process_collections.py b/scripts/process_collections.py index 2fd0611..b19d54d 100644 --- a/scripts/process_collections.py +++ b/scripts/process_collections.py @@ -388,7 +388,7 @@ def add_collections(self, collection_type, genus, species): ] } linear_url = f"{self.jbrowse_url}/?config=config.json&session=spec-{linear_session}" # build the URL for the resource - + linear_data = { "name": f"JBrowse2 {lookup}", "URL": str(linear_url).replace( @@ -578,7 +578,7 @@ def add_collections(self, collection_type, genus, species): ) # add data for later writing in resources ### elif collection_type == "expression": # add parent expr files - ref="" + ref = "" # Synteny after the new changes. Parent is a tuple with both genome_main files checksum_url = ( f"{self.datastore_url}{collection_dir}CHECKSUM.{parts[1]}.md5" @@ -626,8 +626,8 @@ def add_collections(self, collection_type, genus, species): } logger.debug(self.files[collection_type][bw_lookup]) - #url = f"{self.datastore_url}{collection_dir}{parts[0]}.{parts[1]}.genome_main.fna.gz" # genome_main in datastore_url - url = self.files['genomes'][parent]['url'] + # url = f"{self.datastore_url}{collection_dir}{parts[0]}.{parts[1]}.genome_main.fna.gz" # genome_main in datastore_url + url = self.files["genomes"][parent]["url"] fai_url = f"{url}.fai" # get fai file for jbrowse session construction fai_response = self.get_remote( fai_url @@ -641,16 +641,17 @@ def add_collections(self, collection_type, genus, species): logger.error(f"No fai file for: {url}") sys.exit(1) - linear_session = { # LinearGenomeView object for JBrowse2 "views": [ { "assembly": parent, - #sequence is currently hardcoded, don't know how "ref" works for genomes + # sequence is currently hardcoded, don't know how "ref" works for genomes "loc": f"{ref}:1-{stop}", # JBrowse2 does not allow null loc "type": "LinearGenomeView", - "tracks": [".".join(bw_lookup.split(".")[:-1])] - #["glyma.Wm82.gnm6.ann1.expr.mixed.Kour_Boone_2014.Clark_defective"] + "tracks": [ + ".".join(bw_lookup.split(".")[:-1]) + ], + # ["glyma.Wm82.gnm6.ann1.expr.mixed.Kour_Boone_2014.Clark_defective"] # " gff3tabix_genes " , # " volvox_filtered_vcf " , # " volvox_microarray " , @@ -660,7 +661,7 @@ def add_collections(self, collection_type, genus, species): ] } linear_url = f"{self.jbrowse_url}/?config=config.json&session=spec-{linear_session}" # build the URL for the resource - + linear_data = { "name": f"JBrowse2 {parent}", "URL": str(linear_url).replace( @@ -673,8 +674,12 @@ def add_collections(self, collection_type, genus, species): self.infraspecies_resources[strain_lookup] = ( [] ) # initialize infraspecies list within species - if self.jbrowse_url: # dont add data if no jbrowse url set - self.infraspecies_resources[strain_lookup].append(linear_data) + if ( + self.jbrowse_url + ): # dont add data if no jbrowse url set + self.infraspecies_resources[strain_lookup].append( + linear_data + ) logger.debug(f"linear data for bw: {linear_data} \n") ###