Skip to content

Commit

Permalink
Merge pull request #37 from legumeinfo/develop
Browse files: browse the repository at this point in the history
added hierarchical categories for expression data
  • Loading branch information
ctcncgr authored Nov 12, 2024
2 parents 9c3b2e0 + bdee230 commit 090e92f
Showing 1 changed file with 17 additions and 50 deletions.
67 changes: 17 additions & 50 deletions scripts/process_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,33 +198,13 @@ def process_collections(self, cmds_only, mode):

if mode == "jbrowse": # for jbrowse
if url.endswith("bw"):

# print("\nCollection_type:dsfile: \n: ",self.files[collection_type][dsfile])

bw_name = self.files[collection_type][dsfile].get(
"name", None
)
bw_id = bw_name.split(".")[-2:]
project_id = ".".join(bw_name.split(".")[1:-2])
cmd = f"jbrowse add-track {url} --name {bw_id[0]} --assemblyNames {parent[0]} --category expression,{project_id} --out {os.path.abspath(self.out_dir)} --force"

# cmd = f'jbrowse add-track {bw_name} --assemblyNames {name} --out {os.path.abspath(self.out_dir)} --load copy --force'
cmd = f"jbrowse add-track {url} --name {bw_id[0]} --assemblyNames {parent[0]} --out {os.path.abspath(self.out_dir)} --force"
#cmd = f"jbrowse add-track {self.from_github}/{genus}/{species}/expression/{project_id}/{bw_name} --name {bw_id[0]} --assemblyNames {parent[0]} --load copy --out {os.path.abspath(self.out_dir)} --force"
"""
dis_name = self.files[collection_type][dsfile].get(
"name", None
)
print("disname is: ", dis_name)
# path = (f'{self.from_github}/{genus}/{species}/expression/')
if os.path.exists(f'{self.from_github}/{genus}/{species}/expression/'):
print("Directory exists!")
for fname in os.listdir(f'{self.from_github}/{genus}/{species}/expression/'):
# looks at for bigwigs of matching strain:
if fname.endswith(f'{dis_name}.bw'):
print("\nFound big wig: ", fname, "\n")
cmd = f'jbrowse add-track {self.from_github}/{genus}/{species}/expression/{fname} --assemblyNames {name} --out {os.path.abspath(self.out_dir)} --force'
else: continue
"""
elif mode == "blast": # for blast
continue # Not blastable at the moment

Expand All @@ -238,23 +218,6 @@ def process_collections(self, cmds_only, mode):
cmd, shell=True, executable="/bin/bash"
): # execute cmd and check exit value = 0
logger.error(f"Non-zero exit value: {cmd}")
"""
if os.path.exists(f'{self.from_github}/{genus}/{species}/expression/'):
dis_name = self.files[collection_type][dsfile].get("name", None)
for fname in os.listdir(f'{self.from_github}/{genus}/{species}/expression/'):
if fname.endswith(f'{dis_name}.bw'):
print("\nBigwig file found: ", fname, "\n")
# captures display name to match with potential bigwig files
cmd2 = f'jbrowse add-track {self.from_github}/{genus}/{species}/expression/{fname} --assemblyNames {name} --load copy --out {os.path.abspath(self.out_dir)} --force'
if subprocess.check_call(
cmd2, shell=True, executable="/bin/bash"
):
logger.error(f'Couldn\'t add bigwig')
"""

def populate_jbrowse2(self, out_dir, cmds_only=False):
"""Populate jbrowse2 config object from collected objects"""
Expand Down Expand Up @@ -425,7 +388,7 @@ def add_collections(self, collection_type, genus, species):
]
}
linear_url = f"{self.jbrowse_url}/?config=config.json&session=spec-{linear_session}" # build the URL for the resource

linear_data = {
"name": f"JBrowse2 {lookup}",
"URL": str(linear_url).replace(
Expand Down Expand Up @@ -615,7 +578,7 @@ def add_collections(self, collection_type, genus, species):
) # add data for later writing in resources
###
elif collection_type == "expression": # add parent expr files
ref=""
ref = ""
# Synteny after the new changes. Parent is a tuple with both genome_main files
checksum_url = (
f"{self.datastore_url}{collection_dir}CHECKSUM.{parts[1]}.md5"
Expand Down Expand Up @@ -663,8 +626,8 @@ def add_collections(self, collection_type, genus, species):
}
logger.debug(self.files[collection_type][bw_lookup])

#url = f"{self.datastore_url}{collection_dir}{parts[0]}.{parts[1]}.genome_main.fna.gz" # genome_main in datastore_url
url = self.files['genomes'][parent]['url']
# url = f"{self.datastore_url}{collection_dir}{parts[0]}.{parts[1]}.genome_main.fna.gz" # genome_main in datastore_url
url = self.files["genomes"][parent]["url"]
fai_url = f"{url}.fai" # get fai file for jbrowse session construction
fai_response = self.get_remote(
fai_url
Expand All @@ -678,16 +641,17 @@ def add_collections(self, collection_type, genus, species):
logger.error(f"No fai file for: {url}")
sys.exit(1)


linear_session = { # LinearGenomeView object for JBrowse2
"views": [
{
"assembly": parent,
#sequence is currently hardcoded, don't know how "ref" works for genomes
# sequence is currently hardcoded, don't know how "ref" works for genomes
"loc": f"{ref}:1-{stop}", # JBrowse2 does not allow null loc
"type": "LinearGenomeView",
"tracks": [".".join(bw_lookup.split(".")[:-1])]
#["glyma.Wm82.gnm6.ann1.expr.mixed.Kour_Boone_2014.Clark_defective"]
"tracks": [
".".join(bw_lookup.split(".")[:-1])
],
# ["glyma.Wm82.gnm6.ann1.expr.mixed.Kour_Boone_2014.Clark_defective"]
# " gff3tabix_genes " ,
# " volvox_filtered_vcf " ,
# " volvox_microarray " ,
Expand All @@ -697,7 +661,7 @@ def add_collections(self, collection_type, genus, species):
]
}
linear_url = f"{self.jbrowse_url}/?config=config.json&session=spec-{linear_session}" # build the URL for the resource

linear_data = {
"name": f"JBrowse2 {parent}",
"URL": str(linear_url).replace(
Expand All @@ -710,9 +674,12 @@ def add_collections(self, collection_type, genus, species):
self.infraspecies_resources[strain_lookup] = (
[]
) # initialize infraspecies list within species
if self.jbrowse_url: # dont add data if no jbrowse url set
self.infraspecies_resources[strain_lookup].append(linear_data)

if (
self.jbrowse_url
): # dont add data if no jbrowse url set
self.infraspecies_resources[strain_lookup].append(
linear_data
)
logger.debug(f"linear data for bw: {linear_data} \n")
###

Expand Down

0 comments on commit 090e92f

Please sign in to comment.