Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added hierarchical categories for expression data #37

Merged
merged 4 commits into from
Nov 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 17 additions & 50 deletions scripts/process_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,33 +198,13 @@ def process_collections(self, cmds_only, mode):

if mode == "jbrowse": # for jbrowse
if url.endswith("bw"):

# print("\nCollection_type:dsfile: \n: ",self.files[collection_type][dsfile])

bw_name = self.files[collection_type][dsfile].get(
"name", None
)
bw_id = bw_name.split(".")[-2:]
project_id = ".".join(bw_name.split(".")[1:-2])
cmd = f"jbrowse add-track {url} --name {bw_id[0]} --assemblyNames {parent[0]} --category expression,{project_id} --out {os.path.abspath(self.out_dir)} --force"

# cmd = f'jbrowse add-track {bw_name} --assemblyNames {name} --out {os.path.abspath(self.out_dir)} --load copy --force'
cmd = f"jbrowse add-track {url} --name {bw_id[0]} --assemblyNames {parent[0]} --out {os.path.abspath(self.out_dir)} --force"
#cmd = f"jbrowse add-track {self.from_github}/{genus}/{species}/expression/{project_id}/{bw_name} --name {bw_id[0]} --assemblyNames {parent[0]} --load copy --out {os.path.abspath(self.out_dir)} --force"
"""
dis_name = self.files[collection_type][dsfile].get(
"name", None
)
print("disname is: ", dis_name)
# path = (f'{self.from_github}/{genus}/{species}/expression/')
if os.path.exists(f'{self.from_github}/{genus}/{species}/expression/'):
print("Directory exists!")
for fname in os.listdir(f'{self.from_github}/{genus}/{species}/expression/'):
# looks at for bigwigs of matching strain:
if fname.endswith(f'{dis_name}.bw'):
print("\nFound big wig: ", fname, "\n")
cmd = f'jbrowse add-track {self.from_github}/{genus}/{species}/expression/{fname} --assemblyNames {name} --out {os.path.abspath(self.out_dir)} --force'
else: continue
"""
elif mode == "blast": # for blast
continue # Not blastable at the moment

Expand All @@ -238,23 +218,6 @@ def process_collections(self, cmds_only, mode):
cmd, shell=True, executable="/bin/bash"
): # execute cmd and check exit value = 0
logger.error(f"Non-zero exit value: {cmd}")
"""
if os.path.exists(f'{self.from_github}/{genus}/{species}/expression/'):
dis_name = self.files[collection_type][dsfile].get("name", None)
for fname in os.listdir(f'{self.from_github}/{genus}/{species}/expression/'):


if fname.endswith(f'{dis_name}.bw'):

print("\nBigwig file found: ", fname, "\n")
# captures display name to match with potential bigwig files

cmd2 = f'jbrowse add-track {self.from_github}/{genus}/{species}/expression/{fname} --assemblyNames {name} --load copy --out {os.path.abspath(self.out_dir)} --force'
if subprocess.check_call(
cmd2, shell=True, executable="/bin/bash"
):
logger.error(f'Couldn\'t add bigwig')
"""

def populate_jbrowse2(self, out_dir, cmds_only=False):
"""Populate jbrowse2 config object from collected objects"""
Expand Down Expand Up @@ -425,7 +388,7 @@ def add_collections(self, collection_type, genus, species):
]
}
linear_url = f"{self.jbrowse_url}/?config=config.json&session=spec-{linear_session}" # build the URL for the resource

linear_data = {
"name": f"JBrowse2 {lookup}",
"URL": str(linear_url).replace(
Expand Down Expand Up @@ -615,7 +578,7 @@ def add_collections(self, collection_type, genus, species):
) # add data for later writing in resources
###
elif collection_type == "expression": # add parent expr files
ref=""
ref = ""
# Synteny after the new changes. Parent is a tuple with both genome_main files
checksum_url = (
f"{self.datastore_url}{collection_dir}CHECKSUM.{parts[1]}.md5"
Expand Down Expand Up @@ -663,8 +626,8 @@ def add_collections(self, collection_type, genus, species):
}
logger.debug(self.files[collection_type][bw_lookup])

#url = f"{self.datastore_url}{collection_dir}{parts[0]}.{parts[1]}.genome_main.fna.gz" # genome_main in datastore_url
url = self.files['genomes'][parent]['url']
# url = f"{self.datastore_url}{collection_dir}{parts[0]}.{parts[1]}.genome_main.fna.gz" # genome_main in datastore_url
url = self.files["genomes"][parent]["url"]
fai_url = f"{url}.fai" # get fai file for jbrowse session construction
fai_response = self.get_remote(
fai_url
Expand All @@ -678,16 +641,17 @@ def add_collections(self, collection_type, genus, species):
logger.error(f"No fai file for: {url}")
sys.exit(1)


linear_session = { # LinearGenomeView object for JBrowse2
"views": [
{
"assembly": parent,
#sequence is currently hardcoded, don't know how "ref" works for genomes
# sequence is currently hardcoded, don't know how "ref" works for genomes
"loc": f"{ref}:1-{stop}", # JBrowse2 does not allow null loc
"type": "LinearGenomeView",
"tracks": [".".join(bw_lookup.split(".")[:-1])]
#["glyma.Wm82.gnm6.ann1.expr.mixed.Kour_Boone_2014.Clark_defective"]
"tracks": [
".".join(bw_lookup.split(".")[:-1])
],
# ["glyma.Wm82.gnm6.ann1.expr.mixed.Kour_Boone_2014.Clark_defective"]
# " gff3tabix_genes " ,
# " volvox_filtered_vcf " ,
# " volvox_microarray " ,
Expand All @@ -697,7 +661,7 @@ def add_collections(self, collection_type, genus, species):
]
}
linear_url = f"{self.jbrowse_url}/?config=config.json&session=spec-{linear_session}" # build the URL for the resource

linear_data = {
"name": f"JBrowse2 {parent}",
"URL": str(linear_url).replace(
Expand All @@ -710,9 +674,12 @@ def add_collections(self, collection_type, genus, species):
self.infraspecies_resources[strain_lookup] = (
[]
) # initialize infraspecies list within species
if self.jbrowse_url: # dont add data if no jbrowse url set
self.infraspecies_resources[strain_lookup].append(linear_data)

if (
self.jbrowse_url
): # dont add data if no jbrowse url set
self.infraspecies_resources[strain_lookup].append(
linear_data
)
logger.debug(f"linear data for bw: {linear_data} \n")
###

Expand Down
Loading