Skip to content

Commit

Permalink
Merge pull request #121 from geneontology/noctua-issue-902b
Browse files Browse the repository at this point in the history
Use functional syntax instead of OBO
  • Loading branch information
kltm authored Oct 25, 2024
2 parents 43300c6 + c7d4eb7 commit 88d583a
Show file tree
Hide file tree
Showing 8 changed files with 318 additions and 281 deletions.
47 changes: 20 additions & 27 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
OBO = http://purl.obolibrary.org/obo

all: target all_obo neo.obo neo.owl
all: target all_ofn neo.obo neo.owl

# Remove the build trigger, the generated datasets.json, mirrored downloads
# and the per-source build products under target/.
# Both the legacy target/*.obo files and the target/*.ofn files produced by
# the neo-%.ofn rules are removed, so no stale per-source output survives.
# rm is run without -f, so a missing file makes it fail; the echo fallback
# turns that into an informational message instead of a failed target.
clean:
	rm trigger datasets.json mirror/*gz mirror/*tmp target/*.obo target/*.ofn || echo "not all files present, perhaps last build did not complete"

TEST_SRCS ?= sgd pombase
SRCS ?= sgd pombase mgi zfin rgd dictybase fb tair wb goa_human goa_human_complex goa_human_rna goa_human_isoform goa_pig xenbase pseudocap ecocyc goa_sars-cov-2 uniprot_reviewed
ROBOT_ENV = ROBOT_JAVA_ARGS=-Xmx12G
ROBOT = $(ROBOT_ENV) robot

OBO_SRCS = $(patsubst %,target/neo-%.obo,$(SRCS))
all_obo: $(OBO_SRCS)
test_obo: target $(patsubst %,target/neo-%.obo,$(TEST_SRCS))
OFN_SRCS = $(patsubst %,target/neo-%.ofn,$(SRCS))
all_ofn: $(OFN_SRCS)
test_ofn: target $(patsubst %,target/neo-%.ofn,$(TEST_SRCS))

#test: touch_trigger test_obo
test:
Expand All @@ -23,8 +25,8 @@ trigger:
touch $@

IMPORTS = imports/pr_import.obo
neo.obo: $(OBO_SRCS) $(IMPORTS)
owltools --create-ontology http://purl.obolibrary.org/obo/go/noctua/neo.owl $^ --merge-support-ontologies -o -f obo $@.tmp && grep -v ^owl-axioms $@.tmp > $@
neo.owl: $(OFN_SRCS) $(IMPORTS)
$(ROBOT) merge $(addprefix -i ,$^) annotate --ontology-iri 'http://purl.obolibrary.org/obo/go/noctua/neo.owl' convert -f owl -o $@.tmp && mv $@.tmp $@

## datasets.json is created as a throwaway in the NEO versions of the
## pipeline and is based on the go-site master data.
Expand All @@ -41,8 +43,8 @@ foo:
# BUG: temporary hardcode until https://github.com/geneontology/go-site/issues/1431 is resolved and a stable GPI URL is established
mirror/goa_sars-cov-2.gpi.gz:
wget --no-check-certificate https://raw.githubusercontent.com/Knowledge-Graph-Hub/kg-covid-19/master/curated/ORFs/uniprot_sars-cov-2.gpi -O mirror/goa_sars-cov-2.gpi && gzip mirror/goa_sars-cov-2.gpi
target/neo-goa_sars-cov-2.obo: mirror/goa_sars-cov-2.gpi.gz
gzip -dc $< | ./gpi2obo.pl -s Scov2 -n sars-cov-2 > $@.tmp && mv $@.tmp $@
target/neo-goa_sars-cov-2.ofn: mirror/goa_sars-cov-2.gpi.gz
gzip -dc $< | ./gpi2ofn.pl -s Scov2 -n sars-cov-2 > $@.tmp && mv $@.tmp $@

# ## In support of including viruses and bacteria
# ## (https://github.com/geneontology/neo/issues/77).
Expand All @@ -64,8 +66,8 @@ mirror/uniprot_reviewed.gpi.gz: datasets.json
perl filter.pl -v --metadata datasets.json --filter filter_list.txt --input mirror/uniprot_reviewed.gpi.tmp > mirror/filtered_uniprot_reviewed.gpi.tmp
gzip -c mirror/filtered_uniprot_reviewed.gpi.tmp > mirror/filtered_uniprot_reviewed.gpi.gz.tmp
mv mirror/filtered_uniprot_reviewed.gpi.gz.tmp mirror/uniprot_reviewed.gpi.gz
target/neo-uniprot_reviewed.obo: mirror/uniprot_reviewed.gpi.gz
gzip -dc $< | ./gpi2obo.pl -F -n reviewed > $@.tmp && mv $@.tmp $@
target/neo-uniprot_reviewed.ofn: mirror/uniprot_reviewed.gpi.gz
gzip -dc $< | ./gpi2ofn.pl -F -n reviewed > $@.tmp && mv $@.tmp $@

# Sub-makefile
#
Expand All @@ -75,20 +77,13 @@ target/neo-uniprot_reviewed.obo: mirror/uniprot_reviewed.gpi.gz
# see below for regenerating this
include Makefile-gafs

# This is very hacky:
# - The neo solr index has an ID field (which is a CURIE), but no URI
# - Minerva requires OWL which uses URIs
# The neo solr index has an ID field (which is a CURIE), but no URI
# Minerva requires OWL which uses URIs
#
# When loading solr, owltools will use the oboInOwl:id field as priority to load the ID field (see https://github.com/owlcollab/owltools/pull/247)
# Otherwise, the owltools built-in URI contraction method is used, which assumes OBO PURLs, with unpredictable behavior for non-OBO PURLs
#
# Neo entities are NOT OBO ontologies, so they have a mix of prefixes, including identifiers.org
#
# Our hack is as follows. The perl code first generates an OBO file with CURIEs like FlyBase:FBgn111
# The default owltools expansion makes this an OBO PURL
# We then "reverse" this with some hacky regexes...
neo.owl: neo.obo
owltools $< -o $@.tmp && ./bin/fix-obo-uris.pl $@.tmp > $@.tmp2 && mv $@.tmp2 $@
# Derive the OBO-format file from neo.owl.
# ROBOT writes the converted ontology to $@.tmp; lines beginning with
# "owl-axioms" are then filtered out. The filtered text is written to a
# second temp file and only moved onto $@ once grep has succeeded, so a
# failed or interrupted run never leaves a truncated neo.obo that make
# would treat as up to date.
neo.obo: neo.owl
	$(ROBOT) convert -i $< -o $@.tmp -f obo && grep -v ^owl-axioms $@.tmp > $@.tmp2 && mv $@.tmp2 $@

Makefile-gafs: datasets.json
./build-neo-makefile.py -i $< > $@.tmp && mv $@.tmp $@
Expand All @@ -107,10 +102,8 @@ rnacentral.gpi.gz:
rnacentral.gpi: rnacentral.gpi.gz
gzip -dc $< > $@

target/neo-rnac.obo: rnacentral.gpi.gz
gzip -dc $< | ./rnacgpi2obo.pl > $@.tmp && mv $@.tmp $@
target/neo-rnac.ofn: rnacentral.gpi.gz
gzip -dc $< | ./rnacgpi2ofn.pl > $@.tmp && mv $@.tmp $@

target/xneo-%.owl: target/neo-%.obo
owltools $< -o $@.tmp && mv $@.tmp $@
target/neo-%.owl: target/xneo-%.owl
./bin/fix-obo-uris.pl $< > $@.tmp && mv $@.tmp $@
target/neo-%.owl: target/neo-%.ofn
$(ROBOT) convert -i $< -o $@.tmp -f owl && mv $@.tmp $@
Loading

0 comments on commit 88d583a

Please sign in to comment.