diff --git a/docker/gremlin-gaffer/.env b/docker/gremlin-gaffer/.env new file mode 100644 index 00000000..b9a28848 --- /dev/null +++ b/docker/gremlin-gaffer/.env @@ -0,0 +1,7 @@ +ZOOKEEPER_VERSION=3.7.1 +GAFFER_VERSION=2.0.0 +GREMLIN_VERSION=3.6.4 +ACCUMULO_VERSION=2.0.1 +HADOOP_VERSION=3.3.3 +ACCUMULO_CONF_DIR=/etc/accumulo/conf +HADOOP_CONF_DIR=/etc/hadoop/conf diff --git a/docker/gremlin-gaffer/Dockerfile b/docker/gremlin-gaffer/Dockerfile new file mode 100644 index 00000000..ffb80399 --- /dev/null +++ b/docker/gremlin-gaffer/Dockerfile @@ -0,0 +1,51 @@ +# Copyright 2023 Crown Copyright +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ARG BUILDER_IMAGE_NAME=maven +ARG BUILDER_IMAGE_TAG=3.8.4-jdk-8 + +ARG BASE_IMAGE_NAME=tinkerpop/gremlin-server +ARG BASE_IMAGE_TAG=3.6.4 + +FROM ${BUILDER_IMAGE_NAME}:${BUILDER_IMAGE_TAG} as builder + +ARG BASE_IMAGE_TAG +ARG GAFFER_VERSION=2.0.0 +ARG GAFFER_DOWNLOAD_URL=https://repo1.maven.org/maven2 +ARG GAFFER_GIT_REPO=https://github.com/gchq/Gaffer.git + +WORKDIR /jars + +# Allow users to provide their own JAR files +COPY ./files/ . +# Try to download required version from Maven Central, otherwise build from source +RUN allFilesDownloaded="TRUE" && \ + if [ ! 
-f "./tinkerpop-${GAFFER_VERSION}-jar-with-dependencies.jar" ] && [ "${allFilesDownloaded}" = "TRUE" ]; then \ + wget -nv "${GAFFER_DOWNLOAD_URL}/uk/gov/gchq/gaffer/tinkerpop/${GAFFER_VERSION}/tinkerpop-${GAFFER_VERSION}-jar-with-dependencies.jar" || allFilesDownloaded="FALSE"; \ + fi && \ + if [ "${allFilesDownloaded}" = "FALSE" ]; then \ + git clone ${GAFFER_GIT_REPO} /tmp/gaffer && \ + cd /tmp/gaffer && \ + git checkout ${GAFFER_VERSION} || git checkout gaffer2-${GAFFER_VERSION} && \ + mvn clean package -Pquick -pl :tinkerpop && \ + if [ ! -f "/jars/tinkerpop-${GAFFER_VERSION}-jar-with-dependencies.jar" ]; then \ + cp ./library/tinkerpop/target/tinkerpop-${GAFFER_VERSION}-jar-with-dependencies.jar /jars; \ + fi \ + fi + +FROM ${BASE_IMAGE_NAME}:${BASE_IMAGE_TAG} +COPY --from=builder --chown=root:root /jars/*.jar ext/gafferpop/plugin/ +COPY ./conf/gafferpop/ conf/gafferpop/ +COPY ./conf/gremlin-server-empty-gaffer.yaml conf/gremlin-server-empty-gaffer.yaml +CMD ["conf/gremlin-server-empty-gaffer.yaml"] diff --git a/docker/gremlin-gaffer/README.md b/docker/gremlin-gaffer/README.md new file mode 100644 index 00000000..018e76d6 --- /dev/null +++ b/docker/gremlin-gaffer/README.md @@ -0,0 +1,39 @@ +Gremlin Gaffer Plugin +====== +In this folder you can find the required files for building and running a gremlin-server with the Gaffer plugin loaded. + +The Docker image uses TinkerPop's gremlin-server with GafferPop config and plugin jars added in. +When run with docker compose it will provide you with a full Accumulo ecosystem complete with [HDFS](../hdfs) and the [Gaffer REST API](../gaffer-rest). + +# Running Locally +The easiest way to build and run these services is to use docker compose, by running the following from this directory: +```bash +docker compose up +``` + +## Example Notebook +See `gremlin-gaffer-modern-example.ipynb` for an example using the "TinkerPop Modern" demo graph. 
+ +## Customising the build +Custom Gaffer TinkerPop plugin jars can be added in the files directory. The Gaffer schema, store properties and gafferpop properties can be found in `conf/gafferpop` and can be customised when running with docker compose by mounting volumes. The `gremlin-server-empty-gaffer.yaml` cannot be overwritten in a volume; it must be built into the image. + +## Containers that are started: +* Zookeeper +* HDFS + * Datanode + * Namenode +* Accumulo + * Monitor + * GC + * tserver + * Master +* Gaffer REST +* Gremlin Server with GafferPop + +Access the HDFS NameNode web UI at: http://localhost:9870 + +Access the Accumulo Monitor UI at: http://localhost:9995 + +Access the Gaffer REST API at: http://localhost:8080/rest/ + +Access the Gremlin Server with GafferPop at: http://localhost:8182/ diff --git a/docker/gremlin-gaffer/conf/gafferpop/gaffer/schema/elements.json b/docker/gremlin-gaffer/conf/gafferpop/gaffer/schema/elements.json new file mode 100644 index 00000000..7508117d --- /dev/null +++ b/docker/gremlin-gaffer/conf/gafferpop/gaffer/schema/elements.json @@ -0,0 +1,52 @@ +{ + "entities": { + "person": { + "vertex": "person", + "properties": { + "name": "name.string", + "age": "age.integer" + }, + "aggregate": false + }, + "software": { + "vertex": "software", + "properties": { + "name": "name.string", + "lang": "name.string" + }, + "aggregate": false + } + }, + "edges": { + "knows": { + "source": "person", + "destination": "person", + "directed": "true", + "properties": { + "weight": "weight.double" + }, + "aggregate": false + }, + "created": { + "source": "person", + "destination": "software", + "directed": "true", + "properties": { + "weight": "weight.double" + }, + "aggregate": false + }, + "dependsOn": { + "source": "software", + "destination": "software", + "directed": "true", + "aggregate": false + }, + "encapsulates": { + "source": "software", + "destination": "software", + "directed": "true", + "aggregate": false + } + } +} diff --git 
a/docker/gremlin-gaffer/conf/gafferpop/gaffer/schema/types.json b/docker/gremlin-gaffer/conf/gafferpop/gaffer/schema/types.json new file mode 100644 index 00000000..95784312 --- /dev/null +++ b/docker/gremlin-gaffer/conf/gafferpop/gaffer/schema/types.json @@ -0,0 +1,22 @@ +{ + "types": { + "person": { + "class": "java.lang.String" + }, + "software": { + "class": "java.lang.String" + }, + "true": { + "class": "java.lang.Boolean" + }, + "name.string": { + "class": "java.lang.String" + }, + "age.integer": { + "class": "java.lang.Integer" + }, + "weight.double": { + "class": "java.lang.Double" + } + } +} diff --git a/docker/gremlin-gaffer/conf/gafferpop/gaffer/store.properties b/docker/gremlin-gaffer/conf/gafferpop/gaffer/store.properties new file mode 100644 index 00000000..efb5b466 --- /dev/null +++ b/docker/gremlin-gaffer/conf/gafferpop/gaffer/store.properties @@ -0,0 +1,24 @@ +# +# Copyright 2016-2023 Crown Copyright +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +gaffer.store.class=uk.gov.gchq.gaffer.accumulostore.AccumuloStore +gaffer.store.properties.class=uk.gov.gchq.gaffer.accumulostore.AccumuloProperties +accumulo.instance=accumulo +accumulo.zookeepers=zookeeper +accumulo.user=root +accumulo.password=secret +# General store config +gaffer.cache.service.class=uk.gov.gchq.gaffer.cache.impl.HashMapCacheService +gaffer.store.job.tracker.enabled=true diff --git a/docker/gremlin-gaffer/conf/gafferpop/gafferpop-tinkerpop-modern.properties b/docker/gremlin-gaffer/conf/gafferpop/gafferpop-tinkerpop-modern.properties new file mode 100644 index 00000000..2bb1fbfb --- /dev/null +++ b/docker/gremlin-gaffer/conf/gafferpop/gafferpop-tinkerpop-modern.properties @@ -0,0 +1,20 @@ +# +# Copyright 2016-2023 Crown Copyright +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +gremlin.graph=uk.gov.gchq.gaffer.tinkerpop.GafferPopGraph +gaffer.graphId=graph1 +gaffer.storeproperties=conf/gafferpop/gaffer/store.properties +gaffer.schemas=conf/gafferpop/gaffer/schema/ +gaffer.userId=user01 diff --git a/docker/gremlin-gaffer/conf/graphConfig.json b/docker/gremlin-gaffer/conf/graphConfig.json new file mode 100644 index 00000000..57f97103 --- /dev/null +++ b/docker/gremlin-gaffer/conf/graphConfig.json @@ -0,0 +1,3 @@ +{ + "graphId": "graph1" +} \ No newline at end of file diff --git a/docker/gremlin-gaffer/conf/gremlin-server-empty-gaffer.yaml b/docker/gremlin-gaffer/conf/gremlin-server-empty-gaffer.yaml new file mode 100644 index 00000000..0fae7ec1 --- /dev/null +++ b/docker/gremlin-gaffer/conf/gremlin-server-empty-gaffer.yaml @@ -0,0 +1,43 @@ +# Copyright 2023 Crown Copyright +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +host: localhost +port: 8182 +evaluationTimeout: 30000 +graphs: { + graph: conf/gafferpop/gafferpop-tinkerpop-modern.properties} +scriptEngines: { + gremlin-groovy: { + plugins: { org.apache.tinkerpop.gremlin.server.jsr223.GremlinServerGremlinPlugin: {}, + uk.gov.gchq.gaffer.tinkerpop.gremlinplugin.GafferPopGremlinPlugin: {}, + org.apache.tinkerpop.gremlin.groovy.jsr223.GroovyCompilerGremlinPlugin: {enableThreadInterrupt: true}, + org.apache.tinkerpop.gremlin.jsr223.ImportGremlinPlugin: {classImports: [java.lang.Math], methodImports: [java.lang.Math#*]}, + org.apache.tinkerpop.gremlin.jsr223.ScriptFileGremlinPlugin: {files: [scripts/empty-sample.groovy]}} + } +} +serializers: + - { className: org.apache.tinkerpop.gremlin.driver.ser.GraphBinaryMessageSerializerV1, config: { serializeResultToString: true }} # application/vnd.graphbinary-v1.0-stringd + +metrics: { + slf4jReporter: {enabled: true, interval: 180000}} +strictTransactionManagement: false +idleConnectionTimeout: 0 +keepAliveInterval: 0 +maxInitialLineLength: 4096 +maxHeaderSize: 8192 +maxChunkSize: 8192 +maxContentLength: 10485760 +maxAccumulationBufferComponents: 1024 +resultIterationBatchSize: 64 +enableAuditLog: true diff --git a/docker/gremlin-gaffer/docker-compose.yaml b/docker/gremlin-gaffer/docker-compose.yaml new file mode 100644 index 00000000..4bb32592 --- /dev/null +++ b/docker/gremlin-gaffer/docker-compose.yaml @@ -0,0 +1,203 @@ +# Copyright 2020-2023 Crown Copyright +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +version: "3.7" + +services: + + zookeeper: + image: zookeeper:${ZOOKEEPER_VERSION} + healthcheck: + test: echo ruok | nc 127.0.0.1 2181 | grep imok + interval: 30s + timeout: 5s + retries: 3 + container_name: zookeeper + hostname: zookeeper + environment: + - ZOO_SERVERS=server.1=zookeeper:2888:3888;2181 + - ZOO_4LW_COMMANDS_WHITELIST=* + volumes: + - /data + - /datalog + + hdfs-namenode: + image: gchq/hdfs:${HADOOP_VERSION} + depends_on: + zookeeper: + condition: service_healthy + healthcheck: + test: curl -f http://localhost:9870 || exit 1 + interval: 30s + timeout: 10s + retries: 3 + build: + context: ../hdfs/ + args: + HADOOP_VERSION: ${HADOOP_VERSION} + command: namenode + container_name: hdfs-namenode + hostname: hdfs-namenode + environment: + - HADOOP_CONF_DIR=${HADOOP_CONF_DIR} + ports: + - 9870:9870 + volumes: + - ../hdfs/conf:${HADOOP_CONF_DIR}:ro + - /var/log/hadoop + - /data1 + - /data2 + + hdfs-datanode: + image: gchq/hdfs:${HADOOP_VERSION} + depends_on: + hdfs-namenode: + condition: service_healthy + command: datanode + container_name: hdfs-datanode + hostname: hdfs-datanode + environment: + - HADOOP_CONF_DIR=${HADOOP_CONF_DIR} + volumes: + - ../hdfs/conf:${HADOOP_CONF_DIR}:ro + - /var/log/hadoop + - /data1 + - /data2 + + accumulo-master: + image: gchq/gaffer:${GAFFER_VERSION}-accumulo-${ACCUMULO_VERSION} + depends_on: + hdfs-namenode: + condition: service_healthy + healthcheck: + test: cat /proc/net/tcp | grep 270F + interval: 30s + timeout: 5s + retries: 3 + start_period: 10s + build: + context: ../gaffer/ # NOTE(review): '.' holds the gremlin-server Dockerfile; assumes the Accumulo+Gaffer Dockerfile lives in ../gaffer - confirm
+ args: + GAFFER_VERSION: ${GAFFER_VERSION} + BASE_IMAGE_NAME: gchq/accumulo + BASE_IMAGE_TAG: ${ACCUMULO_VERSION} + command: master + container_name: accumulo-master + hostname: accumulo-master + environment: + - ACCUMULO_CONF_DIR=${ACCUMULO_CONF_DIR} + # There doesn't seem to be an easy way (with docker-compose) to init our + # HDFS instance with the right permissions so that Accumulo can create the + # file structure it needs. Using the following workaround to allow + # accumulo to "auth" with HDFS as the super user so that it can: + - HADOOP_USER_NAME=hadoop + volumes: + - ../accumulo/conf-${ACCUMULO_VERSION}:${ACCUMULO_CONF_DIR}:ro + - /var/log/accumulo + + accumulo-tserver: + image: gchq/gaffer:${GAFFER_VERSION}-accumulo-${ACCUMULO_VERSION} + depends_on: + accumulo-master: + condition: service_healthy + healthcheck: + test: cat /proc/net/tcp | grep 270D + interval: 30s + timeout: 5s + retries: 3 + command: tserver + container_name: accumulo-tserver + hostname: accumulo-tserver + environment: + - ACCUMULO_CONF_DIR=${ACCUMULO_CONF_DIR} + # There doesn't seem to be an easy way (with docker-compose) to init our + # HDFS instance with the right permissions so that Accumulo can create the + # file structure it needs. Using the following workaround to allow + # accumulo to "auth" with HDFS as the super user so that it can: + - HADOOP_USER_NAME=hadoop + volumes: + - ../accumulo/conf-${ACCUMULO_VERSION}:${ACCUMULO_CONF_DIR}:ro + - /var/log/accumulo + + accumulo-monitor: + image: gchq/gaffer:${GAFFER_VERSION}-accumulo-${ACCUMULO_VERSION} + depends_on: + accumulo-master: + condition: service_healthy + command: monitor + container_name: accumulo-monitor + hostname: accumulo-monitor + environment: + - ACCUMULO_CONF_DIR=${ACCUMULO_CONF_DIR} + # There doesn't seem to be an easy way (with docker-compose) to init our + # HDFS instance with the right permissions so that Accumulo can create the + # file structure it needs. 
Using the following workaround to allow + # accumulo to "auth" with HDFS as the super user so that it can: + - HADOOP_USER_NAME=hadoop + ports: + - 9995:9995 + volumes: + - ../accumulo/conf-${ACCUMULO_VERSION}:${ACCUMULO_CONF_DIR}:ro + - /var/log/accumulo + + accumulo-gc: + image: gchq/gaffer:${GAFFER_VERSION}-accumulo-${ACCUMULO_VERSION} + depends_on: + accumulo-master: + condition: service_healthy + command: gc + container_name: accumulo-gc + hostname: accumulo-gc + environment: + - ACCUMULO_CONF_DIR=${ACCUMULO_CONF_DIR} + # There doesn't seem to be an easy way (with docker-compose) to init our + # HDFS instance with the right permissions so that Accumulo can create the + # file structure it needs. Using the following workaround to allow + # accumulo to "auth" with HDFS as the super user so that it can: + - HADOOP_USER_NAME=hadoop + volumes: + - ../accumulo/conf-${ACCUMULO_VERSION}:${ACCUMULO_CONF_DIR}:ro + - /var/log/accumulo + + gaffer-rest: + image: gchq/gaffer-rest:${GAFFER_VERSION}-accumulo-${ACCUMULO_VERSION} + depends_on: + accumulo-tserver: + condition: service_healthy + build: + context: ../gaffer-rest/ + args: + GAFFER_VERSION: ${GAFFER_VERSION} + ACCUMULO_VERSION: ${ACCUMULO_VERSION} + ports: + - 8080:8080 + volumes: + - ./conf/graphConfig.json:/gaffer/graph/graphConfig.json:ro + - ./conf/gafferpop/gaffer/store.properties:/gaffer/store/store.properties:ro + - ./conf/gafferpop/gaffer/schema:/gaffer/schema:ro + + gremlin-gaffer: + image: gchq/gremlin-gaffer:${GREMLIN_VERSION}-gaffer-${GAFFER_VERSION} + depends_on: + accumulo-tserver: + condition: service_healthy + build: + context: . 
+ args: + GAFFER_VERSION: ${GAFFER_VERSION} + BASE_IMAGE_TAG: ${GREMLIN_VERSION} # the Dockerfile selects the gremlin-server base image via BASE_IMAGE_TAG; it declares no GREMLIN_VERSION ARG + ports: + - 8182:8182 + volumes: + - ./conf/gafferpop:/opt/gremlin-server/conf/gafferpop:ro diff --git a/docker/gremlin-gaffer/files/.gitignore b/docker/gremlin-gaffer/files/.gitignore new file mode 100644 index 00000000..fd807ac5 --- /dev/null +++ b/docker/gremlin-gaffer/files/.gitignore @@ -0,0 +1,2 @@ +*.tar.gz +*.jar diff --git a/docker/gremlin-gaffer/gremlin-gaffer-modern-example.ipynb b/docker/gremlin-gaffer/gremlin-gaffer-modern-example.ipynb new file mode 100644 index 00000000..d0a6bb79 --- /dev/null +++ b/docker/gremlin-gaffer/gremlin-gaffer-modern-example.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Gremlin Gaffer modern example" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Common imports\n", + "from gremlin_python.process.anonymous_traversal import traversal\n", + "from gremlin_python.process.graph_traversal import __\n", + "from gremlin_python.process.strategies import *\n", + "from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection\n", + "from gremlin_python.process.traversal import *\n", + "from gremlin_python.driver.client import Client\n", + "import nest_asyncio\n", + "\n", + "from_ = Direction.OUT\n", + "to = Direction.IN\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create gremlin client to run gremlin scripts directly\n", + "client = Client('ws://localhost:8182/gremlin', 'g')\n", + "# Import graphml data with script\n", + "client.submit(\"graph.io(graphml()).readGraph('data/tinkerpop-modern.xml')\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Setup a connection with the gremlin rest server\n", + "g = 
traversal().with_remote(DriverRemoteConnection('ws://localhost:8182/gremlin', 'g'))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'name': ['marko'], 'age': [29]}]\n" + ] + } + ], + "source": [ + "# Get properties from an Entity\n", + "print(g.V('1').valueMap().to_list())" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0.5]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Starting with multiple Entity seeds, filter by those with group \"person\", hop down any \"knows\" edges, and print property values less than 1\n", + "g.V('1', '2').hasLabel('person').outE('knows').values().is_(lt(1)).to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[path[v[1], v[3]], path[v[1], v[4], v[3]]]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Calculate up to 6 paths between two Entities\n", + "g.V('1').repeat(__.both().simplePath()).until(__.hasId('3')).limit(6).path().to_list()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tinkerpop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.16" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}