update docker & requirements
Binh Vu committed Feb 18, 2018
1 parent e955475 commit c520793
Showing 4 changed files with 81 additions and 63 deletions.
25 changes: 20 additions & 5 deletions container/Dockerfile
@@ -1,14 +1,29 @@
-FROM continuumio/miniconda
+FROM ubuntu:14.04
+
+RUN apt-get update
+RUN apt-get install -y build-essential wget software-properties-common
+
+RUN wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh && \
+    bash Miniconda2-latest-Linux-x86_64.sh -b
+
+ENV PATH /root/miniconda2/bin/:$PATH
+
+RUN /root/miniconda2/bin/conda install -y numpy scipy
 
 RUN cd home && \
-    wget https://github.com/minhptx/iswc-2016-semantic-labeling/archive/v0.1-alpha.tar.gz && \
+    wget https://github.com/minhptx/iswc-2016-semantic-labeling/archive/v0.1-alpha.3.tar.gz && \
     mkdir semantic-labeling && \
-    tar -xf v0.1-alpha.tar.gz -C semantic-labeling --strip-components=1 && \
-    rm v0.1-alpha.tar.gz
+    tar -xf v0.1-alpha.3.tar.gz -C semantic-labeling --strip-components=1 && \
+    rm v0.1-alpha.3.tar.gz
 
 RUN conda install numpy scipy
 RUN pip install -r requirements.txt
 
-RUN apt-get install -y
+RUN add-apt-repository ppa:webupd8team/java -y
+RUN apt-get update
+RUN echo debconf shared/accepted-oracle-license-v1-1 select true | debconf-set-selections && \
+    echo debconf shared/accepted-oracle-license-v1-1 seen true | debconf-set-selections
+RUN apt-get install -y oracle-java8-installer
 
 WORKDIR /home/semantic-labeling

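As a usage sketch (not part of the commit): assuming this Dockerfile sits in container/ at the repository root, the image can be built and entered with the commands below; the semantic-labeling tag is illustrative.

    docker build -t semantic-labeling ./container
    docker run -it semantic-labeling /bin/bash

Note that oracle-java8-installer comes from the webupd8team PPA registered above, which is why software-properties-common (the package providing add-apt-repository) is installed first; the debconf preseed is what lets the Oracle license prompt pass non-interactively.
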
102 changes: 51 additions & 51 deletions container/docker-compose.yml
@@ -6,55 +6,55 @@ services:
     ports:
       - 9200:9200
     volumes:
-      - ../docker-data/esdata:/usr/share/elasticsearch/data
       - ./es_config:/usr/share/elasticsearch/config
-  master:
-    image: gettyimages/spark
-    command: bin/spark-class org.apache.spark.deploy.master.Master -h master
-    hostname: master
-    environment:
-      MASTER: spark://master:7077
-      SPARK_CONF_DIR: /conf
-      SPARK_PUBLIC_DNS: localhost
-    expose:
-      - 7001
-      - 7002
-      - 7003
-      - 7004
-      - 7005
-      - 7006
-      - 7077
-      - 6066
-    ports:
-      - 4040:4040
-      - 6066:6066
-      - 7077:7077
-      - 8080:8080
-    volumes:
-      - ../docker-data/master/conf:/conf
-      - ../docker-data/master:/tmp/data
-  worker:
-    image: gettyimages/spark
-    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077
-    hostname: worker
-    environment:
-      SPARK_CONF_DIR: /conf
-      SPARK_WORKER_CORES: 2
-      SPARK_WORKER_MEMORY: 1g
-      SPARK_WORKER_PORT: 8881
-      SPARK_WORKER_WEBUI_PORT: 8081
-      SPARK_PUBLIC_DNS: localhost
-    links:
-      - master
-    expose:
-      - 7012
-      - 7013
-      - 7014
-      - 7015
-      - 7016
-      - 8881
-    ports:
-      - 8081:8081
-    volumes:
-      - ../docker-data/worker/conf:/conf
-      - ../docker-data/worker:/tmp/data
+#      - ../docker-data/esdata:/usr/share/elasticsearch/data
+#  master:
+#    image: gettyimages/spark:2.2.0-hadoop-2.7
+#    command: bin/spark-class org.apache.spark.deploy.master.Master -h master
+#    hostname: master
+#    environment:
+#      MASTER: spark://master:7077
+#      SPARK_CONF_DIR: /conf
+#      SPARK_PUBLIC_DNS: localhost
+#    expose:
+#      - 7001
+#      - 7002
+#      - 7003
+#      - 7004
+#      - 7005
+#      - 7006
+#      - 7077
+#      - 6066
+#    ports:
+#      - 4040:4040
+#      - 6066:6066
+#      - 7077:7077
+#      - 8080:8080
+#    volumes:
+#      - ../docker-data/master/conf:/conf
+#      - ../docker-data/master:/tmp/data
+#  worker:
+#    image: gettyimages/spark:2.2.0-hadoop-2.7
+#    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://master:7077
+#    hostname: worker
+#    environment:
+#      SPARK_CONF_DIR: /conf
+#      SPARK_WORKER_CORES: 2
+#      SPARK_WORKER_MEMORY: 1g
+#      SPARK_WORKER_PORT: 8881
+#      SPARK_WORKER_WEBUI_PORT: 8081
+#      SPARK_PUBLIC_DNS: localhost
+#    links:
+#      - master
+#    expose:
+#      - 7012
+#      - 7013
+#      - 7014
+#      - 7015
+#      - 7016
+#      - 8881
+#    ports:
+#      - 8081:8081
+#    volumes:
+#      - ../docker-data/worker/conf:/conf
+#      - ../docker-data/worker:/tmp/data
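With the Spark master and worker services commented out, only the Elasticsearch service (publishing port 9200 above) remains active. A minimal usage sketch, assuming the compose file is run from the container/ directory; the curl line simply checks the published port:

    docker-compose up -d
    curl http://localhost:9200
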
11 changes: 5 additions & 6 deletions main/api.py
@@ -1,20 +1,19 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
-import os, logging
-
-import ujson
-
+import logging
+import os
 import sys
 from typing import Dict, Tuple, List, Set, Union, Optional
+import ujson
 
 from elasticsearch import Elasticsearch
 
 from main.semantic_labeler import SemanticLabeler
 
 """API for semantic labeling, a dataset is a set of sources"""
 
 
 def get_logger(name):
-    logger = logging.getLogger()
+    logger = logging.getLogger(name)
     logger.setLevel(logging.INFO)
     logger.propagate = False
 
Expand Down
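The logger change is the substantive fix here: logging.getLogger() always returns the shared root logger, so every call to get_logger(name) configured the same object, while logging.getLogger(name) returns a per-name logger. A minimal standard-library sketch of the difference:

    import logging

    root = logging.getLogger()             # no name: the single root logger
    named = logging.getLogger("main.api")  # per-name logger, created on first use

    assert named is logging.getLogger("main.api")  # same instance for the same name
    assert named is not root                       # distinct from the root logger
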
6 changes: 5 additions & 1 deletion requirements.txt
@@ -1,2 +1,6 @@
 gensim==3.2.0
-pyspark==2.2.0
+pyspark==2.2.1
+elasticsearch==6.0.0
+ujson
+scikit-learn==0.19.1
+pandas==0.22.0

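The scikit-learn pin needs the double-equals form (==); pip rejects a bare "=" as an invalid requirement operator. A quick post-install sanity check, assuming the pins resolved cleanly:

    import gensim, pandas, pyspark, sklearn
    # Expected to match the pins: 3.2.0, 0.22.0, 2.2.1, 0.19.1
    print(gensim.__version__, pandas.__version__, pyspark.__version__, sklearn.__version__)
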