# luigi.cfg.example
# Example configuration in INI format ([section] headers with key=value pairs).
# Hive connection settings: server host and port, credentials, database name.
# The password below is an example placeholder; keep real credentials out of
# version control.
# NOTE(review): 10000 and 9083 look like the stock HiveServer2 / metastore
# ports - confirm for your cluster.
[hive]
user=default
password=test
server=hive-server1
port=10000
# Path of the beeline client; presumably relative to the Hive install
# directory - TODO confirm against the code that reads it.
command=bin/beeline
db=default
metastore_port=9083
# Spark settings: install location, launcher scripts, Python interpreters and
# job submission options.
[spark]
home=/var/spark-2.0.1
# Launcher scripts; presumably resolved relative to `home` - TODO confirm.
shell=bin/pyspark
submit=bin/spark-submit
# Python interpreter paths. The ./ENV and ./envs prefixes line up with the
# "#ENV" / "#envs" aliases used in archives_cluster / archives_client below.
# Presumably the *_cluster / *_client variants are picked by deploy mode - TODO confirm.
pyspark_python_cluster=./ENV/py3spark_env/bin/python
pyspark_python_client=./envs/py3spark_env/bin/python
pyspark_python_default=python
# Locations of a luigi.cfg file on the local filesystem and on HDFS.
config_local=~/spark-utils/luigi.cfg
config_hdfs=hdfs:/user/luigi.cfg
# The following names mirror spark-submit options (--master, --deploy-mode,
# --queue, --archives, --jars, --files); presumably passed through - TODO confirm.
master=yarn
deploy-mode=cluster
queue=public
# Archive entries use the "file#alias" form.
archives_cluster=py3spark_env.zip#ENV
archives_client=envs/py3spark_env.zip#envs
# Comma-separated list of jar locations.
jars=hdfs:/spark/datanucleus-api-jdo-3.2.6.jar,hdfs:/spark/datanucleus-core-3.2.10.jar,hdfs:/spark/datanucleus-rdbms-3.2.9.jar
files=spark-2.0.1/conf/hive-site.xml
# Luigi scheduler settings.
[scheduler]
# "this-host" is a placeholder; [core] default-scheduler-url uses the same
# host and port.
host=this-host
port=8082
record_task_history=False
# NOTE(review): both delays are presumably in seconds, per Luigi's scheduler
# documentation - confirm for the Luigi version in use.
retry-delay=600
worker-disconnect-delay=60
# Python packages to install, grouped by installer (pip, conda).
# Each value is a space-separated package list wrapped in single quotes.
# NOTE(review): Python's configparser keeps the quote characters as part of
# the value - confirm the consumer expects them (e.g. for shell quoting).
[python_requirements]
pip='numpy pandas luigi gspread>=0.2.5 oauth2client==1.5.2 slackclient>=0.16 django hdfs'
conda='pyopenssl psycopg2 cssselect'
# YARN endpoints. "rm-host" is a placeholder host name.
# NOTE(review): `rm` is presumably the ResourceManager host, and the ports
# look like the usual RM web UI / job history / NodeManager log ports -
# confirm for your cluster.
[yarn]
rm=rm-host
rm_port=8088
job_history_port=19888
log_container_port=45454
# HDFS access settings. "namenode-host" and "me" are placeholders.
[hdfs]
# namenode/port and namenode_host/namenode_port carry the same values here;
# presumably they are read by different consumers - keep them in sync.
namenode=namenode-host
port=50070
effective_user=me
namenode_host=namenode-host
namenode_port=50070
snakebite_autoconfig=False
# Python / Anaconda installation settings.
[python]
version=3.5.2
# Install directory and interpreter path; `command` is presumably relative
# to `dir` - TODO confirm.
dir=~/anaconda3
command=bin/python3
# Environment name; matches the py3spark_env paths used in [spark].
env=py3spark_env
# Download URL of the Anaconda3 2.4.1 Linux x86_64 installer script.
installer=https://3230d63b5fc54e62148e-c95ac804525aac4b6dba79b00b39d1d3.ssl.cf1.rackcdn.com/Anaconda3-2.4.1-Linux-x86_64.sh
# Shell settings: path of the bash profile file.
[bash]
profile=~/.bashrc
# Repository checkout location and name; `dir` is the same directory that
# [spark] config_local points into.
[repo]
dir=~/spark-utils
name=spark-utils
# Luigi execution summary settings.
# NOTE(review): summary-length presumably limits how many tasks are listed
# per status in the summary - confirm against the Luigi documentation.
[execution_summary]
summary-length=1
# Luigi core settings.
[core]
# Same placeholder host and port as [scheduler] host/port.
default-scheduler-url=http://this-host:8082
# Same value as [worker] max_reschedules.
max-reschedules=2
parallel-scheduling=true
# NOTE(review): presumably seconds - confirm against the Luigi documentation.
rpc-connect-timeout=10.0
# Luigi worker settings.
[worker]
# Same value as [core] max-reschedules.
max_reschedules=2
keep_alive=false
ping_interval=1.0
# Kills a job's worker process after this many seconds.
# NOTE(review): Luigi documents 0 as "no timeout" - confirm for the version in use.
timeout=0
retry_external_tasks=false
# Numeric codes per run outcome.
# NOTE(review): in Luigi, [retcode] maps these outcomes to process exit
# codes - confirm for the version in use.
[retcode]
already_running=10
missing_data=20
task_failed=30
unhandled_exception=40
# PostgreSQL connection settings. All values are intentionally blank in this
# example; fill them in locally and keep real credentials out of version control.
[postgres]
host=
port=
database=
user=
password=
# Hadoop settings. `command` is blank in this example; presumably the hadoop
# executable to invoke - TODO confirm.
[hadoop]
command=
# Google Sheets access via a service account. Values are blank in this
# example (apart from `type`); fill them in locally and keep the private key
# out of version control.
# Apart from json_auth_file and scope, the key names match fields of a Google
# service-account JSON key file.
[google_sheets]
json_auth_file=
scope=
type=service_account
project_id=
private_key=
client_email=
client_id=
auth_uri=
# Uses "=" like every other key; the former "token_uri:=" made configparser
# split on ":" and store the literal value "=".
token_uri=
auth_provider_x509_cert_url=
client_x509_cert_url=
# NOTE(review): stray fragment (looks like the tail of a connection URL).
# Commented out so it is no longer parsed as an option named "@" in this
# section; restore or delete once its origin is confirmed.
#@:5432/