Unverified Commit 294ec783 authored by liwenhe1993's avatar liwenhe1993 Committed by GitHub
Browse files

Refactor dockerfile (#2384)

* Remove .helmignore file

* Refactor dockerfile
parent 57d47652
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -90,6 +90,6 @@ RUN chmod +x /root/checkpoint.sh && \
RUN rm -rf /var/cache/apk/*

#9. expose port
EXPOSE 2181 2888 3888 5432 12345 50051 8888
EXPOSE 2181 2888 3888 5432 5678 1234 12345 50051 8888

ENTRYPOINT ["/sbin/tini", "--", "/root/startup.sh"]
 No newline at end of file
+12 −12
Original line number Diff line number Diff line
@@ -162,18 +162,6 @@ This environment variable sets the runtime environment for task. The default val

User data directory path, self configuration, please make sure the directory exists and have read write permissions. The default value is `/tmp/dolphinscheduler`

**`DOLPHINSCHEDULER_DATA_DOWNLOAD_BASEDIR_PATH`**

Directory path for user data download. self configuration, please make sure the directory exists and have read write permissions. The default value is `/tmp/dolphinscheduler/download`

**`DOLPHINSCHEDULER_PROCESS_EXEC_BASEPATH`**

Process execute directory. self configuration, please make sure the directory exists and have read write permissions. The default value is `/tmp/dolphinscheduler/exec`

**`TASK_QUEUE`**

This environment variable sets the task queue for `master-server` and `worker-server`. The default value is `zookeeper`.

**`ZOOKEEPER_QUORUM`**

This environment variable sets the zookeeper quorum for `master-server` and `worker-server`. The default value is `127.0.0.1:2181`.
@@ -208,6 +196,10 @@ This environment variable sets max cpu load avg for `master-server`. The default

This environment variable sets reserved memory for `master-server`. The default value is `0.1`.

**`MASTER_LISTEN_PORT`**

This environment variable sets port for `master-server`. The default value is `5678`.

**`WORKER_EXEC_THREADS`**

This environment variable sets exec thread num for `worker-server`. The default value is `100`.
@@ -228,6 +220,14 @@ This environment variable sets max cpu load avg for `worker-server`. The default

This environment variable sets reserved memory for `worker-server`. The default value is `0.1`.

**`WORKER_LISTEN_PORT`**

This environment variable sets port for `worker-server`. The default value is `1234`.

**`WORKER_GROUP`**

This environment variable sets group for `worker-server`. The default value is `default`.

**`XLS_FILE_PATH`**

This environment variable sets xls file path for `alert-server`. The default value is `/tmp/xls`.
+12 −12
Original line number Diff line number Diff line
@@ -162,18 +162,6 @@ Dolphin Scheduler映像使用了几个容易遗漏的环境变量。虽然这些

用户数据目录, 用户自己配置, 请确保这个目录存在并且用户读写权限, 默认值 `/tmp/dolphinscheduler`

**`DOLPHINSCHEDULER_DATA_DOWNLOAD_BASEDIR_PATH`**

用户数据下载目录, 用户自己配置, 请确保这个目录存在并且用户读写权限, 默认值 `/tmp/dolphinscheduler/download`

**`DOLPHINSCHEDULER_PROCESS_EXEC_BASEPATH`**

任务执行目录, 用户自己配置, 请确保这个目录存在并且用户读写权限, 默认值 `/tmp/dolphinscheduler/exec`

**`TASK_QUEUE`**

配置`master-server`和`worker-server`的`Zookeeper`任务队列名, 默认值 `zookeeper`

**`ZOOKEEPER_QUORUM`**

配置`master-server`和`worker-server`的`Zookeeper`地址, 默认值 `127.0.0.1:2181`
@@ -208,6 +196,10 @@ Dolphin Scheduler映像使用了几个容易遗漏的环境变量。虽然这些

配置`master-server`的保留内存,默认值 `0.1`

**`MASTER_LISTEN_PORT`**

配置`master-server`的端口,默认值 `5678`

**`WORKER_EXEC_THREADS`**

配置`worker-server`中的执行线程数量,默认值 `100`
@@ -228,6 +220,14 @@ Dolphin Scheduler映像使用了几个容易遗漏的环境变量。虽然这些

配置`worker-server`的保留内存,默认值 `0.1`

**`WORKER_LISTEN_PORT`**

配置`worker-server`的端口,默认值 `1234`

**`WORKER_GROUP`**

配置`worker-server`的分组,默认值 `default`

**`XLS_FILE_PATH`**

配置`alert-server`的`XLS`文件存储路径,默认值 `/tmp/xls`
+8 −0
Original line number Diff line number Diff line
@@ -14,21 +14,29 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

# server port
server.port=12345

# session config
server.servlet.session.timeout=7200

# servlet config
server.servlet.context-path=/dolphinscheduler/

# file size limit for upload
spring.servlet.multipart.max-file-size=1024MB
spring.servlet.multipart.max-request-size=1024MB

# post content
server.jetty.max-http-post-size=5000000

# i18n
spring.messages.encoding=UTF-8

# i18n classpath folder, file prefix "messages"; if there are many files, use "," as the separator
spring.messages.basename=i18n/messages

# Authentication types (supported types: PASSWORD)
security.authentication.type=PASSWORD

+35 −41
Original line number Diff line number Diff line
@@ -15,70 +15,64 @@
# limitations under the License.
#

#task queue implementation, default "zookeeper"
dolphinscheduler.queue.impl=${TASK_QUEUE}

#zookeeper cluster.  multiple are separated by commas.  eg. 192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181
zookeeper.quorum=${ZOOKEEPER_QUORUM}
#dolphinscheduler root directory
zookeeper.dolphinscheduler.root=/dolphinscheduler
#dolphinscheduler failover directory
zookeeper.session.timeout=300
zookeeper.connection.timeout=300
zookeeper.retry.base.sleep=100
zookeeper.retry.max.sleep=30000
zookeeper.retry.maxtime=5

#============================================================================
# System
#============================================================================
# system env path. self configuration, please make sure the directory and file exists and have read write execute permissions
dolphinscheduler.env.path=${DOLPHINSCHEDULER_ENV_PATH}
#resource.view.suffixs
resource.view.suffixs=txt,log,sh,conf,cfg,py,java,sql,hql,xml,properties
# is development state? default "false"
development.state=true

# user data directory path, self configuration, please make sure the directory exists and have read write permissions
data.basedir.path=${DOLPHINSCHEDULER_DATA_BASEDIR_PATH}
# directory path for user data download. self configuration, please make sure the directory exists and have read write permissions
data.download.basedir.path=${DOLPHINSCHEDULER_DATA_DOWNLOAD_BASEDIR_PATH}
# process execute directory. self configuration, please make sure the directory exists and have read write permissions
process.exec.basepath=${DOLPHINSCHEDULER_PROCESS_EXEC_BASEPATH}

# resource upload startup type : HDFS,S3,NONE
res.upload.startup.type=NONE
resource.storage.type=NONE

#============================================================================
# HDFS
#============================================================================
# Users who have permission to create directories under the HDFS root path
hdfs.root.user=hdfs
# data base dir, resource file will store to this hadoop hdfs path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended
data.store2hdfs.basepath=/dolphinscheduler
# resource store on HDFS/S3 path, resource file will store to this hadoop hdfs path, self configuration, please make sure the directory exists on hdfs and have read write permissions. "/dolphinscheduler" is recommended
#resource.upload.path=/dolphinscheduler

# whether kerberos starts
hadoop.security.authentication.startup.state=false
#hadoop.security.authentication.startup.state=false

# java.security.krb5.conf path
java.security.krb5.conf.path=/opt/krb5.conf
#java.security.krb5.conf.path=/opt/krb5.conf

# loginUserFromKeytab user
login.user.keytab.username=hdfs-mycluster@ESZ.COM
#login.user.keytab.username=hdfs-mycluster@ESZ.COM

# loginUserFromKeytab path
login.user.keytab.path=/opt/hdfs.headless.keytab
#login.user.keytab.path=/opt/hdfs.headless.keytab

#resource.view.suffixs
#resource.view.suffixs=txt,log,sh,conf,cfg,py,java,sql,hql,xml,properties

# if resource.storage.type=HDFS, the user need to have permission to create directories under the HDFS root path
hdfs.root.user=hdfs

# kerberos expire time
kerberos.expire.time=7

#============================================================================
# S3
#============================================================================
# ha or single namenode,If namenode ha needs to copy core-site.xml and hdfs-site.xml
# to the conf directory,support s3,for example : s3a://dolphinscheduler
# if resource.storage.type=S3,the value like: s3a://dolphinscheduler ; if resource.storage.type=HDFS, When namenode HA is enabled, you need to copy core-site.xml and hdfs-site.xml to conf dir
fs.defaultFS=hdfs://mycluster:8020
# s3 need,s3 endpoint
fs.s3a.endpoint=http://192.168.199.91:9010
# s3 need,s3 access key
fs.s3a.access.key=A3DXS30FO22544RE
# s3 need,s3 secret key
fs.s3a.secret.key=OloCLq3n+8+sdPHUhJ21XrSxTC+JK
#resourcemanager ha note this need ips , this empty if single

# if resource.storage.type=S3,s3 endpoint
#fs.s3a.endpoint=http://192.168.199.91:9010

# if resource.storage.type=S3,s3 access key
#fs.s3a.access.key=A3DXS30FO22544RE

# if resource.storage.type=S3,s3 secret key
#fs.s3a.secret.key=OloCLq3n+8+sdPHUhJ21XrSxTC+JK

# if not use hadoop resourcemanager, please keep default value; if resourcemanager HA enable, please type the HA ips ; if resourcemanager is single, make this value empty  TODO
yarn.resourcemanager.ha.rm.ids=192.168.xx.xx,192.168.xx.xx
# If it is a single resourcemanager, you only need to configure one host name. If it is resourcemanager HA, the default configuration is fine

# If resourcemanager HA enable or not use resourcemanager, please keep the default value; If resourcemanager is single, you only need to replace ark1 to actual resourcemanager hostname.
yarn.application.status.address=http://ark1:8088/ws/v1/cluster/apps/%s

Loading