Apache Spark is an open-source cluster-computing framework
https://github.com/sequenceiq/docker-spark
Create the VirtualBox machines (one docker-machine host each for spark, namenode and datanode1)
docker-machine create -d virtualbox spark
docker-machine create -d virtualbox namenode
docker-machine create -d virtualbox datanode1
name node
https://github.com/sequenceiq/hadoop-docker
# run docker hadoop
docker-machine start namenode; eval "$(docker-machine env namenode)";
# Download the image
docker pull sequenceiq/hadoop-docker:2.7.1
# Run the container
docker run -it -p 50070:50070 -p 8088:8088 sequenceiq/hadoop-docker:2.7.1 /etc/bootstrap.sh -bash
data node
# run docker hadoop
docker-machine start datanode1; eval "$(docker-machine env datanode1)";
# Download the image
docker pull sequenceiq/hadoop-docker:2.7.1
# Run the container
docker run -it -p 50070:50070 -p 8088:8088 sequenceiq/hadoop-docker:2.7.1 /etc/bootstrap.sh -bash
spark
https://github.com/sequenceiq/docker-spark
# run docker spark
docker-machine start spark; eval "$(docker-machine env spark)";
# bash
docker run -it -p 8088:8088 -p 8042:8042 -p 4040:4040 -h sandbox sequenceiq/spark:1.6.0 bash
port | components |
---|---|
50070 | HDFS NameNode |
50075 | HDFS DataNode |
50090 | HDFS Secondary NameNode |
8088 | YARN Resource Manager |
8042 | YARN Node Manager |
4040 | Spark Web UI |