Commit f38eace

Merge pull request #116 from shivaram/yarn-1.4
[branch-1.4] Support for launching YARN clusters
2 parents c525694 + 68c4b28

14 files changed (+409, -10 lines)
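
For context on how the new branches get exercised: the setup scripts below dispatch on the HADOOP_MAJOR_VERSION environment variable (exported via deploy_templates.py), and this PR teaches them a new "yarn" value. A hypothetical launch (the flag spelling is an assumption about the spark-ec2 CLI; the "yarn" value matches the new case branches below):

    # Hypothetical: launch an EC2 cluster using the YARN stack added in this PR.
    # --hadoop-major-version and its "yarn" value are assumptions based on the
    # case branches in the scripts below; the driver itself is not in this diff.
    ./spark-ec2 -k my-key -i my-key.pem -s 2 \
      --hadoop-major-version=yarn \
      launch yarn-test-cluster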

deploy_templates.py  (+1)

@@ -69,6 +69,7 @@
   "hadoop_major_version": os.getenv("HADOOP_MAJOR_VERSION"),
   "java_home": os.getenv("JAVA_HOME"),
   "default_tachyon_mem": "%dMB" % tachyon_mb,
+  "system_ram_mb": "%d" % system_ram_mb,
   "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"),
   "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"),
 }
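
The new system_ram_mb template variable is presumably consumed by the YARN configuration templates elsewhere in this PR (e.g. a yarn-site.xml sizing NodeManager container memory); that file is not among the diffs shown on this page, so this is an assumption. A hedged post-deploy check:

    # Hypothetical check: the rendered config should contain the substituted
    # RAM value. The yarn-site.xml path is assumed, not shown in this diff.
    grep -A 1 "memory-mb" /root/ephemeral-hdfs/conf/yarn-site.xml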

ephemeral-hdfs/init.sh  (+11)

@@ -23,6 +23,17 @@ case "$HADOOP_MAJOR_VERSION" in
     rm hadoop-*.tar.gz
     mv hadoop-2.0.0-cdh4.2.0/ ephemeral-hdfs/

+    # Have single conf dir
+    rm -rf /root/ephemeral-hdfs/etc/hadoop/
+    ln -s /root/ephemeral-hdfs/conf /root/ephemeral-hdfs/etc/hadoop
+    ;;
+  yarn)
+    wget http://s3.amazonaws.com/spark-related-packages/hadoop-2.4.0.tar.gz
+    echo "Unpacking Hadoop"
+    tar xvzf hadoop-*.tar.gz > /tmp/spark-ec2_hadoop.log
+    rm hadoop-*.tar.gz
+    mv hadoop-2.4.0/ ephemeral-hdfs/
+
     # Have single conf dir
     rm -rf /root/ephemeral-hdfs/etc/hadoop/
     ln -s /root/ephemeral-hdfs/conf /root/ephemeral-hdfs/etc/hadoop

ephemeral-hdfs/setup-slave.sh  (+4)

@@ -4,6 +4,10 @@
 mkdir -p /mnt/ephemeral-hdfs/logs
 mkdir -p /mnt/hadoop-logs

+# Setup yarn logs, local dirs
+mkdir -p /mnt/yarn-local
+mkdir -p /mnt/yarn-logs
+
 # Create Hadoop and HDFS directories in a given parent directory
 # (for example /mnt, /mnt2, and so on)
 function create_hadoop_dirs {
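
These two directories are presumably wired up as yarn.nodemanager.local-dirs and yarn.nodemanager.log-dirs in a yarn-site.xml template elsewhere in this PR (an assumption; that file is not shown on this page). Once a NodeManager is running they should be populated:

    # Hypothetical sanity check on a slave after YARN starts: container
    # scratch space and logs should appear under these mounts.
    ls /mnt/yarn-local /mnt/yarn-logs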

ephemeral-hdfs/setup.sh  (+18, -3)

@@ -27,8 +27,23 @@ else
 fi

 echo "Starting ephemeral HDFS..."
-# This is different depending on version. Simple hack: just try both.
-$EPHEMERAL_HDFS/sbin/start-dfs.sh
-$EPHEMERAL_HDFS/bin/start-dfs.sh
+
+# This is different depending on version.
+case "$HADOOP_MAJOR_VERSION" in
+  1)
+    $EPHEMERAL_HDFS/bin/start-dfs.sh
+    ;;
+  2)
+    $EPHEMERAL_HDFS/sbin/start-dfs.sh
+    ;;
+  yarn)
+    $EPHEMERAL_HDFS/sbin/start-dfs.sh
+    echo "Starting YARN"
+    $EPHEMERAL_HDFS/sbin/start-yarn.sh
+    ;;
+  *)
+    echo "ERROR: Unknown Hadoop version"
+    return -1
+esac

 popd > /dev/null
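
Once start-yarn.sh returns, every slave's NodeManager should register with the ResourceManager on the master. A sanity check using the standard Hadoop 2 CLI (same $EPHEMERAL_HDFS layout as above):

    # List registered NodeManagers and HDFS datanodes after startup
    $EPHEMERAL_HDFS/bin/yarn node -list
    $EPHEMERAL_HDFS/bin/hdfs dfsadmin -report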

mapreduce/init.sh  (+3)

@@ -11,6 +11,9 @@ case "$HADOOP_MAJOR_VERSION" in
     rm mr1-*.tar.gz
     mv hadoop-2.0.0-mr1-cdh4.2.0/ mapreduce/
     ;;
+  yarn)
+    echo "Nothing to initialize for MapReduce in Hadoop 2 YARN"
+    ;;

   *)
     echo "ERROR: Unknown Hadoop version"

persistent-hdfs/init.sh  (+11)

@@ -22,6 +22,17 @@ case "$HADOOP_MAJOR_VERSION" in
     rm hadoop-*.tar.gz
     mv hadoop-2.0.0-cdh4.2.0/ persistent-hdfs/

+    # Have single conf dir
+    rm -rf /root/persistent-hdfs/etc/hadoop/
+    ln -s /root/persistent-hdfs/conf /root/persistent-hdfs/etc/hadoop
+    ;;
+  yarn)
+    wget http://s3.amazonaws.com/spark-related-packages/hadoop-2.4.0.tar.gz
+    echo "Unpacking Hadoop"
+    tar xvzf hadoop-*.tar.gz > /tmp/spark-ec2_hadoop.log
+    rm hadoop-*.tar.gz
+    mv hadoop-2.4.0/ persistent-hdfs/
+
     # Have single conf dir
     rm -rf /root/persistent-hdfs/etc/hadoop/
     ln -s /root/persistent-hdfs/conf /root/persistent-hdfs/etc/hadoop

spark/init.sh  (+23, -6)

@@ -91,34 +91,51 @@ else
   1.1.0)
     if [[ "$HADOOP_MAJOR_VERSION" == "1" ]]; then
       wget http://s3.amazonaws.com/spark-related-packages/spark-1.1.0-bin-hadoop1.tgz
-    else
+    elif [[ "$HADOOP_MAJOR_VERSION" == "2" ]]; then
       wget http://s3.amazonaws.com/spark-related-packages/spark-1.1.0-bin-cdh4.tgz
+    else
+      wget http://s3.amazonaws.com/spark-related-packages/spark-1.1.0-bin-hadoop2.4.tgz
     fi
     ;;
   1.1.1)
     if [[ "$HADOOP_MAJOR_VERSION" == "1" ]]; then
       wget http://s3.amazonaws.com/spark-related-packages/spark-1.1.1-bin-hadoop1.tgz
-    else
+    elif [[ "$HADOOP_MAJOR_VERSION" == "2" ]]; then
       wget http://s3.amazonaws.com/spark-related-packages/spark-1.1.1-bin-cdh4.tgz
+    else
+      wget http://s3.amazonaws.com/spark-related-packages/spark-1.1.1-bin-hadoop2.4.tgz
     fi
     ;;
   1.2.0)
     if [[ "$HADOOP_MAJOR_VERSION" == "1" ]]; then
       wget http://s3.amazonaws.com/spark-related-packages/spark-1.2.0-bin-hadoop1.tgz
-    else
+    elif [[ "$HADOOP_MAJOR_VERSION" == "2" ]]; then
       wget http://s3.amazonaws.com/spark-related-packages/spark-1.2.0-bin-cdh4.tgz
+    else
+      wget http://s3.amazonaws.com/spark-related-packages/spark-1.2.0-bin-hadoop2.4.tgz
     fi
     ;;
   1.2.1)
     if [[ "$HADOOP_MAJOR_VERSION" == "1" ]]; then
       wget http://s3.amazonaws.com/spark-related-packages/spark-1.2.1-bin-hadoop1.tgz
-    else
+    elif [[ "$HADOOP_MAJOR_VERSION" == "2" ]]; then
       wget http://s3.amazonaws.com/spark-related-packages/spark-1.2.1-bin-cdh4.tgz
+    else
+      wget http://s3.amazonaws.com/spark-related-packages/spark-1.2.1-bin-hadoop2.4.tgz
     fi
     ;;
   *)
-    echo "ERROR: Unknown Spark version"
-    return
+    if [[ "$HADOOP_MAJOR_VERSION" == "1" ]]; then
+      wget http://s3.amazonaws.com/spark-related-packages/spark-$SPARK_VERSION-prebuilt-hadoop1.tgz
+    elif [[ "$HADOOP_MAJOR_VERSION" == "2" ]]; then
+      wget http://s3.amazonaws.com/spark-related-packages/spark-$SPARK_VERSION-prebuilt-cdh4.tgz
+    else
+      wget http://s3.amazonaws.com/spark-related-packages/spark-$SPARK_VERSION-bin-hadoop2.4.tgz
+    fi
+    if [ $? != 0 ]; then
+      echo "ERROR: Unknown Spark version"
+      return -1
+    fi
 esac

 echo "Unpacking Spark"

templates/root/ephemeral-hdfs/conf/capacity-scheduler.xml  (+111, new file)

@@ -0,0 +1,111 @@
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+<configuration>
+
+  <property>
+    <name>yarn.scheduler.capacity.maximum-applications</name>
+    <value>10000</value>
+    <description>
+      Maximum number of applications that can be pending and running.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
+    <value>0.1</value>
+    <description>
+      Maximum percent of resources in the cluster which can be used to run
+      application masters, i.e. controls the number of concurrently running
+      applications.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.resource-calculator</name>
+    <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
+    <description>
+      The ResourceCalculator implementation to be used to compare
+      Resources in the scheduler.
+      The default, DefaultResourceCalculator, only uses Memory, while
+      DominantResourceCalculator uses dominant-resource to compare
+      multi-dimensional resources such as Memory, CPU etc.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.queues</name>
+    <value>default</value>
+    <description>
+      The queues at this level (root is the root queue).
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.capacity</name>
+    <value>100</value>
+    <description>Default queue target capacity.</description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
+    <value>1</value>
+    <description>
+      Default queue user limit, a percentage from 0.0 to 1.0.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
+    <value>100</value>
+    <description>
+      The maximum capacity of the default queue.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.state</name>
+    <value>RUNNING</value>
+    <description>
+      The state of the default queue. State can be one of RUNNING or STOPPED.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
+    <value>*</value>
+    <description>
+      The ACL of who can submit jobs to the default queue.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
+    <value>*</value>
+    <description>
+      The ACL of who can administer jobs on the default queue.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.scheduler.capacity.node-locality-delay</name>
+    <value>40</value>
+    <description>
+      Number of missed scheduling opportunities after which the CapacityScheduler
+      attempts to schedule rack-local containers.
+      Typically this should be set to the number of nodes in the cluster; by
+      default it is set to 40, approximately the number of nodes in one rack.
+    </description>
+  </property>
+
+</configuration>
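
This is essentially the stock Hadoop capacity-scheduler.xml: a single "default" queue owning 100% of cluster capacity, with ApplicationMasters capped at 10% of resources. The live queue configuration can be inspected through the ResourceManager's REST API (standard in Hadoop 2.4; the master hostname and default port 8088 are assumptions):

    # Inspect the capacity scheduler's runtime queue configuration
    curl http://MASTER_HOSTNAME:8088/ws/v1/cluster/scheduler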

templates/root/ephemeral-hdfs/conf/core-site.xml  (+10)

@@ -15,6 +15,11 @@
   <value>hdfs://{{active_master}}:9000</value>
 </property>

+<property>
+  <name>fs.defaultFS</name>
+  <value>hdfs://{{active_master}}:9000</value>
+</property>
+
 <property>
   <name>io.file.buffer.size</name>
   <value>65536</value>

@@ -55,4 +60,9 @@
   <value>{{aws_secret_access_key}}</value>
 </property>

+<property>
+  <name>hadoop.security.group.mapping</name>
+  <value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
+</property>
+
 </configuration>
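
fs.defaultFS is the Hadoop 2 name for the deprecated fs.default.name; keeping both lets the same template serve the Hadoop 1, CDH4, and YARN branches. The resolved value can be confirmed after deployment (standard Hadoop 2 CLI):

    # Print the default filesystem as seen by the live configuration
    /root/ephemeral-hdfs/bin/hdfs getconf -confKey fs.defaultFS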

templates/root/ephemeral-hdfs/conf/mapred-site.xml  (+5)

@@ -5,6 +5,11 @@

 <configuration>

+<property>
+  <name>mapreduce.framework.name</name>
+  <value>yarn</value>
+</property>
+
 <property>
   <name>mapred.job.tracker</name>
   <value>{{active_master}}:9001</value>