YARN-1021. Yarn Scheduler Load Simulator. (ywskycn via tucu)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1527059 13f79535-47bb-0310-9956-ffa450edef68
@@ -0,0 +1,45 @@
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements. See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership. The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License. You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<assembly>
  <id>hadoop-sls</id>
  <formats>
    <format>dir</format>
  </formats>
  <includeBaseDirectory>false</includeBaseDirectory>

  <fileSets>
    <fileSet>
      <directory>${basedir}/src/main/bin</directory>
      <outputDirectory>sls/bin</outputDirectory>
      <fileMode>0755</fileMode>
    </fileSet>
    <fileSet>
      <directory>${basedir}/src/main/html</directory>
      <outputDirectory>sls/html</outputDirectory>
    </fileSet>
    <fileSet>
      <directory>${basedir}/src/main/sample-conf</directory>
      <outputDirectory>sls/sample-conf</outputDirectory>
    </fileSet>
    <fileSet>
      <directory>${basedir}/src/main/data</directory>
      <outputDirectory>sls/sample-data</outputDirectory>
    </fileSet>
  </fileSets>

</assembly>
@@ -93,6 +93,17 @@
        <include>*-sources.jar</include>
      </includes>
    </fileSet>
    <fileSet>
      <directory>../hadoop-sls/target</directory>
      <outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
      <includes>
        <include>*-sources.jar</include>
      </includes>
    </fileSet>
    <fileSet>
      <directory>../hadoop-sls/target/hadoop-sls-${project.version}/sls</directory>
      <outputDirectory>/share/hadoop/${hadoop.component}/sls</outputDirectory>
    </fileSet>
  </fileSets>
  <dependencySets>
    <dependencySet>
@@ -730,6 +730,16 @@
      <artifactId>hsqldb</artifactId>
      <version>2.0.0</version>
    </dependency>
    <dependency>
      <groupId>com.codahale.metrics</groupId>
      <artifactId>metrics-core</artifactId>
      <version>3.0.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-sls</artifactId>
      <version>${project.version}</version>
    </dependency>
  </dependencies>
</dependencyManagement>
@@ -95,6 +95,7 @@
    <item name="Fair Scheduler" href="hadoop-yarn/hadoop-yarn-site/FairScheduler.html"/>
    <item name="Web Application Proxy" href="hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html"/>
    <item name="YARN Commands" href="hadoop-yarn/hadoop-yarn-site/YarnCommands.html"/>
    <item name="Scheduler Load Simulator" href="hadoop-sls/SchedulerLoadSimulator.html"/>
  </menu>

  <menu name="YARN REST APIs" inherit="top">
hadoop-tools/hadoop-sls/README (new file, 12 lines)
@@ -0,0 +1,12 @@
Yarn Scheduler Load Simulator (SLS)

SLS is a stress and performance harness for the YARN ResourceManager
scheduler. It exercises the scheduler implementation by simulating the cluster
size and the application load, without requiring a real cluster or real
applications.

SLS runs a regular RM without RPC endpoints and uses NodeManager and
ApplicationMaster simulators to send and receive events, simulating cluster
and application load behavior.

The size of the cluster and the application load are scripted in a
configuration file.
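
For illustration, the RumenToSLSConverter included in this patch emits one
JSON object per job in the SLS jobs file and one JSON object per rack in the
SLS nodes file. A sketch of both, using the field names from createSLSJob()
and generateSLSNodeFile() below, with made-up values:

  { "am.type" : "mapreduce", "job.start.ms" : 0, "job.end.ms" : 95375,
    "job.queue.name" : "default", "job.id" : "job_1", "job.user" : "user1",
    "job.tasks" : [ { "container.host" : "/default-rack/node1",
      "container.start.ms" : 6664, "container.end.ms" : 23707,
      "container.priority" : 20, "container.type" : "map" } ] }

  { "rack" : "default-rack",
    "nodes" : [ { "node" : "node1" }, { "node" : "node2" } ] }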
hadoop-tools/hadoop-sls/dev-support/findbugs-exclude.xml (new file, 26 lines)
@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->
<FindBugsFilter>

  <!-- Ignore compareTo/equals warnings -->
  <Match>
    <Class name="org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator" />
    <Bug pattern="EQ_COMPARETO_USE_OBJECT_EQUALS" />
  </Match>

</FindBugsFilter>
hadoop-tools/hadoop-sls/pom.xml (new file, 184 lines)
@@ -0,0 +1,184 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-project</artifactId>
    <version>3.0.0-SNAPSHOT</version>
    <relativePath>../../hadoop-project</relativePath>
  </parent>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-sls</artifactId>
  <version>3.0.0-SNAPSHOT</version>
  <description>Apache Hadoop Scheduler Load Simulator</description>
  <name>Apache Hadoop Scheduler Load Simulator</name>
  <packaging>jar</packaging>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-minicluster</artifactId>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-rumen</artifactId>
    </dependency>
    <dependency>
      <groupId>com.codahale.metrics</groupId>
      <artifactId>metrics-core</artifactId>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.mortbay.jetty</groupId>
      <artifactId>jetty</artifactId>
      <scope>provided</scope>
      <exclusions>
        <exclusion>
          <groupId>org.mortbay.jetty</groupId>
          <artifactId>servlet-api</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.mortbay.jetty</groupId>
      <artifactId>jetty-util</artifactId>
      <scope>provided</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-source-plugin</artifactId>
        <configuration>
          <attach>true</attach>
        </configuration>
        <executions>
          <execution>
            <goals>
              <goal>jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>findbugs-maven-plugin</artifactId>
        <configuration>
          <findbugsXmlOutput>true</findbugsXmlOutput>
          <xmlOutput>true</xmlOutput>
          <excludeFilterFile>${basedir}/dev-support/findbugs-exclude.xml</excludeFilterFile>
          <effort>Max</effort>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.rat</groupId>
        <artifactId>apache-rat-plugin</artifactId>
        <configuration>
          <excludes>
            <exclude>src/main/data/2jobs2min-rumen-jh.json</exclude>
            <exclude>src/main/html/js/thirdparty/jquery.js</exclude>
            <exclude>src/main/html/js/thirdparty/d3-LICENSE</exclude>
            <exclude>src/main/html/js/thirdparty/d3.v3.js</exclude>
            <exclude>src/main/html/simulate.html.template</exclude>
            <exclude>src/main/html/simulate.info.html.template</exclude>
            <exclude>src/main/html/track.html.template</exclude>
            <exclude>src/test/resources/simulate.html.template</exclude>
            <exclude>src/test/resources/simulate.info.html.template</exclude>
            <exclude>src/test/resources/track.html.template</exclude>
          </excludes>
        </configuration>
      </plugin>
    </plugins>
  </build>

  <profiles>
    <profile>
      <id>docs</id>
      <activation>
        <activeByDefault>false</activeByDefault>
      </activation>
      <build>
        <plugins>
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-site-plugin</artifactId>
            <executions>
              <execution>
                <phase>package</phase>
                <goals>
                  <goal>site</goal>
                </goals>
              </execution>
            </executions>
          </plugin>
        </plugins>
      </build>
    </profile>
    <profile>
      <id>dist</id>
      <activation>
        <activeByDefault>false</activeByDefault>
      </activation>
      <build>
        <plugins>
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <dependencies>
              <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-assemblies</artifactId>
                <version>${project.version}</version>
              </dependency>
            </dependencies>
            <executions>
              <execution>
                <id>dist</id>
                <phase>prepare-package</phase>
                <goals>
                  <goal>single</goal>
                </goals>
                <configuration>
                  <appendAssemblyId>false</appendAssemblyId>
                  <attach>false</attach>
                  <finalName>${project.artifactId}-${project.version}</finalName>
                  <descriptorRefs>
                    <descriptorRef>hadoop-sls</descriptorRef>
                  </descriptorRefs>
                </configuration>
              </execution>
            </executions>
          </plugin>
        </plugins>
      </build>
    </profile>
  </profiles>
</project>
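
The dist profile above is what produces the sls assembly during packaging; an
illustrative invocation (standard Maven flags, not part of this patch) would
be:

  mvn package -Pdist -DskipTests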
hadoop-tools/hadoop-sls/src/main/assemblies/sls.xml (new file, 55 lines)
@@ -0,0 +1,55 @@
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements. See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership. The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License. You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<assembly>
  <id>sls</id>
  <formats>
    <format>dir</format>
    <format>tar.gz</format>
  </formats>
  <includeBaseDirectory>false</includeBaseDirectory>

  <fileSets>
    <fileSet>
      <directory>${basedir}/src/main/bin</directory>
      <outputDirectory>bin</outputDirectory>
      <fileMode>0755</fileMode>
    </fileSet>
    <fileSet>
      <directory>${basedir}/src/main/data</directory>
      <outputDirectory>sample-data</outputDirectory>
    </fileSet>
    <fileSet>
      <directory>${basedir}/src/main/html</directory>
      <outputDirectory>html</outputDirectory>
    </fileSet>
    <fileSet>
      <directory>${basedir}/src/main/sample-conf</directory>
      <outputDirectory>sample-conf</outputDirectory>
    </fileSet>
  </fileSets>

  <dependencySets>
    <dependencySet>
      <outputDirectory>/lib</outputDirectory>
      <unpack>false</unpack>
      <scope>compile</scope>
      <useProjectArtifact>true</useProjectArtifact>
    </dependencySet>
  </dependencySets>

</assembly>
hadoop-tools/hadoop-sls/src/main/bin/rumen2sls.sh (new file, 106 lines)
@@ -0,0 +1,106 @@
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#

###############################################################################
printUsage() {
  echo "Usage: rumen2sls.sh <OPTIONS>"
  echo "                    --rumen-file=<RUMEN_FILE>"
  echo "                    --output-dir=<SLS_OUTPUT_DIR>"
  echo "                    [--output-prefix=<PREFIX>] (default is sls)"
  echo
}
###############################################################################
parseArgs() {
  for i in "$@"
  do
    case $i in
    --rumen-file=*)
      rumenfile=${i#*=}
      ;;
    --output-dir=*)
      outputdir=${i#*=}
      ;;
    --output-prefix=*)
      outputprefix=${i#*=}
      ;;
    *)
      echo "Invalid option"
      echo
      printUsage
      exit 1
      ;;
    esac
  done
  if [[ "${rumenfile}" == "" || "${outputdir}" == "" ]] ; then
    echo "Both --rumen-file ${rumenfile} and --output-dir \
${outputdir} must be specified"
    echo
    printUsage
    exit 1
  fi
}
###############################################################################
calculateBasedir() {
  # resolve links - $0 may be a softlink
  PRG="${1}"

  while [ -h "${PRG}" ]; do
    ls=`ls -ld "${PRG}"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
      PRG="$link"
    else
      PRG=`dirname "${PRG}"`/"$link"
    fi
  done

  BASEDIR=`dirname ${PRG}`
  BASEDIR=`cd ${BASEDIR}/..;pwd`
}
###############################################################################
calculateClasspath() {
  HADOOP_BASE=`which hadoop`
  HADOOP_BASE=`dirname $HADOOP_BASE`
  DEFAULT_LIBEXEC_DIR=${HADOOP_BASE}/../libexec
  HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
  . $HADOOP_LIBEXEC_DIR/hadoop-config.sh
  export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${TOOL_PATH}"
}
###############################################################################
runSLSGenerator() {
  if [[ "${outputprefix}" == "" ]] ; then
    outputprefix="sls"
  fi

  slsJobs=${outputdir}/${outputprefix}-jobs.json
  slsNodes=${outputdir}/${outputprefix}-nodes.json

  args="-input ${rumenfile} -outputJobs ${slsJobs}"
  args="${args} -outputNodes ${slsNodes}"

  hadoop org.apache.hadoop.yarn.sls.RumenToSLSConverter ${args}
}
###############################################################################

calculateBasedir $0
calculateClasspath
parseArgs "$@"
runSLSGenerator

echo
echo "SLS simulation files available at: ${outputdir}"
echo

exit 0
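
As a usage sketch (paths are illustrative), converting a Rumen trace into the
two SLS input files with the script above:

  bin/rumen2sls.sh --rumen-file=/tmp/job-trace.json --output-dir=/tmp/slsfiles
  # writes /tmp/slsfiles/sls-jobs.json and /tmp/slsfiles/sls-nodes.json,
  # per runSLSGenerator() with the default "sls" prefix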
hadoop-tools/hadoop-sls/src/main/bin/slsrun.sh (new file, 112 lines)
@@ -0,0 +1,112 @@
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#

###############################################################################
printUsage() {
  echo "Usage: slsrun.sh <OPTIONS>"
  echo "                 --input-rumen|--input-sls=<FILE1,FILE2,...>"
  echo "                 --output-dir=<SLS_SIMULATION_OUTPUT_DIRECTORY>"
  echo "                 [--nodes=<SLS_NODES_FILE>]"
  echo "                 [--track-jobs=<JOBID1,JOBID2,...>]"
  echo "                 [--print-simulation]"
  echo
}
###############################################################################
parseArgs() {
  for i in "$@"
  do
    case $i in
    --input-rumen=*)
      inputrumen=${i#*=}
      ;;
    --input-sls=*)
      inputsls=${i#*=}
      ;;
    --output-dir=*)
      outputdir=${i#*=}
      ;;
    --nodes=*)
      nodes=${i#*=}
      ;;
    --track-jobs=*)
      trackjobs=${i#*=}
      ;;
    --print-simulation)
      printsimulation="true"
      ;;
    *)
      echo "Invalid option"
      echo
      printUsage
      exit 1
      ;;
    esac
  done

  if [[ "${inputrumen}" == "" && "${inputsls}" == "" ]] ; then
    echo "Either --input-rumen or --input-sls must be specified"
    echo
    printUsage
    exit 1
  fi

  if [[ "${outputdir}" == "" ]] ; then
    echo "The output directory --output-dir must be specified"
    echo
    printUsage
    exit 1
  fi
}

###############################################################################
calculateClasspath() {
  HADOOP_BASE=`which hadoop`
  HADOOP_BASE=`dirname $HADOOP_BASE`
  DEFAULT_LIBEXEC_DIR=${HADOOP_BASE}/../libexec
  HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
  . $HADOOP_LIBEXEC_DIR/hadoop-config.sh
  export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${TOOL_PATH}:html"
}
###############################################################################
runSimulation() {
  if [[ "${inputsls}" == "" ]] ; then
    args="-inputrumen ${inputrumen}"
  else
    args="-inputsls ${inputsls}"
  fi

  args="${args} -output ${outputdir}"

  if [[ "${nodes}" != "" ]] ; then
    args="${args} -nodes ${nodes}"
  fi

  if [[ "${trackjobs}" != "" ]] ; then
    args="${args} -trackjobs ${trackjobs}"
  fi

  if [[ "${printsimulation}" == "true" ]] ; then
    args="${args} -printsimulation"
  fi

  hadoop org.apache.hadoop.yarn.sls.SLSRunner ${args}
}
###############################################################################

calculateClasspath
parseArgs "$@"
runSimulation

exit 0
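
A matching run sketch (paths and job ids are illustrative); either a Rumen
trace or SLS-format inputs may be supplied, per parseArgs() above:

  bin/slsrun.sh --input-sls=/tmp/slsfiles/sls-jobs.json \
      --nodes=/tmp/slsfiles/sls-nodes.json --output-dir=/tmp/sls-out \
      --track-jobs=job_1,job_2 --print-simulation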
hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json (new file, 10229 lines)
hadoop-tools/hadoop-sls/src/main/html/css/bootstrap-responsive.min.css (vendored, new file, 19 lines)
hadoop-tools/hadoop-sls/src/main/html/css/bootstrap.min.css (vendored, new file, 37 lines)
hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/bootstrap.min.js (vendored, new file, 6 lines)
hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/d3-LICENSE (vendored, new file, 26 lines)
@@ -0,0 +1,26 @@
Copyright (c) 2013, Michael Bostock
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* The name Michael Bostock may not be used to endorse or promote products
  derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/d3.v3.js (vendored, new file, 8823 lines)
hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js (vendored, new file, 9789 lines)

hadoop-tools/hadoop-sls/src/main/html/showSimulationTrace.html (new file, 334 lines)
@@ -0,0 +1,334 @@
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
  <link rel="stylesheet" href="css/bootstrap-responsive.min.css">
  <style type="text/css">
    body {
      font: 20px sans-serif;
    }

    .axis path,
    .axis line {
      fill: none;
      stroke: #000;
      shape-rendering: crispEdges;
    }
    .axis text {
      font-family: sans-serif;
      font-size: 20px;
    }

    .line {
      fill: none;
      stroke: steelblue;
      stroke-width: 3px;
    }

    .legend {
      padding: 1px;
      font: 18px sans-serif;
      background: yellow;
      box-shadow: 2px 2px 1px #888;
    }

    .title {
      font: 24px sans-serif;
    }
    .divborder {
      border-width: 1px;
      border-style: solid;
      border-color: black;
      margin-top:10px
    }
  </style>
  <script src="js/thirdparty/d3.v3.js"></script>
  <script src="js/thirdparty/jquery.js"></script>
  <script src="js/thirdparty/bootstrap.min.js"></script>
</head>

<body>
  <div class="row">
    <div class="offset5" style="margin-top:20px; margin-bottom:20px">
      Select the generated metrics log file (realtimetrack.json): <input type='file' id='jsonfile' /> <input type='button' value='Generate !' onClick='draw()' /><br>
    </div>
  </div>

  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area1"></div>
    <div class="divborder span8" id="area2"></div>
  </div>

  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area3"></div>
    <div class="divborder span8" id="area4"></div>
  </div>

  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area5"></div>
    <div class="divborder span8" id="area6"></div>
  </div>

  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area7"></div>
    <div class="span7" id="area8"></div>
  </div>
  <p> </p>
  <script>
    // select file and draw
    function draw() {
      var filepath = document.getElementById('jsonfile').value;
      if (filepath) {
        for (var i = 1; i < 9; i ++) {
          $('#area' + i).empty();
        }
        filepath = filepath.replace("C:\\fakepath\\", "");
        drawCharts(filepath);
      } else {
        alert('Please choose a file first.');
      }
    }

    function drawCharts(filepath) {
      $.getJSON(filepath, function(data) {
        var numQueues = 0;
        var queueNames = new Array();
        for (var j in data[0]) {
          if (j.substring(0, 'queue'.length) === 'queue') {
            queueNames[numQueues] = j;
            numQueues ++;
          }
        }
        numQueues /= 2;

        // create graph
        $.getJSON(filepath, function(data) {
          var basetime = data[0].time;
          data.forEach(function(d) {
            d.time = (d.time - basetime) / 1000;
          });

          var legends = ["running.applications", "running.containers"];
          drawEachChart("#area1", data, legends, "Cluster running applications & containers", "Number", 0, 0);
          legends = ["jvm.free.memory", "jvm.max.memory", "jvm.total.memory"];
          drawEachChart("#area2", data, legends, "JVM memory", "Memory (GB)", 0, 0);
          legends = ["cluster.allocated.memory", "cluster.available.memory"];
          drawEachChart("#area3", data, legends, "Cluster allocated & available memory", "Memory (GB)", 0, 0);
          legends = ["cluster.allocated.vcores", "cluster.available.vcores"];
          drawEachChart("#area4", data, legends, "Cluster allocated & available vcores", "Number", 0, 0);

          for (var i = 0; i < numQueues; i ++) {
            legends[i] = queueNames[i * 2];
          }
          drawEachChart("#area5", data, legends, "Queue allocated memory", "Memory (GB)", 1, 100);
          for (var i = 0; i < numQueues; i ++) {
            legends[i] = queueNames[i * 2 + 1];
          }
          drawEachChart("#area6", data, legends, "Queue allocated vcores", "VCores", 1, 90);

          legends = [
            "scheduler.allocate.timecost",
            "scheduler.handle-NODE_ADDED.timecost", "scheduler.handle-NODE_REMOVED.timecost",
            "scheduler.handle-NODE_UPDATE.timecost", "scheduler.handle-APP_ADDED.timecost",
            "scheduler.handle-APP_REMOVED.timecost", "scheduler.handle-CONTAINER_EXPIRED.timecost"
          ];
          drawEachChart("#area7", data, legends, "Scheduler allocate & handle operations timecost", "Timecost (ms)", 0, 210);
        });
      });
    }

    // draw different chart
    function drawEachChart(chartArea, data, legends, title, yLabelTitle, isArea, pl) {
      // drawchart
      var margin = {top: 50, right: 250, bottom: 50, left: 70};
      var width = 800 - margin.left - margin.right;
      var height = 420 - margin.top - margin.bottom;

      var x = d3.scale.linear().range([0, width]);
      var y = d3.scale.linear().range([height, 0]);
      var xAxis = d3.svg.axis().scale(x).orient("bottom");
      var yAxis = d3.svg.axis().scale(y).orient("left");

      var color = d3.scale.category10();

      if (isArea == 1){
        var area = d3.svg.area()
          .x(function(d) { return x(d.time); })
          .y0(function(d) { return y(d.y0); })
          .y1(function(d) { return y(d.y0 + d.y); });

        var stack = d3.layout.stack()
          .values(function(d) { return d.values; });

        // create chart
        var svg = d3.select(chartArea).append("svg")
          .attr("width", width + margin.left + margin.right)
          .attr("height", height + margin.top + margin.bottom)
          .append("g")
          .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

        color.domain(d3.keys(data[0])
          .filter(function(key) {return $.inArray(key, legends) !== -1; }));

        var points = stack(color.domain().map(function(name) {
          return {
            name: name,
            values: data.map(function(d) {
              return {time: d.time, y: d[name]};
            })
          };
        }));

        // x & y
        x.domain(d3.extent(data, function(d) { return d.time; }));
        y.domain([
          d3.min(points, function(c) {
            return 0.9 * d3.min(c.values, function(v) { return v.y; }); }),
          d3.max(points, function(c) {
            return 1.1 * d3.max(c.values, function(v) { return v.y + v.y0; }); })
        ]);

        svg.append("g").attr("class", "x axis")
          .attr("transform", "translate(0," + height + ")")
          .call(xAxis)
          .append("text")
          .attr("transform", "translate(" + (width / 2) + ", 45)")
          .style("text-anchor", "middle")
          .text("Time (s)");

        svg.append("g")
          .attr("class", "y axis")
          .call(yAxis)
          .append("text")
          .attr("transform", "rotate(-90)")
          .attr("y", 0 - margin.left)
          .attr("x", 0 - (height / 2))
          .attr("dy", "1em")
          .style("text-anchor", "middle")
          .text(yLabelTitle);

        var point = svg.selectAll(".point")
          .data(points)
          .enter().append("g");

        point.append("path")
          .attr("class", "area")
          .attr("d", function(d) { return area(d.values); })
          .style("fill", function(d) { return color(d.name); });
      } else {
        // lines
        var line = d3.svg.line()
          .interpolate("basis")
          .x(function(d) { return x(d.time); })
          .y(function(d) { return y(d.value); });

        // create chart
        var svg = d3.select(chartArea).append("svg")
          .attr("id", title)
          .attr("width", width + margin.left + margin.right)
          .attr("height", height + margin.top + margin.bottom)
          .append("g")
          .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

        color.domain(d3.keys(data[0])
          .filter(function(key) {return $.inArray(key, legends) !== -1; }));

        var values = color.domain().map(function(name) {
          return {
            name: name,
            values: data.map(function(d) {
              return {time: d.time, value: +d[name]};
            })
          };
        });

        // x & y
        x.domain(d3.extent(data, function(d) { return d.time; }));
        y.domain([
          d3.min(values, function(c) { return 0.9 * d3.min(c.values, function(v) { return v.value; }); }),
          d3.max(values, function(c) { return 1.1 * d3.max(c.values, function(v) { return v.value; }); })
        ]);

        svg.append("g").attr("class", "x axis")
          .attr("transform", "translate(0," + height + ")")
          .call(xAxis)
          .append("text")
          .attr("transform", "translate(" + (width / 2) + ", 45)")
          .style("text-anchor", "middle")
          .text("Time (s)");

        svg.append("g")
          .attr("class", "y axis")
          .call(yAxis)
          .append("text")
          .attr("transform", "rotate(-90)")
          .attr("y", 0 - margin.left)
          .attr("x", 0 - (height / 2))
          .attr("dy", "1em")
          .style("text-anchor", "middle")
          .text(yLabelTitle);

        var value = svg.selectAll(".city")
          .data(values)
          .enter().append("g")
          .attr("class", "city");

        value.append("path")
          .attr("class", "line")
          .attr("d", function(d) { return line(d.values); })
          .style("stroke", function(d) { return color(d.name); });
      }
      // title
      svg.append("text")
        .attr("x", (width / 2))
        .attr("y", 10 - (margin.top / 2))
        .attr("text-anchor", "middle")
        .text(title);

      // legend
      var legend = svg.append("g")
        .attr("class", "legend")
        .attr("x", width - 50)
        .attr("y", 25)
        .attr("height", 120)
        .attr("width", 140);

      legend.selectAll('g').data(legends)
        .enter()
        .append('g')
        .each(function(d, i) {
          var g = d3.select(this);
          g.append("rect")
            .attr("x", width - 5 - pl)
            .attr("y", i * 20 + 0)
            .attr("width", 10)
            .attr("height", 10)
            .style("fill", color(d));

          g.append("text")
            .attr("x", width + 15 - pl)
            .attr("y", i * 20 + 8)
            .attr("height", 30)
            .attr("width", 250)
            .style("fill", color(d))
            .text(d);
        });
    }
  </script>
</body>
</html>
hadoop-tools/hadoop-sls/src/main/html/simulate.html.template (new file, 278 lines)
@@ -0,0 +1,278 @@
<html>
<head>
  <meta charset="utf-8">
  <link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
  <link rel="stylesheet" href="css/bootstrap-responsive.min.css">
  <style type="text/css">
    body '{' font: 20px sans-serif; '}'
    .axis path,
    .axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges; '}'
    .axis text '{' font-family: sans-serif; font-size: 20px; '}'
    .line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
    .legend '{'
      padding: 5px;
      font: 18px sans-serif;
      background: yellow;
      box-shadow: 2px 2px 1px #888;
    '}'
    .title '{' font: 24px sans-serif; '}'
    .divborder '{'
      border-width: 1px;
      border-style: solid;
      border-color: black;
      margin-top:10px
    '}'
  </style>
  <script src="js/thirdparty/d3.v3.js"></script>
  <script src="js/thirdparty/jquery.js"></script>
  <script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
  <div class="row">
    <div class="span10 offset2"><br>
      <input type="button" style="float: right;" value="Stop"
             onClick="stop()" />
    </div>
  </div>
  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area1"></div>
    <div class="divborder span8" id="area2"></div>
  </div>
  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area3"></div>
    <div class="divborder span8" id="area4"></div>
  </div>
  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area5"></div>
    <div class="divborder span8" id="area6"></div>
  </div>
  <div class="row">
    <div class="divborder span8" style="margin-left:50px" id="area7"></div>
    <div class="span8" id="area8"></div>
  </div><br/><br/>

  <script>
    var basetime = 0;
    var running = 1;
    var data = [];
    var width, height;
    var legends = [];
    var titles = [];
    var yLabels = [];
    var isAreas = [];
    var svgs = [];
    var xs = [];
    var ys = [];
    var xAxiss = [];
    var yAxiss = [];
    var lineAreas = [];
    var stacks = [];

    // legends
    legends[0] = [''running.applications'', ''running.containers''];
    legends[1] = [''jvm.free.memory'', ''jvm.max.memory'', ''jvm.total.memory''];
    legends[2] = [''cluster.allocated.memory'', ''cluster.available.memory''];
    legends[3] = [''cluster.allocated.vcores'', ''cluster.available.vcores''];
    legends[4] = [];
    legends[5] = [];
    {0}
    legends[6] = [''scheduler.allocate.timecost'',
                  ''scheduler.handle-NODE_ADDED.timecost'',
                  ''scheduler.handle-NODE_REMOVED.timecost'',
                  ''scheduler.handle-NODE_UPDATE.timecost'',
                  ''scheduler.handle-APP_ADDED.timecost'',
                  ''scheduler.handle-APP_REMOVED.timecost'',
                  ''scheduler.handle-CONTAINER_EXPIRED.timecost''];

    // title
    titles[0] = ''Cluster running applications & containers'';
    titles[1] = ''JVM memory'';
    titles[2] = ''Cluster allocated & available memory'';
    titles[3] = ''Cluster allocated & available vcores'';
    titles[4] = ''Queue allocated memory'';
    titles[5] = ''Queue allocated vcores'';
    titles[6] = ''Scheduler allocate & handle operation timecost'';

    // ylabels
    yLabels[0] = ''Number'';
    yLabels[1] = ''Memory (GB)'';
    yLabels[2] = ''Memory (GB)'';
    yLabels[3] = ''Number'';
    yLabels[4] = ''Memory (GB)'';
    yLabels[5] = ''Number'';
    yLabels[6] = ''Timecost (ms)'';

    // is area?
    isAreas = [0, 0, 0, 0, 1, 1, 0];

    // draw all charts
    for (var i = 0; i < 7; i ++) '{'
      drawEachChart(i);
    '}'

    // draw each chart
    function drawEachChart(index) '{'
      var margin = '{'top: 50, right: 250, bottom: 50, left: 70'}';
      width = 750 - margin.left - margin.right;
      height = 420 - margin.top - margin.bottom;

      xs[index] = d3.scale.linear().range([0, width]);
      ys[index] = d3.scale.linear().range([height, 0]);
      xAxiss[index] = d3.svg.axis().scale(xs[index]).orient(''bottom'');
      yAxiss[index] = d3.svg.axis().scale(ys[index]).orient(''left'');

      if (isAreas[index] == 1)'{'
        lineAreas[index] = d3.svg.area()
          .x(function(d) '{' return xs[index](d.time); '}')
          .y0(function(d) '{' return ys[index](d.y0); '}')
          .y1(function(d) '{' return ys[index](d.y0 + d.y); '}');

        stacks[index] = d3.layout.stack()
          .values(function(d) '{' return d.values; '}');
      '}' else '{'
        lineAreas[index] = d3.svg.line()
          .interpolate(''basis'')
          .x(function(d) '{' return xs[index](d.time); '}')
          .y(function(d) '{' return ys[index](d.value); '}');
      '}'

      svgs[index] = d3.select(''#area'' + (index + 1)).append(''svg'')
        .attr(''width'', width + margin.left + margin.right)
        .attr(''height'', height + margin.top + margin.bottom)
        .append(''g'')
        .attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');

      // x, y and title
      svgs[index].append(''text'')
        .attr(''transform'', ''translate('' + (width / 2) + '' ,'' +
              (height + margin.bottom - 10 ) + '')'')
        .style(''text-anchor'', ''middle'')
        .text(''Time ({1})'');

      svgs[index].append(''text'')
        .attr(''transform'', ''rotate(-90)'')
        .attr(''y'', 0 - margin.left)
        .attr(''x'', 0 - (height / 2))
        .attr(''dy'', ''1em'')
        .style(''text-anchor'', ''middle'')
        .text(yLabels[index]);

      svgs[index].append(''text'')
        .attr(''x'', (width / 2))
        .attr(''y'', 10 - (margin.top / 2))
        .attr(''text-anchor'', ''middle'')
        .text(titles[index]);
    '}'

    // request data
    function requestData() '{'
      $.ajax('{'url: ''simulateMetrics'',
        success: function(point) '{'
          // update data
          if (basetime == 0) basetime = point.time;
          point.time = (point.time - basetime) / {2};
          data.push(point);

          // clear old
          for (var i = 0; i < 7; i ++) '{'
            svgs[i].selectAll(''g.tick'').remove();
            svgs[i].selectAll(''g'').remove();
            var color = d3.scale.category10();
            color.domain(d3.keys(data[0]).filter(function(key) '{'
              return $.inArray(key, legends[i]) !== -1;
            '}'));

            var values;
            if (isAreas[i] == 1) '{'
              values = stacks[i](color.domain().map(function(name) '{'
                return '{'
                  name: name,
                  values: data.map(function(d) '{'
                    return '{'time: d.time, y: d[name]'}';
                  '}')
                '}'
              '}'));
              xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
              ys[i].domain([
                d3.min(values, function(c) '{' return 0; '}'),
                d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
                  function(v) '{' return v.y + v.y0; '}'); '}')
              ]);
            '}' else '{'
              values = color.domain().map(function(name) '{'
                return '{'
                  name: name,
                  values: data.map(function(d) '{'
                    return '{'time: d.time, value: d[name]'}';
                  '}')
                '}'
              '}');
              xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
              ys[i].domain([
                d3.min(values, function(c) '{' return 0; '}'),
                d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
                  function(v) '{' return v.value; '}'); '}')
              ]);
            '}'

            svgs[i].append(''g'').attr(''class'', ''x axis'')
              .attr(''transform'', ''translate(0,'' + height + '')'').call(xAxiss[i]);

            svgs[i].append(''g'').attr(''class'', ''y axis'').call(yAxiss[i]);

            var value = svgs[i].selectAll(''.path'')
              .data(values).enter().append(''g'').attr(''class'', ''line'');

            if(isAreas[i] == 1) '{'
              value.append(''path'').attr(''class'', ''area'')
                .attr(''d'', function(d) '{'return lineAreas[i](d.values); '}')
                .style(''fill'', function(d) '{'return color(d.name); '}');
            '}' else '{'
              value.append(''path'').attr(''class'', ''line'')
                .attr(''d'', function(d) '{'return lineAreas[i](d.values); '}')
                .style(''stroke'', function(d) '{'return color(d.name); '}');
            '}'

            // legend
            var legend = svgs[i].append(''g'')
              .attr(''class'', ''legend'')
              .attr(''x'', width + 5)
              .attr(''y'', 25)
              .attr(''height'', 120)
              .attr(''width'', 140);
            legend.selectAll(''g'').data(legends[i])
              .enter()
              .append(''g'')
              .each(function(d, i) '{'
                var g = d3.select(this);
                g.append(''rect'')
                  .attr(''x'', width + 5)
                  .attr(''y'', i*20)
                  .attr(''width'', 10)
                  .attr(''height'', 10)
                  .style(''fill'', color(d));
                g.append(''text'')
                  .attr(''x'', width + 25)
                  .attr(''y'', i * 20 + 8)
                  .attr(''height'',30)
                  .attr(''width'',250)
                  .style(''fill'', color(d))
                  .text(d);
              '}');
          '}'

          if(running == 1)
            setTimeout(requestData, {3});
        '}',
        cache: false
      '}');
    '}'

    // stop
    function stop() '{'
      running = 0;
    '}'
    requestData();
  </script>
</body>
</html>
hadoop-tools/hadoop-sls/src/main/html/simulate.info.html.template (new file, 50 lines)
@@ -0,0 +1,50 @@
<html>
<head>
  <meta charset="utf-8">
  <style type="text/css">
    .td1 '{'
      border-width: 1px;
      padding: 8px;
      border-style: solid;
      border-color: #666666;
      background-color: #dedede;
      width: 50%;
    '}'
    table.gridtable '{'
      font-family: verdana,arial,sans-serif;
      font-size:11px;
      color:#333333;
      border-width: 1px;
      border-color: #666666;
      border-collapse: collapse;
      margin-top: 80px;
    '}'
    .td2 '{'
      border-width: 1px;
      padding: 8px;
      border-style: solid;
      border-color: #666666;
      background-color: #ffffff;
      width: 50%;
    '}'
  </style>
</head>
<body>
  <table class="gridtable" align="center" width="400px">
    <tr>
      <td colspan="2" class="td2" align="center">
        <b>SLS Simulate Information</b>
      </td>
    </tr>
    {0}
    <tr>
      <td align="center" height="80px">
        <a href="simulate">Simulation Charts</a>
      </td>
      <td align="center">
        <a href="track">Tracked Jobs & Queues</a>
      </td>
    </tr>
  </table>
</body>
</html>
hadoop-tools/hadoop-sls/src/main/html/track.html.template (new file, 193 lines)
@@ -0,0 +1,193 @@
<html>
<head>
  <meta charset="utf-8">
  <link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
  <link rel="stylesheet" href="css/bootstrap-responsive.min.css">
  <style type="text/css">
    body '{' font: 20px sans-serif;'}'
    .axis path,
    .axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges;'}'
    .axis text '{' font-family: sans-serif; font-size: 20px; '}'
    .line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
    .legend '{' padding: 5px; font: 18px sans-serif; background: yellow;
      box-shadow: 2px 2px 1px #888;'}'
    .title '{' font: 24px sans-serif; '}'
    .divborder '{' border-width: 1px; border-style: solid; border-color: black;
      margin-top:10px '}'
  </style>
  <script src="js/thirdparty/d3.v3.js"></script>
  <script src="js/thirdparty/jquery.js"></script>
  <script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
  <div class="row">
    <div class="offset4 span8"><br/><br/><br/>
      Select Tracked Job/Queue:
      <select id="trackedSelect" onchange="redrawChart()">
        <option>----Queue----</option>
        {0}
        <option>----Job----</option>
        {1}
      </select>
      <input type="button" style="float: right;" value="Stop"
             onClick="stop()" />
    </div>
  </div>
  <div class="row">
    <div class="divborder span9 offset4" id="area1"></div>
  </div>
  <script>
    // global variables
    var basetime = 0;
    var running = 1;
    var para = '''';
    var data = [];
    var path, line, svg;
    var x, y;
    var width, height;
    var xAxis, yAxis;
    var legends = [''usage.memory'', ''demand.memory'', ''maxshare.memory'',
                   ''minshare.memory'', ''fairshare.memory''];

    // stop function
    function stop() '{'
      running = 0;
    '}'

    // select changed event
    function redrawChart() '{'
      var value = $(''#trackedSelect'').val();
      if (value.substring(0, ''Job ''.length) === ''Job ''
          || value.substring(0, ''Queue ''.length) === ''Queue '') '{'
        para = value;
        running = 0;
        basetime = 0;
        data = [];
        $(''#area1'').empty();
        drawChart(''Tracking '' + value);
        running = 1;
        requestData();
      }
    }

    // draw chart
    function drawChart(title) '{'
      // location
      var margin = '{'top: 50, right: 150, bottom: 50, left: 80'}';
      width = 800 - margin.left - margin.right;
      height = 420 - margin.top - margin.bottom;
      x = d3.scale.linear().range([0, width]);
      y = d3.scale.linear().range([height, 0]);
      xAxis = d3.svg.axis().scale(x).orient(''bottom'');
      yAxis = d3.svg.axis().scale(y).orient(''left'');
      // lines
      line = d3.svg.line().interpolate(''basis'')
        .x(function(d) '{' return x(d.time); })
        .y(function(d) '{' return y(d.value); });
      // create chart
      svg = d3.select(''#area1'').append(''svg'')
        .attr(''width'', width + margin.left + margin.right)
        .attr(''height'', height + margin.top + margin.bottom)
        .append(''g'')
        .attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');
      // axis labels
      svg.append(''text'')
        .attr(''transform'', ''translate('' + (width / 2) + '','' + (height + margin.bottom - 5 ) + '')'')
        .style(''text-anchor'', ''middle'')
        .text(''Time ({2})'');
      svg.append(''text'')
        .attr(''transform'', ''rotate(-90)'')
        .attr(''y'', 0 - margin.left)
        .attr(''x'', 0 - (height / 2))
        .attr(''dy'', ''1em'')
        .style(''text-anchor'', ''middle'')
        .text(''Memory (GB)'');
      // title
      svg.append(''text'')
        .attr(''x'', (width / 2))
        .attr(''y'', 10 - (margin.top / 2))
        .attr(''text-anchor'', ''middle'')
        .text(title);
    '}'

    // request data
    function requestData() '{'
      $.ajax('{'url: ''trackMetrics?t='' + para,
        success: function(point) '{'
          // clear old
          svg.selectAll(''g.tick'').remove();
          svg.selectAll(''g'').remove();

          if(basetime == 0) basetime = point.time;
          point.time = (point.time - basetime)/{3};
          data.push(point);

          var color = d3.scale.category10();
          color.domain(d3.keys(data[0]).filter(function(key) '{'
            return $.inArray(key, legends) !== -1;
          '}'));

          var values = color.domain().map(function(name) '{'
            return '{'
              name: name,
              values: data.map(function(d) '{'
                return '{' time: d.time, value: d[name]'}';
              '}')
            '}';
          '}');

          // set x/y range
          x.domain(d3.extent(data, function(d) '{' return d.time; '}'));
          y.domain([
            d3.min(values, function(c) '{' return 0 '}'),
            d3.max(values, function(c) '{' return 1.1 * d3.max(c.values, function(v) '{' return v.value; '}'); '}')
          ]);

          svg.append(''g'').attr(''class'', ''x axis'')
            .attr(''transform'', ''translate(0,'' + height + '')'').call(xAxis);
          svg.append(''g'').attr(''class'', ''y axis'').call(yAxis);
          var value = svg.selectAll(''.path'')
            .data(values).enter().append(''g'').attr(''class'', ''line'');

          value.append(''path'').attr(''class'', ''line'')
            .attr(''d'', function(d) '{'return line(d.values); '}')
            .style(''stroke'', function(d) '{'return color(d.name); '}');

          // legend
          var legend = svg.append(''g'')
            .attr(''class'', ''legend'')
            .attr(''x'', width + 5)
            .attr(''y'', 25)
            .attr(''height'', 120)
            .attr(''width'', 180);

          legend.selectAll(''g'').data(legends)
            .enter()
            .append(''g'')
            .each(function(d, i) '{'
              var g = d3.select(this);
              g.append(''rect'')
                .attr(''x'', width + 5)
                .attr(''y'', i * 20)
                .attr(''width'', 10)
                .attr(''height'', 10)
                .style(''fill'', color(d));

              g.append(''text'')
                .attr(''x'', width + 25)
                .attr(''y'', i * 20 + 8)
                .attr(''height'',30)
                .attr(''width'',250)
                .style(''fill'', color(d))
                .text(d);
            '}');

          if(running == 1)
            setTimeout(requestData, {4});
        '}',
        cache: false
      '}');
    '}'
  </script>
</body>
</html>
@ -0,0 +1,234 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.yarn.sls;
|
||||
|
||||
import org.apache.commons.cli.CommandLine;
|
||||
import org.apache.commons.cli.CommandLineParser;
|
||||
import org.apache.commons.cli.GnuParser;
|
||||
import org.apache.commons.cli.Options;
|
||||
import org.codehaus.jackson.JsonFactory;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
import org.codehaus.jackson.map.ObjectWriter;
|
||||
|
||||
import org.apache.hadoop.yarn.sls.utils.SLSUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.Writer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
public class RumenToSLSConverter {
|
||||
private static final String EOL = System.getProperty("line.separator");
|
||||
|
||||
private static long baseline = 0;
|
||||
private static Map<String, Set<String>> rackNodeMap =
|
||||
new TreeMap<String, Set<String>>();
|
||||
|
||||
public static void main(String args[]) throws Exception {
|
||||
Options options = new Options();
|
||||
options.addOption("input", true, "input rumen json file");
|
||||
options.addOption("outputJobs", true, "output jobs file");
|
||||
options.addOption("outputNodes", true, "output nodes file");
|
||||
|
||||
CommandLineParser parser = new GnuParser();
|
||||
CommandLine cmd = parser.parse(options, args);
|
||||
|
||||
if (! cmd.hasOption("input") ||
|
||||
! cmd.hasOption("outputJobs") ||
|
||||
! cmd.hasOption("outputNodes")) {
|
||||
System.err.println();
|
||||
System.err.println("ERROR: Missing input or output file");
|
||||
System.err.println();
|
||||
System.err.println("LoadGenerator creates a SLS script " +
|
||||
"from a Hadoop Rumen output");
|
||||
System.err.println();
|
||||
System.err.println("Options: -input FILE -outputJobs FILE " +
|
||||
"-outputNodes FILE");
|
||||
System.err.println();
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
String inputFile = cmd.getOptionValue("input");
|
||||
String outputJsonFile = cmd.getOptionValue("outputJobs");
|
||||
String outputNodeFile = cmd.getOptionValue("outputNodes");
|
||||
|
||||
// check existing
|
||||
if (! new File(inputFile).exists()) {
|
||||
System.err.println();
|
||||
System.err.println("ERROR: input does not exist");
|
||||
System.exit(1);
|
||||
}
|
||||
if (new File(outputJsonFile).exists()) {
|
||||
System.err.println();
|
||||
System.err.println("ERROR: output job file is existing");
|
||||
System.exit(1);
|
||||
}
|
||||
if (new File(outputNodeFile).exists()) {
|
||||
System.err.println();
|
||||
System.err.println("ERROR: output node file is existing");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
File jsonFile = new File(outputJsonFile);
|
||||
if (! jsonFile.getParentFile().exists()
|
||||
&& ! jsonFile.getParentFile().mkdirs()) {
|
||||
System.err.println("ERROR: Cannot create output directory in path: "
|
||||
+ jsonFile.getParentFile().getAbsoluteFile());
|
||||
System.exit(1);
|
||||
}
|
||||
File nodeFile = new File(outputNodeFile);
|
||||
if (! nodeFile.getParentFile().exists()
|
||||
&& ! nodeFile.getParentFile().mkdirs()) {
|
||||
System.err.println("ERROR: Cannot create output directory in path: "
|
||||
+ jsonFile.getParentFile().getAbsoluteFile());
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
    generateSLSLoadFile(inputFile, outputJsonFile);
    generateSLSNodeFile(outputNodeFile);
  }

  private static void generateSLSLoadFile(String inputFile, String outputFile)
      throws IOException {
    Reader input = new FileReader(inputFile);
    try {
      Writer output = new FileWriter(outputFile);
      try {
        ObjectMapper mapper = new ObjectMapper();
        ObjectWriter writer = mapper.defaultPrettyPrintingWriter();
        Iterator<Map> i = mapper.readValues(
            new JsonFactory().createJsonParser(input), Map.class);
        while (i.hasNext()) {
          Map m = i.next();
          output.write(writer.writeValueAsString(createSLSJob(m)) + EOL);
        }
      } finally {
        output.close();
      }
    } finally {
      input.close();
    }
  }

  @SuppressWarnings("unchecked")
  private static void generateSLSNodeFile(String outputFile)
      throws IOException {
    Writer output = new FileWriter(outputFile);
    try {
      ObjectMapper mapper = new ObjectMapper();
      ObjectWriter writer = mapper.defaultPrettyPrintingWriter();
      for (Map.Entry<String, Set<String>> entry : rackNodeMap.entrySet()) {
        Map rack = new LinkedHashMap();
        rack.put("rack", entry.getKey());
        List nodes = new ArrayList();
        for (String name : entry.getValue()) {
          Map node = new LinkedHashMap();
          node.put("node", name);
          nodes.add(node);
        }
        rack.put("nodes", nodes);
        output.write(writer.writeValueAsString(rack) + EOL);
      }
    } finally {
      output.close();
    }
  }

  @SuppressWarnings("unchecked")
  private static Map createSLSJob(Map rumenJob) {
    Map json = new LinkedHashMap();
    long jobStart = (Long) rumenJob.get("submitTime");
    long jobFinish = (Long) rumenJob.get("finishTime");
    String jobId = rumenJob.get("jobID").toString();
    String queue = rumenJob.get("queue").toString();
    String user = rumenJob.get("user").toString();
    if (baseline == 0) {
      baseline = jobStart;
    }
    jobStart -= baseline;
    jobFinish -= baseline;
    long offset = 0;
    if (jobStart < 0) {
      System.out.println("Warning: reset job " + jobId + " start time to 0.");
      offset = -jobStart;
      jobFinish = jobFinish - jobStart;
      jobStart = 0;
    }

    json.put("am.type", "mapreduce");
    json.put("job.start.ms", jobStart);
    json.put("job.end.ms", jobFinish);
    json.put("job.queue.name", queue);
    json.put("job.id", jobId);
    json.put("job.user", user);

    List maps = createSLSTasks("map",
        (List) rumenJob.get("mapTasks"), offset);
    List reduces = createSLSTasks("reduce",
        (List) rumenJob.get("reduceTasks"), offset);
    List tasks = new ArrayList();
    tasks.addAll(maps);
    tasks.addAll(reduces);
    json.put("job.tasks", tasks);
    return json;
  }

  @SuppressWarnings("unchecked")
  private static List createSLSTasks(String taskType,
      List rumenTasks, long offset) {
    int priority = taskType.equals("reduce") ? 10 : 20;
    List array = new ArrayList();
    for (Object e : rumenTasks) {
      Map rumenTask = (Map) e;
      for (Object ee : (List) rumenTask.get("attempts")) {
        Map rumenAttempt = (Map) ee;
        long taskStart = (Long) rumenAttempt.get("startTime");
        long taskFinish = (Long) rumenAttempt.get("finishTime");
        String hostname = (String) rumenAttempt.get("hostName");
        taskStart = taskStart - baseline + offset;
        taskFinish = taskFinish - baseline + offset;
        Map task = new LinkedHashMap();
        task.put("container.host", hostname);
        task.put("container.start.ms", taskStart);
        task.put("container.end.ms", taskFinish);
        task.put("container.priority", priority);
        task.put("container.type", taskType);
        array.add(task);
        String rackHost[] = SLSUtils.getRackHostName(hostname);
        if (rackNodeMap.containsKey(rackHost[0])) {
          rackNodeMap.get(rackHost[0]).add(rackHost[1]);
        } else {
          Set<String> hosts = new TreeSet<String>();
          hosts.add(rackHost[1]);
          rackNodeMap.put(rackHost[0], hosts);
        }
      }
    }
    return array;
  }
}
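For reference, a single job record emitted by createSLSJob above looks roughly
like the following (the keys come directly from the code; the values shown
here are purely illustrative, not taken from any real trace):

  {
    "am.type" : "mapreduce",
    "job.start.ms" : 0,
    "job.end.ms" : 95375,
    "job.queue.name" : "sls_queue_1",
    "job.id" : "job_1",
    "job.user" : "default",
    "job.tasks" : [ {
      "container.host" : "/default-rack/node1",
      "container.start.ms" : 6664,
      "container.end.ms" : 23707,
      "container.priority" : 20,
      "container.type" : "map"
    } ]
  }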
@ -0,0 +1,526 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.sls;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.text.MessageFormat;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Random;
import java.util.Arrays;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.rumen.JobTraceReader;
import org.apache.hadoop.tools.rumen.LoggedJob;
import org.apache.hadoop.tools.rumen.LoggedTask;
import org.apache.hadoop.tools.rumen.LoggedTaskAttempt;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.sls.appmaster.AMSimulator;
import org.apache.hadoop.yarn.sls.conf.SLSConfiguration;
import org.apache.hadoop.yarn.sls.nodemanager.NMSimulator;
import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper;
import org.apache.hadoop.yarn.sls.scheduler.TaskRunner;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;
import org.apache.log4j.Logger;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.map.ObjectMapper;

public class SLSRunner {
  // RM, Runner
  private ResourceManager rm;
  private static TaskRunner runner = new TaskRunner();
  private String[] inputTraces;
  private Configuration conf;
  private Map<String, Integer> queueAppNumMap;

  // NM simulator
  private HashMap<NodeId, NMSimulator> nmMap;
  private int nmMemoryMB, nmVCores;
  private String nodeFile;

  // AM simulator
  private int AM_ID;
  private Map<String, AMSimulator> amMap;
  private Set<String> trackedApps;
  private Map<String, Class> amClassMap;
  private static int remainingApps = 0;

  // metrics
  private String metricsOutputDir;
  private boolean printSimulation;

  // other simulation information
  private int numNMs, numRacks, numAMs, numTasks;
  private long maxRuntime;
  public final static Map<String, Object> simulateInfoMap =
      new HashMap<String, Object>();

  // logger
  public final static Logger LOG = Logger.getLogger(SLSRunner.class);

  // input traces, input-rumen or input-sls
  private boolean isSLS;

  public SLSRunner(boolean isSLS, String inputTraces[], String nodeFile,
                   String outputDir, Set<String> trackedApps,
                   boolean printsimulation)
      throws IOException, ClassNotFoundException {
    this.isSLS = isSLS;
    this.inputTraces = inputTraces.clone();
    this.nodeFile = nodeFile;
    this.trackedApps = trackedApps;
    this.printSimulation = printsimulation;
    metricsOutputDir = outputDir;

    nmMap = new HashMap<NodeId, NMSimulator>();
    queueAppNumMap = new HashMap<String, Integer>();
    amMap = new HashMap<String, AMSimulator>();
    amClassMap = new HashMap<String, Class>();

    // runner configuration
    conf = new Configuration(false);
    conf.addResource("sls-runner.xml");
    // runner
    int poolSize = conf.getInt(SLSConfiguration.RUNNER_POOL_SIZE,
        SLSConfiguration.RUNNER_POOL_SIZE_DEFAULT);
    SLSRunner.runner.setQueueSize(poolSize);
    // <AMType, Class> map
    for (Map.Entry e : conf) {
      String key = e.getKey().toString();
      if (key.startsWith(SLSConfiguration.AM_TYPE)) {
        String amType = key.substring(SLSConfiguration.AM_TYPE.length());
        amClassMap.put(amType, Class.forName(conf.get(key)));
      }
    }
  }

  public void start() throws Exception {
    // start resource manager
    startRM();
    // start node managers
    startNM();
    // start application masters
    startAM();
    // set queue & tracked apps information
    ((ResourceSchedulerWrapper) rm.getResourceScheduler())
        .setQueueSet(this.queueAppNumMap.keySet());
    ((ResourceSchedulerWrapper) rm.getResourceScheduler())
        .setTrackedAppSet(this.trackedApps);
    // print out simulation info
    printSimulationInfo();
    // block until all nodes are RUNNING
    waitForNodesRunning();
    // start the runner once everything is ready to go
    runner.start();
  }

  private void startRM() throws IOException, ClassNotFoundException {
    Configuration rmConf = new YarnConfiguration();
    String schedulerClass = rmConf.get(YarnConfiguration.RM_SCHEDULER);
    rmConf.set(SLSConfiguration.RM_SCHEDULER, schedulerClass);
    rmConf.set(YarnConfiguration.RM_SCHEDULER,
        ResourceSchedulerWrapper.class.getName());
    rmConf.set(SLSConfiguration.METRICS_OUTPUT_DIR, metricsOutputDir);
    rm = new ResourceManager();
    rm.init(rmConf);
    rm.start();
  }

  private void startNM() throws YarnException, IOException {
    // nm configuration
    nmMemoryMB = conf.getInt(SLSConfiguration.NM_MEMORY_MB,
        SLSConfiguration.NM_MEMORY_MB_DEFAULT);
    nmVCores = conf.getInt(SLSConfiguration.NM_VCORES,
        SLSConfiguration.NM_VCORES_DEFAULT);
    int heartbeatInterval = conf.getInt(
        SLSConfiguration.NM_HEARTBEAT_INTERVAL_MS,
        SLSConfiguration.NM_HEARTBEAT_INTERVAL_MS_DEFAULT);
    // nm information (fetch from topology file, or from sls/rumen json file)
    Set<String> nodeSet = new HashSet<String>();
    if (nodeFile.isEmpty()) {
      if (isSLS) {
        for (String inputTrace : inputTraces) {
          nodeSet.addAll(SLSUtils.parseNodesFromSLSTrace(inputTrace));
        }
      } else {
        for (String inputTrace : inputTraces) {
          nodeSet.addAll(SLSUtils.parseNodesFromRumenTrace(inputTrace));
        }
      }
    } else {
      nodeSet.addAll(SLSUtils.parseNodesFromNodeFile(nodeFile));
    }
    // create NM simulators
    Random random = new Random();
    Set<String> rackSet = new HashSet<String>();
    for (String hostName : nodeSet) {
      // we randomize the heartbeat start time from zero to 1 interval
      NMSimulator nm = new NMSimulator();
      nm.init(hostName, nmMemoryMB, nmVCores,
          random.nextInt(heartbeatInterval), heartbeatInterval, rm);
      nmMap.put(nm.getNode().getNodeID(), nm);
      runner.schedule(nm);
      rackSet.add(nm.getNode().getRackName());
    }
    numRacks = rackSet.size();
    numNMs = nmMap.size();
  }

  private void waitForNodesRunning() throws InterruptedException {
    long startTimeMS = System.currentTimeMillis();
    while (true) {
      int numRunningNodes = 0;
      for (RMNode node : rm.getRMContext().getRMNodes().values()) {
        if (node.getState() == NodeState.RUNNING) {
          numRunningNodes++;
        }
      }
      if (numRunningNodes == numNMs) {
        break;
      }
      LOG.info(MessageFormat.format("SLSRunner is waiting for all " +
          "nodes RUNNING. {0} of {1} NMs initialized.",
          numRunningNodes, numNMs));
      Thread.sleep(1000);
    }
    LOG.info(MessageFormat.format("SLSRunner takes {0} ms to launch all nodes.",
        (System.currentTimeMillis() - startTimeMS)));
  }

  @SuppressWarnings("unchecked")
  private void startAM() throws YarnException, IOException {
    // application/container configuration
    int heartbeatInterval = conf.getInt(
        SLSConfiguration.AM_HEARTBEAT_INTERVAL_MS,
        SLSConfiguration.AM_HEARTBEAT_INTERVAL_MS_DEFAULT);
    int containerMemoryMB = conf.getInt(SLSConfiguration.CONTAINER_MEMORY_MB,
        SLSConfiguration.CONTAINER_MEMORY_MB_DEFAULT);
    int containerVCores = conf.getInt(SLSConfiguration.CONTAINER_VCORES,
        SLSConfiguration.CONTAINER_VCORES_DEFAULT);
    Resource containerResource =
        BuilderUtils.newResource(containerMemoryMB, containerVCores);

    // application workload
    if (isSLS) {
      startAMFromSLSTraces(containerResource, heartbeatInterval);
    } else {
      startAMFromRumenTraces(containerResource, heartbeatInterval);
    }
    numAMs = amMap.size();
    remainingApps = numAMs;
  }

  /**
   * parse workload information from sls trace files
   */
  @SuppressWarnings("unchecked")
  private void startAMFromSLSTraces(Resource containerResource,
      int heartbeatInterval) throws IOException {
    // parse from sls traces
    JsonFactory jsonF = new JsonFactory();
    ObjectMapper mapper = new ObjectMapper();
    for (String inputTrace : inputTraces) {
      Reader input = new FileReader(inputTrace);
      try {
        Iterator<Map> i = mapper.readValues(jsonF.createJsonParser(input),
            Map.class);
        while (i.hasNext()) {
          Map jsonJob = i.next();

          // load job information
          long jobStartTime = Long.parseLong(
              jsonJob.get("job.start.ms").toString());
          long jobFinishTime = Long.parseLong(
              jsonJob.get("job.end.ms").toString());

          String user = (String) jsonJob.get("job.user");
          if (user == null) {
            user = "default";
          }
          String queue = jsonJob.get("job.queue.name").toString();

          String oldAppId = jsonJob.get("job.id").toString();
          boolean isTracked = trackedApps.contains(oldAppId);
          int queueSize = queueAppNumMap.containsKey(queue) ?
              queueAppNumMap.get(queue) : 0;
          queueSize++;
          queueAppNumMap.put(queue, queueSize);
          // tasks
          List tasks = (List) jsonJob.get("job.tasks");
          if (tasks == null || tasks.size() == 0) {
            continue;
          }
          List<ContainerSimulator> containerList =
              new ArrayList<ContainerSimulator>();
          for (Object o : tasks) {
            Map jsonTask = (Map) o;
            String hostname = jsonTask.get("container.host").toString();
            long taskStart = Long.parseLong(
                jsonTask.get("container.start.ms").toString());
            long taskFinish = Long.parseLong(
                jsonTask.get("container.end.ms").toString());
            long lifeTime = taskFinish - taskStart;
            int priority = Integer.parseInt(
                jsonTask.get("container.priority").toString());
            String type = jsonTask.get("container.type").toString();
            containerList.add(new ContainerSimulator(containerResource,
                lifeTime, hostname, priority, type));
          }

          // create a new AM
          String amType = jsonJob.get("am.type").toString();
          AMSimulator amSim = (AMSimulator) ReflectionUtils.newInstance(
              amClassMap.get(amType), new Configuration());
          if (amSim != null) {
            amSim.init(AM_ID++, heartbeatInterval, containerList, rm,
                this, jobStartTime, jobFinishTime, user, queue,
                isTracked, oldAppId);
            runner.schedule(amSim);
            maxRuntime = Math.max(maxRuntime, jobFinishTime);
            numTasks += containerList.size();
            amMap.put(oldAppId, amSim);
          }
        }
      } finally {
        input.close();
      }
    }
  }

  /**
   * parse workload information from rumen trace files
   */
  @SuppressWarnings("unchecked")
  private void startAMFromRumenTraces(Resource containerResource,
      int heartbeatInterval)
      throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "file:///");
    long baselineTimeMS = 0;
    for (String inputTrace : inputTraces) {
      File fin = new File(inputTrace);
      JobTraceReader reader = new JobTraceReader(
          new Path(fin.getAbsolutePath()), conf);
      try {
        LoggedJob job = null;
        while ((job = reader.getNext()) != null) {
          // only support MapReduce currently
          String jobType = "mapreduce";
          String user = job.getUser() == null ?
              "default" : job.getUser().getValue();
          String jobQueue = job.getQueue().getValue();
          String oldJobId = job.getJobID().toString();
          long jobStartTimeMS = job.getSubmitTime();
          long jobFinishTimeMS = job.getFinishTime();
          if (baselineTimeMS == 0) {
            baselineTimeMS = jobStartTimeMS;
          }
          jobStartTimeMS -= baselineTimeMS;
          jobFinishTimeMS -= baselineTimeMS;
          if (jobStartTimeMS < 0) {
            LOG.warn("Warning: reset job " + oldJobId + " start time to 0.");
            jobFinishTimeMS = jobFinishTimeMS - jobStartTimeMS;
            jobStartTimeMS = 0;
          }

          boolean isTracked = trackedApps.contains(oldJobId);
          int queueSize = queueAppNumMap.containsKey(jobQueue) ?
              queueAppNumMap.get(jobQueue) : 0;
          queueSize++;
          queueAppNumMap.put(jobQueue, queueSize);

          List<ContainerSimulator> containerList =
              new ArrayList<ContainerSimulator>();
          // map tasks
          for (LoggedTask mapTask : job.getMapTasks()) {
            LoggedTaskAttempt taskAttempt = mapTask.getAttempts()
                .get(mapTask.getAttempts().size() - 1);
            String hostname = taskAttempt.getHostName().getValue();
            long containerLifeTime = taskAttempt.getFinishTime()
                - taskAttempt.getStartTime();
            containerList.add(new ContainerSimulator(containerResource,
                containerLifeTime, hostname, 10, "map"));
          }

          // reduce tasks
          for (LoggedTask reduceTask : job.getReduceTasks()) {
            LoggedTaskAttempt taskAttempt = reduceTask.getAttempts()
                .get(reduceTask.getAttempts().size() - 1);
            String hostname = taskAttempt.getHostName().getValue();
            long containerLifeTime = taskAttempt.getFinishTime()
                - taskAttempt.getStartTime();
            containerList.add(new ContainerSimulator(containerResource,
                containerLifeTime, hostname, 20, "reduce"));
          }

          // create a new AM
          AMSimulator amSim = (AMSimulator) ReflectionUtils.newInstance(
              amClassMap.get(jobType), conf);
          if (amSim != null) {
            amSim.init(AM_ID++, heartbeatInterval, containerList,
                rm, this, jobStartTimeMS, jobFinishTimeMS, user, jobQueue,
                isTracked, oldJobId);
            runner.schedule(amSim);
            maxRuntime = Math.max(maxRuntime, jobFinishTimeMS);
            numTasks += containerList.size();
            amMap.put(oldJobId, amSim);
          }
        }
      } finally {
        reader.close();
      }
    }
  }

  private void printSimulationInfo() {
    if (printSimulation) {
      // node
      LOG.info("------------------------------------");
      LOG.info(MessageFormat.format("# nodes = {0}, # racks = {1}, capacity " +
          "of each node {2} MB memory and {3} vcores.",
          numNMs, numRacks, nmMemoryMB, nmVCores));
      LOG.info("------------------------------------");
      // job
      LOG.info(MessageFormat.format("# applications = {0}, # total " +
          "tasks = {1}, average # tasks per application = {2}",
          numAMs, numTasks, (int)(Math.ceil((numTasks + 0.0) / numAMs))));
      LOG.info("JobId\tQueue\tAMType\tDuration\t#Tasks");
      for (Map.Entry<String, AMSimulator> entry : amMap.entrySet()) {
        AMSimulator am = entry.getValue();
        LOG.info(entry.getKey() + "\t" + am.getQueue() + "\t" + am.getAMType()
            + "\t" + am.getDuration() + "\t" + am.getNumTasks());
      }
      LOG.info("------------------------------------");
      // queue
      LOG.info(MessageFormat.format("number of queues = {0} average " +
          "number of apps = {1}", queueAppNumMap.size(),
          (int)(Math.ceil((numAMs + 0.0) / queueAppNumMap.size()))));
      LOG.info("------------------------------------");
      // runtime
      LOG.info(MessageFormat.format("estimated simulation time is {0}" +
          " seconds", (long)(Math.ceil(maxRuntime / 1000.0))));
      LOG.info("------------------------------------");
    }
    // package this information into simulateInfoMap for use elsewhere
    simulateInfoMap.put("Number of racks", numRacks);
    simulateInfoMap.put("Number of nodes", numNMs);
    simulateInfoMap.put("Node memory (MB)", nmMemoryMB);
    simulateInfoMap.put("Node VCores", nmVCores);
    simulateInfoMap.put("Number of applications", numAMs);
    simulateInfoMap.put("Number of tasks", numTasks);
    simulateInfoMap.put("Average tasks per application",
        (int)(Math.ceil((numTasks + 0.0) / numAMs)));
    simulateInfoMap.put("Number of queues", queueAppNumMap.size());
    simulateInfoMap.put("Average applications per queue",
        (int)(Math.ceil((numAMs + 0.0) / queueAppNumMap.size())));
    simulateInfoMap.put("Estimated simulate time (s)",
        (long)(Math.ceil(maxRuntime / 1000.0)));
  }

  public HashMap<NodeId, NMSimulator> getNmMap() {
    return nmMap;
  }

  public static TaskRunner getRunner() {
    return runner;
  }

  public static void decreaseRemainingApps() {
    remainingApps--;

    if (remainingApps == 0) {
      LOG.info("SLSRunner tears down.");
      System.exit(0);
    }
  }

  public static void main(String args[]) throws Exception {
    Options options = new Options();
    options.addOption("inputrumen", true, "input rumen files");
    options.addOption("inputsls", true, "input sls files");
    options.addOption("nodes", true, "input topology");
    options.addOption("output", true, "output directory");
    options.addOption("trackjobs", true,
        "jobs to be tracked during simulating");
    options.addOption("printsimulation", false,
        "print out simulation information");

    CommandLineParser parser = new GnuParser();
    CommandLine cmd = parser.parse(options, args);

    String inputRumen = cmd.getOptionValue("inputrumen");
    String inputSLS = cmd.getOptionValue("inputsls");
    String output = cmd.getOptionValue("output");

    if ((inputRumen == null && inputSLS == null) || output == null) {
      System.err.println();
      System.err.println("ERROR: Missing input or output file");
      System.err.println();
      System.err.println("Options: -inputrumen|-inputsls FILE,FILE... " +
          "-output FILE [-nodes FILE] [-trackjobs JobId,JobId...] " +
          "[-printsimulation]");
      System.err.println();
      System.exit(1);
    }

    File outputFile = new File(output);
    if (! outputFile.exists()
        && ! outputFile.mkdirs()) {
      System.err.println("ERROR: Cannot create output directory "
          + outputFile.getAbsolutePath());
      System.exit(1);
    }

    Set<String> trackedJobSet = new HashSet<String>();
    if (cmd.hasOption("trackjobs")) {
      String trackjobs = cmd.getOptionValue("trackjobs");
      String jobIds[] = trackjobs.split(",");
      trackedJobSet.addAll(Arrays.asList(jobIds));
    }

    String nodeFile = cmd.hasOption("nodes") ? cmd.getOptionValue("nodes") : "";

    boolean isSLS = inputSLS != null;
    String inputFiles[] = isSLS ? inputSLS.split(",") : inputRumen.split(",");
    SLSRunner sls = new SLSRunner(isSLS, inputFiles, nodeFile, output,
        trackedJobSet, cmd.hasOption("printsimulation"));
    sls.start();
  }
}
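Given the options parsed in main() above, a direct invocation could look like
the following sketch (it assumes the SLS and tools jars are already on the
Hadoop classpath; the trace, topology, and output paths are illustrative):

  $ hadoop org.apache.hadoop.yarn.sls.SLSRunner \
      -inputsls sls-jobs.json -nodes sls-nodes.json \
      -output /tmp/sls-run-1 -trackjobs job_1,job_2 -printsimulation

With -inputrumen in place of -inputsls, the same runner consumes a Rumen
trace directly instead of a pre-converted SLS trace.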
@ -0,0 +1,385 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.appmaster;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.security.PrivilegedExceptionAction;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords
    .FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;

import org.apache.hadoop.yarn.api.protocolrecords
    .RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords
    .RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.util.Records;
import org.apache.log4j.Logger;

import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.hadoop.yarn.sls.scheduler.TaskRunner;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;

public abstract class AMSimulator extends TaskRunner.Task {
  // resource manager
  protected ResourceManager rm;
  // main
  protected SLSRunner se;
  // application
  protected ApplicationId appId;
  protected ApplicationAttemptId appAttemptId;
  protected String oldAppId; // jobId from the jobhistory file
  // record factory
  protected final static RecordFactory recordFactory =
      RecordFactoryProvider.getRecordFactory(null);
  // response queue
  protected final BlockingQueue<AllocateResponse> responseQueue;
  protected int RESPONSE_ID = 1;
  // user name
  protected String user;
  // queue name
  protected String queue;
  // am type
  protected String amtype;
  // job start/end time
  protected long traceStartTimeMS;
  protected long traceFinishTimeMS;
  protected long simulateStartTimeMS;
  protected long simulateFinishTimeMS;
  // whether tracked in Metrics
  protected boolean isTracked;
  // progress
  protected int totalContainers;
  protected int finishedContainers;

  protected final Logger LOG = Logger.getLogger(AMSimulator.class);

  public AMSimulator() {
    this.responseQueue = new LinkedBlockingQueue<AllocateResponse>();
  }

  public void init(int id, int heartbeatInterval,
      List<ContainerSimulator> containerList, ResourceManager rm, SLSRunner se,
      long traceStartTime, long traceFinishTime, String user, String queue,
      boolean isTracked, String oldAppId) {
    // schedule with a generous upper bound (10^6 heartbeats); the concrete
    // simulator ends itself earlier via checkStop()/setEndTime()
    super.init(traceStartTime, traceStartTime + 1000000L * heartbeatInterval,
        heartbeatInterval);
    this.user = user;
    this.rm = rm;
    this.se = se;
    this.queue = queue;
    this.oldAppId = oldAppId;
    this.isTracked = isTracked;
    this.traceStartTimeMS = traceStartTime;
    this.traceFinishTimeMS = traceFinishTime;
  }

  /**
   * register with RM
   */
  @Override
  public void firstStep()
      throws YarnException, IOException, InterruptedException {
    simulateStartTimeMS = System.currentTimeMillis() -
        SLSRunner.getRunner().getStartTimeMS();

    // submit application, waiting until ACCEPTED
    submitApp();

    // register application master
    registerAM();

    // track app metrics
    trackApp();
  }

  @Override
  public void middleStep()
      throws InterruptedException, YarnException, IOException {
    // process responses in the queue
    processResponseQueue();

    // send out request
    sendContainerRequest();

    // check whether finish
    checkStop();
  }

  @Override
  public void lastStep() {
    LOG.info(MessageFormat.format("Application {0} is shutting down.", appId));
    // unregister tracking
    if (isTracked) {
      untrackApp();
    }
    // unregister application master
    final FinishApplicationMasterRequest finishAMRequest = recordFactory
        .newRecordInstance(FinishApplicationMasterRequest.class);
    finishAMRequest.setFinalApplicationStatus(FinalApplicationStatus.SUCCEEDED);

    try {
      UserGroupInformation ugi =
          UserGroupInformation.createRemoteUser(appAttemptId.toString());
      Token<AMRMTokenIdentifier> token =
          rm.getRMContext().getRMApps().get(appAttemptId.getApplicationId())
              .getRMAppAttempt(appAttemptId).getAMRMToken();
      ugi.addTokenIdentifier(token.decodeIdentifier());
      ugi.doAs(new PrivilegedExceptionAction<Object>() {
        @Override
        public Object run() throws Exception {
          rm.getApplicationMasterService()
              .finishApplicationMaster(finishAMRequest);
          return null;
        }
      });
    } catch (IOException e) {
      e.printStackTrace();
    } catch (InterruptedException e) {
      e.printStackTrace();
    }

    simulateFinishTimeMS = System.currentTimeMillis() -
        SLSRunner.getRunner().getStartTimeMS();
    // record job running information
    ((ResourceSchedulerWrapper) rm.getResourceScheduler())
        .addAMRuntime(appId,
            traceStartTimeMS, traceFinishTimeMS,
            simulateStartTimeMS, simulateFinishTimeMS);
  }

  protected ResourceRequest createResourceRequest(
      Resource resource, String host, int priority, int numContainers) {
    ResourceRequest request = recordFactory
        .newRecordInstance(ResourceRequest.class);
    request.setCapability(resource);
    request.setResourceName(host);
    request.setNumContainers(numContainers);
    Priority prio = recordFactory.newRecordInstance(Priority.class);
    prio.setPriority(priority);
    request.setPriority(prio);
    return request;
  }

  protected AllocateRequest createAllocateRequest(List<ResourceRequest> ask,
      List<ContainerId> toRelease) {
    AllocateRequest allocateRequest =
        recordFactory.newRecordInstance(AllocateRequest.class);
    allocateRequest.setResponseId(RESPONSE_ID++);
    allocateRequest.setAskList(ask);
    allocateRequest.setReleaseList(toRelease);
    return allocateRequest;
  }

  protected AllocateRequest createAllocateRequest(List<ResourceRequest> ask) {
    return createAllocateRequest(ask, new ArrayList<ContainerId>());
  }

  protected abstract void processResponseQueue()
      throws InterruptedException, YarnException, IOException;

  protected abstract void sendContainerRequest()
      throws YarnException, IOException, InterruptedException;

  protected abstract void checkStop();

  private void submitApp()
      throws YarnException, InterruptedException, IOException {
    // ask for new application
    GetNewApplicationRequest newAppRequest =
        Records.newRecord(GetNewApplicationRequest.class);
    GetNewApplicationResponse newAppResponse =
        rm.getClientRMService().getNewApplication(newAppRequest);
    appId = newAppResponse.getApplicationId();

    // submit the application
    final SubmitApplicationRequest subAppRequest =
        Records.newRecord(SubmitApplicationRequest.class);
    ApplicationSubmissionContext appSubContext =
        Records.newRecord(ApplicationSubmissionContext.class);
    appSubContext.setApplicationId(appId);
    appSubContext.setMaxAppAttempts(1);
    appSubContext.setQueue(queue);
    appSubContext.setPriority(Priority.newInstance(0));
    ContainerLaunchContext conLauContext =
        Records.newRecord(ContainerLaunchContext.class);
    conLauContext.setApplicationACLs(
        new HashMap<ApplicationAccessType, String>());
    conLauContext.setCommands(new ArrayList<String>());
    conLauContext.setEnvironment(new HashMap<String, String>());
    conLauContext.setLocalResources(new HashMap<String, LocalResource>());
    conLauContext.setServiceData(new HashMap<String, ByteBuffer>());
    appSubContext.setAMContainerSpec(conLauContext);
    appSubContext.setUnmanagedAM(true);
    subAppRequest.setApplicationSubmissionContext(appSubContext);
    UserGroupInformation ugi = UserGroupInformation.createRemoteUser(user);
    ugi.doAs(new PrivilegedExceptionAction<Object>() {
      @Override
      public Object run() throws YarnException {
        rm.getClientRMService().submitApplication(subAppRequest);
        return null;
      }
    });
    LOG.info(MessageFormat.format("Submit a new application {0}", appId));

    // waiting until application ACCEPTED
    RMApp app = rm.getRMContext().getRMApps().get(appId);
    while (app.getState() != RMAppState.ACCEPTED) {
      Thread.sleep(50);
    }

    appAttemptId = rm.getRMContext().getRMApps().get(appId)
        .getCurrentAppAttempt().getAppAttemptId();
  }

  private void registerAM()
      throws YarnException, IOException, InterruptedException {
    // register application master
    final RegisterApplicationMasterRequest amRegisterRequest =
        Records.newRecord(RegisterApplicationMasterRequest.class);
    amRegisterRequest.setHost("localhost");
    amRegisterRequest.setRpcPort(1000);
    amRegisterRequest.setTrackingUrl("localhost:1000");

    UserGroupInformation ugi =
        UserGroupInformation.createRemoteUser(appAttemptId.toString());
    Token<AMRMTokenIdentifier> token =
        rm.getRMContext().getRMApps().get(appAttemptId.getApplicationId())
            .getRMAppAttempt(appAttemptId).getAMRMToken();
    ugi.addTokenIdentifier(token.decodeIdentifier());

    ugi.doAs(
        new PrivilegedExceptionAction<RegisterApplicationMasterResponse>() {
          @Override
          public RegisterApplicationMasterResponse run() throws Exception {
            return rm.getApplicationMasterService()
                .registerApplicationMaster(amRegisterRequest);
          }
        });

    LOG.info(MessageFormat.format(
        "Register the application master for application {0}", appId));
  }

  private void trackApp() {
    if (isTracked) {
      ((ResourceSchedulerWrapper) rm.getResourceScheduler())
          .addTrackedApp(appAttemptId, oldAppId);
    }
  }

  public void untrackApp() {
    if (isTracked) {
      ((ResourceSchedulerWrapper) rm.getResourceScheduler())
          .removeTrackedApp(appAttemptId, oldAppId);
    }
  }

  protected List<ResourceRequest> packageRequests(
      List<ContainerSimulator> csList, int priority) {
    // create requests
    Map<String, ResourceRequest> rackLocalRequestMap =
        new HashMap<String, ResourceRequest>();
    Map<String, ResourceRequest> nodeLocalRequestMap =
        new HashMap<String, ResourceRequest>();
    ResourceRequest anyRequest = null;
    for (ContainerSimulator cs : csList) {
      String rackHostNames[] = SLSUtils.getRackHostName(cs.getHostname());
      // check rack local
      String rackname = rackHostNames[0];
      if (rackLocalRequestMap.containsKey(rackname)) {
        rackLocalRequestMap.get(rackname).setNumContainers(
            rackLocalRequestMap.get(rackname).getNumContainers() + 1);
      } else {
        ResourceRequest request = createResourceRequest(
            cs.getResource(), rackname, priority, 1);
        rackLocalRequestMap.put(rackname, request);
      }
      // check node local
      String hostname = rackHostNames[1];
      if (nodeLocalRequestMap.containsKey(hostname)) {
        nodeLocalRequestMap.get(hostname).setNumContainers(
            nodeLocalRequestMap.get(hostname).getNumContainers() + 1);
      } else {
        ResourceRequest request = createResourceRequest(
            cs.getResource(), hostname, priority, 1);
        nodeLocalRequestMap.put(hostname, request);
      }
      // any
      if (anyRequest == null) {
        anyRequest = createResourceRequest(
            cs.getResource(), ResourceRequest.ANY, priority, 1);
      } else {
        anyRequest.setNumContainers(anyRequest.getNumContainers() + 1);
      }
    }
    List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
    ask.addAll(nodeLocalRequestMap.values());
    ask.addAll(rackLocalRequestMap.values());
    if (anyRequest != null) {
      ask.add(anyRequest);
    }
    return ask;
  }
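
  // A worked example of the aggregation above (hypothetical inputs, for
  // illustration only): given csList = three containers wanting host1,
  // host1, host2, all under rack1, packageRequests(csList, priority)
  // returns one ResourceRequest per locality level instead of one per
  // container:
  //   node-local : <host1, numContainers=2>, <host2, numContainers=1>
  //   rack-local : <rack1, numContainers=3>
  //   off-switch : <ANY,   numContainers=3>
  // The scheduler can then satisfy the ask at whichever level it finds
  // capacity first.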

  public String getQueue() {
    return queue;
  }

  public String getAMType() {
    return amtype;
  }

  public long getDuration() {
    return simulateFinishTimeMS - simulateStartTimeMS;
  }

  public int getNumTasks() {
    return totalContainers;
  }
}
@ -0,0 +1,405 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.appmaster;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;

import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.log4j.Logger;

public class MRAMSimulator extends AMSimulator {
  /*
  Vocabulary Used:
  pending -> requests which are NOT yet sent to RM
  scheduled -> requests which are sent to RM but not yet assigned
  assigned -> requests which are assigned to a container
  completed -> request corresponding to which container has completed

  Maps are scheduled as soon as their requests are received. Reduces are
  scheduled when all maps have finished (slow-start is not yet supported).
  */

  private static final int PRIORITY_REDUCE = 10;
  private static final int PRIORITY_MAP = 20;

  // pending maps
  private LinkedList<ContainerSimulator> pendingMaps =
      new LinkedList<ContainerSimulator>();

  // pending failed maps
  private LinkedList<ContainerSimulator> pendingFailedMaps =
      new LinkedList<ContainerSimulator>();

  // scheduled maps
  private LinkedList<ContainerSimulator> scheduledMaps =
      new LinkedList<ContainerSimulator>();

  // assigned maps
  private Map<ContainerId, ContainerSimulator> assignedMaps =
      new HashMap<ContainerId, ContainerSimulator>();

  // reduces which are not yet scheduled
  private LinkedList<ContainerSimulator> pendingReduces =
      new LinkedList<ContainerSimulator>();

  // pending failed reduces
  private LinkedList<ContainerSimulator> pendingFailedReduces =
      new LinkedList<ContainerSimulator>();

  // scheduled reduces
  private LinkedList<ContainerSimulator> scheduledReduces =
      new LinkedList<ContainerSimulator>();

  // assigned reduces
  private Map<ContainerId, ContainerSimulator> assignedReduces =
      new HashMap<ContainerId, ContainerSimulator>();

  // all maps & reduces
  private LinkedList<ContainerSimulator> allMaps =
      new LinkedList<ContainerSimulator>();
  private LinkedList<ContainerSimulator> allReduces =
      new LinkedList<ContainerSimulator>();

  // counters
  private int mapFinished = 0;
  private int mapTotal = 0;
  private int reduceFinished = 0;
  private int reduceTotal = 0;
  // waiting for AM container
  private boolean isAMContainerRunning = false;
  private Container amContainer;
  // finished
  private boolean isFinished = false;
  // resource for AM container
  private final static int MR_AM_CONTAINER_RESOURCE_MEMORY_MB = 1024;
  private final static int MR_AM_CONTAINER_RESOURCE_VCORES = 1;

  public final Logger LOG = Logger.getLogger(MRAMSimulator.class);

  public void init(int id, int heartbeatInterval,
      List<ContainerSimulator> containerList, ResourceManager rm, SLSRunner se,
      long traceStartTime, long traceFinishTime, String user, String queue,
      boolean isTracked, String oldAppId) {
    super.init(id, heartbeatInterval, containerList, rm, se,
        traceStartTime, traceFinishTime, user, queue,
        isTracked, oldAppId);
    amtype = "mapreduce";

    // get map/reduce tasks
    for (ContainerSimulator cs : containerList) {
      if (cs.getType().equals("map")) {
        cs.setPriority(PRIORITY_MAP);
        pendingMaps.add(cs);
      } else if (cs.getType().equals("reduce")) {
        cs.setPriority(PRIORITY_REDUCE);
        pendingReduces.add(cs);
      }
    }
    allMaps.addAll(pendingMaps);
    allReduces.addAll(pendingReduces);
    mapTotal = pendingMaps.size();
    reduceTotal = pendingReduces.size();
    totalContainers = mapTotal + reduceTotal;
  }

  @Override
  public void firstStep()
      throws YarnException, IOException, InterruptedException {
    super.firstStep();

    requestAMContainer();
  }

  /**
   * send out request for AM container
   */
  protected void requestAMContainer()
      throws YarnException, IOException, InterruptedException {
    List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
    ResourceRequest amRequest = createResourceRequest(
        BuilderUtils.newResource(MR_AM_CONTAINER_RESOURCE_MEMORY_MB,
            MR_AM_CONTAINER_RESOURCE_VCORES),
        ResourceRequest.ANY, 1, 1);
    ask.add(amRequest);
    LOG.debug(MessageFormat.format("Application {0} sends out allocate " +
        "request for its AM", appId));
    final AllocateRequest request = this.createAllocateRequest(ask);

    UserGroupInformation ugi =
        UserGroupInformation.createRemoteUser(appAttemptId.toString());
    Token<AMRMTokenIdentifier> token = rm.getRMContext().getRMApps()
        .get(appAttemptId.getApplicationId())
        .getRMAppAttempt(appAttemptId).getAMRMToken();
    ugi.addTokenIdentifier(token.decodeIdentifier());
    AllocateResponse response = ugi.doAs(
        new PrivilegedExceptionAction<AllocateResponse>() {
          @Override
          public AllocateResponse run() throws Exception {
            return rm.getApplicationMasterService().allocate(request);
          }
        });

    // waiting until the AM container is allocated
    while (true) {
      if (response != null && ! response.getAllocatedContainers().isEmpty()) {
        // get AM container
        Container container = response.getAllocatedContainers().get(0);
        se.getNmMap().get(container.getNodeId())
            .addNewContainer(container, -1L);
        // start AM container
        amContainer = container;
        LOG.debug(MessageFormat.format("Application {0} starts its " +
            "AM container ({1}).", appId, amContainer.getId()));
        isAMContainerRunning = true;
        break;
      }
      // this sleep time is different from HeartBeat
      Thread.sleep(1000);
      // send out empty request
      sendContainerRequest();
      response = responseQueue.take();
    }
  }

  @Override
  @SuppressWarnings("unchecked")
  protected void processResponseQueue()
      throws InterruptedException, YarnException, IOException {
    while (! responseQueue.isEmpty()) {
      AllocateResponse response = responseQueue.take();

      // check completed containers
      if (! response.getCompletedContainersStatuses().isEmpty()) {
        for (ContainerStatus cs : response.getCompletedContainersStatuses()) {
          ContainerId containerId = cs.getContainerId();
          if (cs.getExitStatus() == ContainerExitStatus.SUCCESS) {
            if (assignedMaps.containsKey(containerId)) {
              LOG.debug(MessageFormat.format("Application {0} has one " +
                  "mapper finished ({1}).", appId, containerId));
              assignedMaps.remove(containerId);
              mapFinished++;
              finishedContainers++;
            } else if (assignedReduces.containsKey(containerId)) {
              LOG.debug(MessageFormat.format("Application {0} has one " +
                  "reducer finished ({1}).", appId, containerId));
              assignedReduces.remove(containerId);
              reduceFinished++;
              finishedContainers++;
            } else {
              // am container released event
              isFinished = true;
              LOG.info(MessageFormat.format("Application {0} goes to " +
                  "finish.", appId));
            }
          } else {
            // container to be killed
            if (assignedMaps.containsKey(containerId)) {
              LOG.debug(MessageFormat.format("Application {0} has one " +
                  "mapper killed ({1}).", appId, containerId));
              pendingFailedMaps.add(assignedMaps.remove(containerId));
            } else if (assignedReduces.containsKey(containerId)) {
              LOG.debug(MessageFormat.format("Application {0} has one " +
                  "reducer killed ({1}).", appId, containerId));
              pendingFailedReduces.add(assignedReduces.remove(containerId));
            } else {
              LOG.info(MessageFormat.format("Application {0}''s AM is " +
                  "going to be killed. Restarting...", appId));
              restart();
            }
          }
        }
      }

      // check whether the whole job has finished
      if (isAMContainerRunning &&
          (mapFinished == mapTotal) &&
          (reduceFinished == reduceTotal)) {
        // to release the AM container
        se.getNmMap().get(amContainer.getNodeId())
            .cleanupContainer(amContainer.getId());
        isAMContainerRunning = false;
        LOG.debug(MessageFormat.format("Application {0} sends out event " +
            "to clean up its AM container.", appId));
        isFinished = true;
      }

      // check allocated containers
      for (Container container : response.getAllocatedContainers()) {
        if (! scheduledMaps.isEmpty()) {
          ContainerSimulator cs = scheduledMaps.remove();
          LOG.debug(MessageFormat.format("Application {0} launches " +
              "a mapper ({1}).", appId, container.getId()));
          assignedMaps.put(container.getId(), cs);
          se.getNmMap().get(container.getNodeId())
              .addNewContainer(container, cs.getLifeTime());
        } else if (! this.scheduledReduces.isEmpty()) {
          ContainerSimulator cs = scheduledReduces.remove();
          LOG.debug(MessageFormat.format("Application {0} launches " +
              "a reducer ({1}).", appId, container.getId()));
          assignedReduces.put(container.getId(), cs);
          se.getNmMap().get(container.getNodeId())
              .addNewContainer(container, cs.getLifeTime());
        }
      }
    }
  }

  /**
   * restart the application from scratch after its AM container is killed
   */
  private void restart()
      throws YarnException, IOException, InterruptedException {
    // clear
    finishedContainers = 0;
    isFinished = false;
    mapFinished = 0;
    reduceFinished = 0;
    pendingFailedMaps.clear();
    pendingMaps.clear();
    pendingReduces.clear();
    pendingFailedReduces.clear();
    // re-pend every map and reduce from the full task lists
    pendingMaps.addAll(allMaps);
    pendingReduces.addAll(allReduces);
    isAMContainerRunning = false;
    amContainer = null;
    // resend the AM container request
    requestAMContainer();
  }

  @Override
  protected void sendContainerRequest()
      throws YarnException, IOException, InterruptedException {
    if (isFinished) {
      return;
    }

    // send out request
    List<ResourceRequest> ask = null;
    if (isAMContainerRunning) {
      if (mapFinished != mapTotal) {
        // map phase
        if (! pendingMaps.isEmpty()) {
          ask = packageRequests(pendingMaps, PRIORITY_MAP);
          LOG.debug(MessageFormat.format("Application {0} sends out " +
              "request for {1} mappers.", appId, pendingMaps.size()));
          scheduledMaps.addAll(pendingMaps);
          pendingMaps.clear();
        } else if (! pendingFailedMaps.isEmpty() && scheduledMaps.isEmpty()) {
          ask = packageRequests(pendingFailedMaps, PRIORITY_MAP);
          LOG.debug(MessageFormat.format("Application {0} sends out " +
              "requests for {1} failed mappers.", appId,
              pendingFailedMaps.size()));
          scheduledMaps.addAll(pendingFailedMaps);
          pendingFailedMaps.clear();
        }
      } else if (reduceFinished != reduceTotal) {
        // reduce phase
        if (! pendingReduces.isEmpty()) {
          ask = packageRequests(pendingReduces, PRIORITY_REDUCE);
          LOG.debug(MessageFormat.format("Application {0} sends out " +
              "requests for {1} reducers.", appId, pendingReduces.size()));
          scheduledReduces.addAll(pendingReduces);
          pendingReduces.clear();
        } else if (! pendingFailedReduces.isEmpty()
            && scheduledReduces.isEmpty()) {
          ask = packageRequests(pendingFailedReduces, PRIORITY_REDUCE);
          LOG.debug(MessageFormat.format("Application {0} sends out " +
              "request for {1} failed reducers.", appId,
              pendingFailedReduces.size()));
          scheduledReduces.addAll(pendingFailedReduces);
          pendingFailedReduces.clear();
        }
      }
    }
    if (ask == null) {
      ask = new ArrayList<ResourceRequest>();
    }

    final AllocateRequest request = createAllocateRequest(ask);
    if (totalContainers == 0) {
      request.setProgress(1.0f);
    } else {
      request.setProgress((float) finishedContainers / totalContainers);
    }

    UserGroupInformation ugi =
        UserGroupInformation.createRemoteUser(appAttemptId.toString());
    Token<AMRMTokenIdentifier> token = rm.getRMContext().getRMApps()
        .get(appAttemptId.getApplicationId())
        .getRMAppAttempt(appAttemptId).getAMRMToken();
    ugi.addTokenIdentifier(token.decodeIdentifier());
    AllocateResponse response = ugi.doAs(
        new PrivilegedExceptionAction<AllocateResponse>() {
          @Override
          public AllocateResponse run() throws Exception {
            return rm.getApplicationMasterService().allocate(request);
          }
        });
    if (response != null) {
      responseQueue.put(response);
    }
  }

  @Override
  protected void checkStop() {
    if (isFinished) {
      super.setEndTime(System.currentTimeMillis());
    }
  }

  @Override
  public void lastStep() {
    super.lastStep();

    // clear data structures
    allMaps.clear();
    allReduces.clear();
    assignedMaps.clear();
    assignedReduces.clear();
    pendingFailedMaps.clear();
    pendingFailedReduces.clear();
    pendingMaps.clear();
    pendingReduces.clear();
    scheduledMaps.clear();
    scheduledReduces.clear();
    responseQueue.clear();
  }
}
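// A rough per-heartbeat lifecycle for one simulated MR application, as wired
// together by AMSimulator and MRAMSimulator above (illustrative summary, not
// additional behavior):
//   firstStep()  : submitApp() -> registerAM() -> requestAMContainer()
//   middleStep() : processResponseQueue() -> sendContainerRequest()
//                  -> checkStop(); maps are requested first, reduces only
//                  once every map has finished
//   lastStep()   : unregister the AM, record trace vs. simulated runtimes,
//                  and clear all bookkeeping structures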
@ -0,0 +1,68 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.conf;

public class SLSConfiguration {
  // sls
  public static final String PREFIX = "yarn.sls.";
  // runner
  public static final String RUNNER_PREFIX = PREFIX + "runner.";
  public static final String RUNNER_POOL_SIZE = RUNNER_PREFIX + "pool.size";
  public static final int RUNNER_POOL_SIZE_DEFAULT = 10;
  // scheduler
  public static final String SCHEDULER_PREFIX = PREFIX + "scheduler.";
  public static final String RM_SCHEDULER = SCHEDULER_PREFIX + "class";
  // metrics
  public static final String METRICS_PREFIX = PREFIX + "metrics.";
  public static final String METRICS_SWITCH = METRICS_PREFIX + "switch";
  public static final String METRICS_WEB_ADDRESS_PORT = METRICS_PREFIX
      + "web.address.port";
  public static final String METRICS_OUTPUT_DIR = METRICS_PREFIX + "output";
  public static final int METRICS_WEB_ADDRESS_PORT_DEFAULT = 10001;
  public static final String METRICS_TIMER_WINDOW_SIZE = METRICS_PREFIX
      + "timer.window.size";
  public static final int METRICS_TIMER_WINDOW_SIZE_DEFAULT = 100;
  public static final String METRICS_RECORD_INTERVAL_MS = METRICS_PREFIX
      + "record.interval.ms";
  public static final int METRICS_RECORD_INTERVAL_MS_DEFAULT = 1000;
  // nm
  public static final String NM_PREFIX = PREFIX + "nm.";
  public static final String NM_MEMORY_MB = NM_PREFIX + "memory.mb";
  public static final int NM_MEMORY_MB_DEFAULT = 10240;
  public static final String NM_VCORES = NM_PREFIX + "vcores";
  public static final int NM_VCORES_DEFAULT = 10;
  public static final String NM_HEARTBEAT_INTERVAL_MS = NM_PREFIX
      + "heartbeat.interval.ms";
  public static final int NM_HEARTBEAT_INTERVAL_MS_DEFAULT = 1000;
  // am
  public static final String AM_PREFIX = PREFIX + "am.";
  public static final String AM_HEARTBEAT_INTERVAL_MS = AM_PREFIX
      + "heartbeat.interval.ms";
  public static final int AM_HEARTBEAT_INTERVAL_MS_DEFAULT = 1000;
  public static final String AM_TYPE = AM_PREFIX + "type.";

  // container
  public static final String CONTAINER_PREFIX = PREFIX + "container.";
  public static final String CONTAINER_MEMORY_MB = CONTAINER_PREFIX
      + "memory.mb";
  public static final int CONTAINER_MEMORY_MB_DEFAULT = 1024;
  public static final String CONTAINER_VCORES = CONTAINER_PREFIX + "vcores";
  public static final int CONTAINER_VCORES_DEFAULT = 1;

}
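These keys are read through the standard Hadoop Configuration API by the runner and the scheduler wrapper. A minimal sketch, assuming the properties live in an SLS configuration resource on the classpath (the resource name below is an assumption for illustration):

    Configuration conf = new Configuration(false);
    conf.addResource("sls-runner.xml"); // assumed resource name
    int poolSize = conf.getInt(SLSConfiguration.RUNNER_POOL_SIZE,
        SLSConfiguration.RUNNER_POOL_SIZE_DEFAULT);
    int nmMemoryMB = conf.getInt(SLSConfiguration.NM_MEMORY_MB,
        SLSConfiguration.NM_MEMORY_MB_DEFAULT);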
@ -0,0 +1,261 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.nodemanager;

import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.DelayQueue;

import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords
    .RegisterNodeManagerRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords
    .RegisterNodeManagerResponse;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.NodeAction;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.util.Records;
import org.apache.log4j.Logger;

import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.scheduler.TaskRunner;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;

public class NMSimulator extends TaskRunner.Task {
  // node resource
  private RMNode node;
  // master key
  private MasterKey masterKey;
  // containers with various STATE
  private List<ContainerId> completedContainerList;
  private List<ContainerId> releasedContainerList;
  private DelayQueue<ContainerSimulator> containerQueue;
  private Map<ContainerId, ContainerSimulator> runningContainers;
  private List<ContainerId> amContainerList;
  // resource manager
  private ResourceManager rm;
  // heartbeat response id
  private int RESPONSE_ID = 1;
  private final static Logger LOG = Logger.getLogger(NMSimulator.class);

  public void init(String nodeIdStr, int memory, int cores,
      int dispatchTime, int heartBeatInterval, ResourceManager rm)
      throws IOException, YarnException {
    super.init(dispatchTime, dispatchTime + 1000000L * heartBeatInterval,
        heartBeatInterval);
    // create resource
    String[] rackHostName = SLSUtils.getRackHostName(nodeIdStr);
    this.node = NodeInfo.newNodeInfo(rackHostName[0], rackHostName[1],
        BuilderUtils.newResource(memory, cores));
    this.rm = rm;
    // init data structures
    completedContainerList =
        Collections.synchronizedList(new ArrayList<ContainerId>());
    releasedContainerList =
        Collections.synchronizedList(new ArrayList<ContainerId>());
    containerQueue = new DelayQueue<ContainerSimulator>();
    amContainerList =
        Collections.synchronizedList(new ArrayList<ContainerId>());
    runningContainers =
        new ConcurrentHashMap<ContainerId, ContainerSimulator>();
    // register NM with RM
    RegisterNodeManagerRequest req =
        Records.newRecord(RegisterNodeManagerRequest.class);
    req.setNodeId(node.getNodeID());
    req.setResource(node.getTotalCapability());
    req.setHttpPort(80);
    RegisterNodeManagerResponse response = rm.getResourceTrackerService()
        .registerNodeManager(req);
    masterKey = response.getNMTokenMasterKey();
  }

  @Override
  public void firstStep() throws YarnException, IOException {
    // do nothing
  }

  @Override
  public void middleStep() {
    // check the lifetime of each running container
    ContainerSimulator cs = null;
    synchronized(completedContainerList) {
      while ((cs = containerQueue.poll()) != null) {
        runningContainers.remove(cs.getId());
        completedContainerList.add(cs.getId());
        LOG.debug(MessageFormat.format("Container {0} has completed",
            cs.getId()));
      }
    }

    // send heartbeat
    NodeHeartbeatRequest beatRequest =
        Records.newRecord(NodeHeartbeatRequest.class);
    beatRequest.setLastKnownNMTokenMasterKey(masterKey);
    NodeStatus ns = Records.newRecord(NodeStatus.class);

    ns.setContainersStatuses(generateContainerStatusList());
    ns.setNodeId(node.getNodeID());
    ns.setKeepAliveApplications(new ArrayList<ApplicationId>());
    ns.setResponseId(RESPONSE_ID++);
    ns.setNodeHealthStatus(NodeHealthStatus.newInstance(true, "", 0));
    beatRequest.setNodeStatus(ns);
    try {
      NodeHeartbeatResponse beatResponse =
          rm.getResourceTrackerService().nodeHeartbeat(beatRequest);
      if (! beatResponse.getContainersToCleanup().isEmpty()) {
        // remove from queue
        synchronized(releasedContainerList) {
          for (ContainerId containerId : beatResponse.getContainersToCleanup()){
            if (amContainerList.contains(containerId)) {
              // an AM container is only released here, never marked killed
              synchronized(amContainerList) {
                amContainerList.remove(containerId);
              }
              LOG.debug(MessageFormat.format("NodeManager {0} releases " +
                  "an AM ({1}).", node.getNodeID(), containerId));
            } else {
              cs = runningContainers.remove(containerId);
              containerQueue.remove(cs);
              releasedContainerList.add(containerId);
              LOG.debug(MessageFormat.format("NodeManager {0} releases a " +
                  "container ({1}).", node.getNodeID(), containerId));
            }
          }
        }
      }
      if (beatResponse.getNodeAction() == NodeAction.SHUTDOWN) {
        lastStep();
      }
    } catch (YarnException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  @Override
  public void lastStep() {
    // do nothing
  }

  /**
   * Collect the status of all containers located on the current node.
   */
  private ArrayList<ContainerStatus> generateContainerStatusList() {
    ArrayList<ContainerStatus> csList = new ArrayList<ContainerStatus>();
    // add running containers
    for (ContainerSimulator container : runningContainers.values()) {
      csList.add(newContainerStatus(container.getId(),
          ContainerState.RUNNING, ContainerExitStatus.SUCCESS));
    }
    synchronized(amContainerList) {
      for (ContainerId cId : amContainerList) {
        csList.add(newContainerStatus(cId,
            ContainerState.RUNNING, ContainerExitStatus.SUCCESS));
      }
    }
    // add completed containers
    synchronized(completedContainerList) {
      for (ContainerId cId : completedContainerList) {
        LOG.debug(MessageFormat.format("NodeManager {0} completed" +
            " container ({1}).", node.getNodeID(), cId));
        csList.add(newContainerStatus(
            cId, ContainerState.COMPLETE, ContainerExitStatus.SUCCESS));
      }
      completedContainerList.clear();
    }
    // released containers
    synchronized(releasedContainerList) {
      for (ContainerId cId : releasedContainerList) {
        LOG.debug(MessageFormat.format("NodeManager {0} released container" +
            " ({1}).", node.getNodeID(), cId));
        csList.add(newContainerStatus(
            cId, ContainerState.COMPLETE, ContainerExitStatus.ABORTED));
      }
      releasedContainerList.clear();
    }
    return csList;
  }

  private ContainerStatus newContainerStatus(ContainerId cId,
      ContainerState state, int exitState) {
    ContainerStatus cs = Records.newRecord(ContainerStatus.class);
    cs.setContainerId(cId);
    cs.setState(state);
    cs.setExitStatus(exitState);
    return cs;
  }

  public RMNode getNode() {
    return node;
  }

  /**
   * Launch a new container with the given lifetime.
   */
  public void addNewContainer(Container container, long lifeTimeMS) {
    LOG.debug(MessageFormat.format("NodeManager {0} launches a new " +
        "container ({1}).", node.getNodeID(), container.getId()));
    if (lifeTimeMS != -1) {
      // normal container
      ContainerSimulator cs = new ContainerSimulator(container.getId(),
          container.getResource(), lifeTimeMS + System.currentTimeMillis(),
          lifeTimeMS);
      containerQueue.add(cs);
      runningContainers.put(cs.getId(), cs);
    } else {
      // a lifetime of -1 flags an AM container
      synchronized(amContainerList) {
        amContainerList.add(container.getId());
      }
    }
  }

  /**
   * Clean up an AM container and add it to the completed list.
   * @param containerId id of the container to be cleaned
   */
  public void cleanupContainer(ContainerId containerId) {
    synchronized(amContainerList) {
      amContainerList.remove(containerId);
    }
    synchronized(completedContainerList) {
      completedContainerList.add(containerId);
    }
  }
}
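A hedged sketch of driving one simulated node, assuming a started ResourceManager handle `rm` and the "/rack/host" node-ID string convention that SLSUtils.getRackHostName parses; the numbers mirror the SLSConfiguration defaults:

    NMSimulator nm = new NMSimulator();
    // 10240 MB, 10 vcores, first dispatched at t=0, heartbeat every 1000 ms
    nm.init("/rack1/node1", 10240, 10, 0, 1000, rm);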
@ -0,0 +1,167 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.nodemanager;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.net.Node;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
    .UpdatedContainerInfo;

public class NodeInfo {
  private static int NODE_ID = 0;

  public static NodeId newNodeID(String host, int port) {
    return NodeId.newInstance(host, port);
  }

  private static class FakeRMNodeImpl implements RMNode {
    private NodeId nodeId;
    private String hostName;
    private String nodeAddr;
    private String httpAddress;
    private int cmdPort;
    private Resource perNode;
    private String rackName;
    private String healthReport;
    private NodeState state;
    private List<ContainerId> toCleanUpContainers;
    private List<ApplicationId> toCleanUpApplications;

    public FakeRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress,
        Resource perNode, String rackName, String healthReport,
        int cmdPort, String hostName, NodeState state) {
      this.nodeId = nodeId;
      this.nodeAddr = nodeAddr;
      this.httpAddress = httpAddress;
      this.perNode = perNode;
      this.rackName = rackName;
      this.healthReport = healthReport;
      this.cmdPort = cmdPort;
      this.hostName = hostName;
      this.state = state;
      toCleanUpApplications = new ArrayList<ApplicationId>();
      toCleanUpContainers = new ArrayList<ContainerId>();
    }

    public NodeId getNodeID() {
      return nodeId;
    }

    public String getHostName() {
      return hostName;
    }

    public int getCommandPort() {
      return cmdPort;
    }

    public int getHttpPort() {
      return 0;
    }

    public String getNodeAddress() {
      return nodeAddr;
    }

    public String getHttpAddress() {
      return httpAddress;
    }

    public String getHealthReport() {
      return healthReport;
    }

    public long getLastHealthReportTime() {
      return 0;
    }

    public Resource getTotalCapability() {
      return perNode;
    }

    public String getRackName() {
      return rackName;
    }

    public Node getNode() {
      throw new UnsupportedOperationException("Not supported yet.");
    }

    public NodeState getState() {
      return state;
    }

    public List<ContainerId> getContainersToCleanUp() {
      return toCleanUpContainers;
    }

    public List<ApplicationId> getAppsToCleanup() {
      return toCleanUpApplications;
    }

    public void updateNodeHeartbeatResponseForCleanup(
        NodeHeartbeatResponse response) {
    }

    public NodeHeartbeatResponse getLastNodeHeartBeatResponse() {
      return null;
    }

    public List<UpdatedContainerInfo> pullContainerUpdates() {
      ArrayList<UpdatedContainerInfo> list =
          new ArrayList<UpdatedContainerInfo>();

      ArrayList<ContainerStatus> list2 = new ArrayList<ContainerStatus>();
      for (ContainerId cId : this.toCleanUpContainers) {
        list2.add(ContainerStatus.newInstance(cId, ContainerState.RUNNING, "",
            ContainerExitStatus.SUCCESS));
      }
      list.add(new UpdatedContainerInfo(new ArrayList<ContainerStatus>(),
          list2));
      return list;
    }
  }

  public static RMNode newNodeInfo(String rackName, String hostName,
      final Resource resource, int port) {
    final NodeId nodeId = newNodeID(hostName, port);
    final String nodeAddr = hostName + ":" + port;
    final String httpAddress = hostName;

    return new FakeRMNodeImpl(nodeId, nodeAddr, httpAddress,
        resource, rackName, "Me good",
        port, hostName, null);
  }

  public static RMNode newNodeInfo(String rackName, String hostName,
      final Resource resource) {
    return newNodeInfo(rackName, hostName, resource, NODE_ID++);
  }
}
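NodeInfo is the factory NMSimulator.init uses to fabricate an RMNode with no real NodeManager process behind it. A minimal usage sketch; the rack and host names are illustrative assumptions:

    Resource capability = BuilderUtils.newResource(10240, 10);
    RMNode node = NodeInfo.newNodeInfo("rack1", "node1", capability);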
@ -0,0 +1,31 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.scheduler;

public class CapacitySchedulerMetrics extends SchedulerMetrics {

  public CapacitySchedulerMetrics() {
    super();
  }

  @Override
  public void trackQueue(String queueName) {
    trackedQueues.add(queueName);
  }
}
@ -0,0 +1,113 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.scheduler;

import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Resource;

public class ContainerSimulator implements Delayed {
  // id
  private ContainerId id;
  // resource allocated
  private Resource resource;
  // end time
  private long endTime;
  // life time (ms)
  private long lifeTime;
  // host name
  private String hostname;
  // priority
  private int priority;
  // type
  private String type;

  /**
   * Invoked when the AM schedules containers to allocate.
   */
  public ContainerSimulator(Resource resource, long lifeTime,
      String hostname, int priority, String type) {
    this.resource = resource;
    this.lifeTime = lifeTime;
    this.hostname = hostname;
    this.priority = priority;
    this.type = type;
  }

  /**
   * Invoked when the NM schedules containers to run.
   */
  public ContainerSimulator(ContainerId id, Resource resource, long endTime,
      long lifeTime) {
    this.id = id;
    this.resource = resource;
    this.endTime = endTime;
    this.lifeTime = lifeTime;
  }

  public Resource getResource() {
    return resource;
  }

  public ContainerId getId() {
    return id;
  }

  @Override
  public int compareTo(Delayed o) {
    if (!(o instanceof ContainerSimulator)) {
      throw new IllegalArgumentException(
          "Parameter must be a ContainerSimulator instance");
    }
    ContainerSimulator other = (ContainerSimulator) o;
    return (int) Math.signum(endTime - other.endTime);
  }

  @Override
  public long getDelay(TimeUnit unit) {
    return unit.convert(endTime - System.currentTimeMillis(),
        TimeUnit.MILLISECONDS);
  }

  public long getLifeTime() {
    return lifeTime;
  }

  public String getHostname() {
    return hostname;
  }

  public long getEndTime() {
    return endTime;
  }

  public int getPriority() {
    return priority;
  }

  public String getType() {
    return type;
  }

  public void setPriority(int p) {
    priority = p;
  }
}
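Implementing Delayed is what lets NMSimulator park running containers in a java.util.concurrent.DelayQueue: poll() returns null until an element's getDelay() reaches zero, so each heartbeat can drain exactly the containers whose endTime has passed. A small sketch of that behavior, assuming a ContainerId `cId` and a Resource `res` are already at hand:

    DelayQueue<ContainerSimulator> queue = new DelayQueue<ContainerSimulator>();
    long now = System.currentTimeMillis();
    // a container that should finish 5 seconds from now
    queue.add(new ContainerSimulator(cId, res, now + 5000, 5000));
    ContainerSimulator done = queue.poll(); // null until ~5 s have elapsed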
@ -0,0 +1,266 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.scheduler;

import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair
    .AppSchedulable;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair
    .FairScheduler;

import com.codahale.metrics.Gauge;
import org.apache.hadoop.yarn.sls.SLSRunner;

public class FairSchedulerMetrics extends SchedulerMetrics {

  private int totalMemoryMB = Integer.MAX_VALUE;
  private int totalVCores = Integer.MAX_VALUE;
  private boolean maxReset = false;

  public FairSchedulerMetrics() {
    super();
    appTrackedMetrics.add("demand.memory");
    appTrackedMetrics.add("demand.vcores");
    appTrackedMetrics.add("usage.memory");
    appTrackedMetrics.add("usage.vcores");
    appTrackedMetrics.add("minshare.memory");
    appTrackedMetrics.add("minshare.vcores");
    appTrackedMetrics.add("maxshare.memory");
    appTrackedMetrics.add("maxshare.vcores");
    appTrackedMetrics.add("fairshare.memory");
    appTrackedMetrics.add("fairshare.vcores");
    queueTrackedMetrics.add("demand.memory");
    queueTrackedMetrics.add("demand.vcores");
    queueTrackedMetrics.add("usage.memory");
    queueTrackedMetrics.add("usage.vcores");
    queueTrackedMetrics.add("minshare.memory");
    queueTrackedMetrics.add("minshare.vcores");
    queueTrackedMetrics.add("maxshare.memory");
    queueTrackedMetrics.add("maxshare.vcores");
    queueTrackedMetrics.add("fairshare.memory");
    queueTrackedMetrics.add("fairshare.vcores");
  }

  @Override
  public void trackApp(ApplicationAttemptId appAttemptId, String oldAppId) {
    super.trackApp(appAttemptId, oldAppId);
    FairScheduler fair = (FairScheduler) scheduler;
    final AppSchedulable app = fair.getSchedulerApp(appAttemptId)
        .getAppSchedulable();
    metrics.register("variable.app." + oldAppId + ".demand.memory",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return app.getDemand().getMemory();
          }
        }
    );
    metrics.register("variable.app." + oldAppId + ".demand.vcores",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return app.getDemand().getVirtualCores();
          }
        }
    );
    metrics.register("variable.app." + oldAppId + ".usage.memory",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return app.getResourceUsage().getMemory();
          }
        }
    );
    metrics.register("variable.app." + oldAppId + ".usage.vcores",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return app.getResourceUsage().getVirtualCores();
          }
        }
    );
    metrics.register("variable.app." + oldAppId + ".minshare.memory",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return app.getMinShare().getMemory();
          }
        }
    );
    metrics.register("variable.app." + oldAppId + ".minshare.vcores",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return app.getMinShare().getVirtualCores();
          }
        }
    );
    metrics.register("variable.app." + oldAppId + ".maxshare.memory",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return Math.min(app.getMaxShare().getMemory(), totalMemoryMB);
          }
        }
    );
    metrics.register("variable.app." + oldAppId + ".maxshare.vcores",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return Math.min(app.getMaxShare().getVirtualCores(), totalVCores);
          }
        }
    );
    metrics.register("variable.app." + oldAppId + ".fairshare.memory",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return app.getFairShare().getMemory();
          }
        }
    );
    metrics.register("variable.app." + oldAppId + ".fairshare.vcores",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return app.getFairShare().getVirtualCores();
          }
        }
    );
  }

  @Override
  public void trackQueue(String queueName) {
    trackedQueues.add(queueName);
    FairScheduler fair = (FairScheduler) scheduler;
    final FSQueue queue = fair.getQueueManager().getQueue(queueName);
    metrics.register("variable.queue." + queueName + ".demand.memory",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return queue.getDemand().getMemory();
          }
        }
    );
    metrics.register("variable.queue." + queueName + ".demand.vcores",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return queue.getDemand().getVirtualCores();
          }
        }
    );
    metrics.register("variable.queue." + queueName + ".usage.memory",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return queue.getResourceUsage().getMemory();
          }
        }
    );
    metrics.register("variable.queue." + queueName + ".usage.vcores",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return queue.getResourceUsage().getVirtualCores();
          }
        }
    );
    metrics.register("variable.queue." + queueName + ".minshare.memory",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return queue.getMinShare().getMemory();
          }
        }
    );
    metrics.register("variable.queue." + queueName + ".minshare.vcores",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return queue.getMinShare().getVirtualCores();
          }
        }
    );
    metrics.register("variable.queue." + queueName + ".maxshare.memory",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            if (! maxReset &&
                SLSRunner.simulateInfoMap.containsKey("Number of nodes") &&
                SLSRunner.simulateInfoMap.containsKey("Node memory (MB)") &&
                SLSRunner.simulateInfoMap.containsKey("Node VCores")) {
              int numNMs = Integer.parseInt(
                  SLSRunner.simulateInfoMap.get("Number of nodes").toString());
              int numMemoryMB = Integer.parseInt(
                  SLSRunner.simulateInfoMap.get("Node memory (MB)").toString());
              int numVCores = Integer.parseInt(
                  SLSRunner.simulateInfoMap.get("Node VCores").toString());

              totalMemoryMB = numNMs * numMemoryMB;
              totalVCores = numNMs * numVCores;
              // the cluster totals only need to be computed once
              maxReset = true;
            }

            return Math.min(queue.getMaxShare().getMemory(), totalMemoryMB);
          }
        }
    );
    metrics.register("variable.queue." + queueName + ".maxshare.vcores",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return Math.min(queue.getMaxShare().getVirtualCores(), totalVCores);
          }
        }
    );
    metrics.register("variable.queue." + queueName + ".fairshare.memory",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return queue.getFairShare().getMemory();
          }
        }
    );
    metrics.register("variable.queue." + queueName + ".fairshare.vcores",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return queue.getFairShare().getVirtualCores();
          }
        }
    );
  }

  @Override
  public void untrackQueue(String queueName) {
    trackedQueues.remove(queueName);
    metrics.remove("variable.queue." + queueName + ".demand.memory");
    metrics.remove("variable.queue." + queueName + ".demand.vcores");
    metrics.remove("variable.queue." + queueName + ".usage.memory");
    metrics.remove("variable.queue." + queueName + ".usage.vcores");
    metrics.remove("variable.queue." + queueName + ".minshare.memory");
    metrics.remove("variable.queue." + queueName + ".minshare.vcores");
    metrics.remove("variable.queue." + queueName + ".maxshare.memory");
    metrics.remove("variable.queue." + queueName + ".maxshare.vcores");
    metrics.remove("variable.queue." + queueName + ".fairshare.memory");
    metrics.remove("variable.queue." + queueName + ".fairshare.vcores");
  }
}
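All of the per-app and per-queue values above are exposed as codahale metrics Gauges, which are sampled lazily: nothing is computed until a reporter reads the registry. The registration pattern in isolation, with a placeholder value standing in for live scheduler state:

    MetricRegistry registry = new MetricRegistry();
    registry.register("variable.queue.root.demand.memory",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return 0; // placeholder; the real gauges read scheduler state
          }
        });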
@ -0,0 +1,58 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.scheduler;

import org.apache.hadoop.yarn.api.records.QueueInfo;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo
    .FifoScheduler;

import com.codahale.metrics.Gauge;

public class FifoSchedulerMetrics extends SchedulerMetrics {

  public FifoSchedulerMetrics() {
    super();
  }

  @Override
  public void trackQueue(String queueName) {
    trackedQueues.add(queueName);
    FifoScheduler fifo = (FifoScheduler) scheduler;
    // FifoScheduler has only the DEFAULT_QUEUE;
    // the two boolean flags do not affect the result here
    final QueueInfo queue = fifo.getQueueInfo(queueName, false, false);
    // track currentCapacity and maximumCapacity (always 1.0f)
    metrics.register("variable.queue." + queueName + ".currentcapacity",
        new Gauge<Float>() {
          @Override
          public Float getValue() {
            return queue.getCurrentCapacity();
          }
        }
    );
    metrics.register("variable.queue." + queueName + ".maximumcapacity",
        new Gauge<Float>() {
          @Override
          public Float getValue() {
            return queue.getMaximumCapacity();
          }
        }
    );
  }
}
@ -0,0 +1,30 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.scheduler;

import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
    .NodeUpdateSchedulerEvent;

public class NodeUpdateSchedulerEventWrapper extends NodeUpdateSchedulerEvent {

  public NodeUpdateSchedulerEventWrapper(NodeUpdateSchedulerEvent event) {
    super(new RMNodeWrapper(event.getRMNode()));
  }

}
@ -0,0 +1,141 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.scheduler;

import org.apache.hadoop.net.Node;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
    .UpdatedContainerInfo;

import java.util.Collections;
import java.util.List;

public class RMNodeWrapper implements RMNode {
  private RMNode node;
  private List<UpdatedContainerInfo> updates;
  private boolean pulled = false;

  public RMNodeWrapper(RMNode node) {
    this.node = node;
    updates = node.pullContainerUpdates();
  }

  @Override
  public NodeId getNodeID() {
    return node.getNodeID();
  }

  @Override
  public String getHostName() {
    return node.getHostName();
  }

  @Override
  public int getCommandPort() {
    return node.getCommandPort();
  }

  @Override
  public int getHttpPort() {
    return node.getHttpPort();
  }

  @Override
  public String getNodeAddress() {
    return node.getNodeAddress();
  }

  @Override
  public String getHttpAddress() {
    return node.getHttpAddress();
  }

  @Override
  public String getHealthReport() {
    return node.getHealthReport();
  }

  @Override
  public long getLastHealthReportTime() {
    return node.getLastHealthReportTime();
  }

  @Override
  public Resource getTotalCapability() {
    return node.getTotalCapability();
  }

  @Override
  public String getRackName() {
    return node.getRackName();
  }

  @Override
  public Node getNode() {
    return node.getNode();
  }

  @Override
  public NodeState getState() {
    return node.getState();
  }

  @Override
  public List<ContainerId> getContainersToCleanUp() {
    return node.getContainersToCleanUp();
  }

  @Override
  public List<ApplicationId> getAppsToCleanup() {
    return node.getAppsToCleanup();
  }

  @Override
  public void updateNodeHeartbeatResponseForCleanup(
      NodeHeartbeatResponse nodeHeartbeatResponse) {
    node.updateNodeHeartbeatResponseForCleanup(nodeHeartbeatResponse);
  }

  @Override
  public NodeHeartbeatResponse getLastNodeHeartBeatResponse() {
    return node.getLastNodeHeartBeatResponse();
  }

  @Override
  public List<UpdatedContainerInfo> pullContainerUpdates() {
    List<UpdatedContainerInfo> list = Collections.emptyList();
    if (! pulled) {
      list = updates;
      pulled = true;
    }
    return list;
  }

  List<UpdatedContainerInfo> getContainerUpdates() {
    return updates;
  }

}
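The wrapper drains the wrapped node's updates exactly once, in its constructor: SLS code can then inspect completed containers through getContainerUpdates() while the real scheduler still sees the one-shot semantics it expects from pullContainerUpdates(). A hedged sketch, assuming an existing RMNode `realNode`:

    RMNodeWrapper wrapped = new RMNodeWrapper(realNode);
    List<UpdatedContainerInfo> peeked = wrapped.getContainerUpdates();
    wrapped.pullContainerUpdates(); // returns the updates, first call only
    wrapped.pullContainerUpdates(); // empty list on every later call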
@ -0,0 +1,855 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.sls.scheduler;

import org.apache.hadoop.util.ShutdownHookManager;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.hadoop.yarn.sls.conf.SLSConfiguration;
import org.apache.hadoop.yarn.sls.web.SLSWebApp;
import com.codahale.metrics.Counter;
import com.codahale.metrics.CsvReporter;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.SlidingWindowReservoir;
import com.codahale.metrics.Timer;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.QueueInfo;
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
    .UpdatedContainerInfo;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
    .ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
    .SchedulerAppReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
    .SchedulerNodeReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity
    .CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
    .AppAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
    .AppRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
    .NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
    .SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
    .SchedulerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair
    .FairScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo
    .FifoScheduler;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.log4j.Logger;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

public class ResourceSchedulerWrapper implements ResourceScheduler,
    Configurable {
  private static final String EOL = System.getProperty("line.separator");
  private static final int SAMPLING_SIZE = 60;
  private ScheduledExecutorService pool;
  // counters for scheduler allocate/handle operations
  private Counter schedulerAllocateCounter;
  private Counter schedulerHandleCounter;
  private Map<SchedulerEventType, Counter> schedulerHandleCounterMap;
  // Timers for scheduler allocate/handle operations
  private Timer schedulerAllocateTimer;
  private Timer schedulerHandleTimer;
  private Map<SchedulerEventType, Timer> schedulerHandleTimerMap;
  private List<Histogram> schedulerHistogramList;
  private Map<Histogram, Timer> histogramTimerMap;
  private Lock samplerLock;
  private Lock queueLock;

  private Configuration conf;
  private ResourceScheduler scheduler;
  private Map<ApplicationAttemptId, String> appQueueMap =
      new ConcurrentHashMap<ApplicationAttemptId, String>();
  private BufferedWriter jobRuntimeLogBW;

  // Priority of the ResourceSchedulerWrapper shutdown hook.
  public static final int SHUTDOWN_HOOK_PRIORITY = 30;

  // web app
  private SLSWebApp web;

  private Map<ContainerId, Resource> preemptionContainerMap =
      new ConcurrentHashMap<ContainerId, Resource>();

  // metrics
  private MetricRegistry metrics;
  private SchedulerMetrics schedulerMetrics;
  private boolean metricsON;
  private String metricsOutputDir;
  private BufferedWriter metricsLogBW;
  private boolean running = false;
  private static Map<Class, Class> defaultSchedulerMetricsMap =
      new HashMap<Class, Class>();
  static {
    defaultSchedulerMetricsMap.put(FairScheduler.class,
        FairSchedulerMetrics.class);
    defaultSchedulerMetricsMap.put(FifoScheduler.class,
        FifoSchedulerMetrics.class);
    defaultSchedulerMetricsMap.put(CapacityScheduler.class,
        CapacitySchedulerMetrics.class);
  }
  // must be set from outside
  private Set<String> queueSet;
  private Set<String> trackedAppSet;

  public final Logger LOG = Logger.getLogger(ResourceSchedulerWrapper.class);

  public ResourceSchedulerWrapper() {
    samplerLock = new ReentrantLock();
    queueLock = new ReentrantLock();
  }

  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
    // set scheduler
    Class<? extends ResourceScheduler> klass =
        conf.getClass(SLSConfiguration.RM_SCHEDULER, null,
            ResourceScheduler.class);

    scheduler = ReflectionUtils.newInstance(klass, conf);
    // start metrics
    metricsON = conf.getBoolean(SLSConfiguration.METRICS_SWITCH, true);
    if (metricsON) {
      try {
        initMetrics();
      } catch (Exception e) {
        e.printStackTrace();
      }
    }

    ShutdownHookManager.get().addShutdownHook(new Runnable() {
      @Override
      public void run() {
        try {
          if (metricsLogBW != null) {
            metricsLogBW.write("]");
            metricsLogBW.close();
          }
          if (web != null) {
            web.stop();
          }
          tearDown();
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    }, SHUTDOWN_HOOK_PRIORITY);
  }

  @Override
  public Allocation allocate(ApplicationAttemptId attemptId,
      List<ResourceRequest> resourceRequests,
      List<ContainerId> containerIds,
      List<String> strings, List<String> strings2) {
    if (metricsON) {
      final Timer.Context context = schedulerAllocateTimer.time();
      Allocation allocation = null;
      try {
        allocation = scheduler.allocate(attemptId, resourceRequests,
            containerIds, strings, strings2);
        return allocation;
      } finally {
        context.stop();
        schedulerAllocateCounter.inc();
        try {
          updateQueueWithAllocateRequest(allocation, attemptId,
              resourceRequests, containerIds);
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    } else {
      return scheduler.allocate(attemptId,
          resourceRequests, containerIds, strings, strings2);
    }
  }
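The allocate override above is the standard codahale timing idiom: open a Timer.Context before delegating and stop it in a finally block, so latency is recorded even when the underlying call throws. The same pattern in isolation:

    MetricRegistry registry = new MetricRegistry();
    Timer allocateTimer = registry.timer("scheduler.allocate.timer");
    Timer.Context ctx = allocateTimer.time();
    try {
      // ... delegate to the wrapped scheduler ...
    } finally {
      ctx.stop(); // records elapsed time in the timer's histogram
    }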

  @Override
  public void handle(SchedulerEvent schedulerEvent) {
    // metrics off
    if (! metricsON) {
      scheduler.handle(schedulerEvent);
      return;
    }
    if (! running) running = true;

    // metrics on
    Timer.Context handlerTimer = null;
    Timer.Context operationTimer = null;

    NodeUpdateSchedulerEventWrapper eventWrapper;
    try {
      if (schedulerEvent.getType() == SchedulerEventType.NODE_UPDATE
          && schedulerEvent instanceof NodeUpdateSchedulerEvent) {
        eventWrapper = new NodeUpdateSchedulerEventWrapper(
            (NodeUpdateSchedulerEvent)schedulerEvent);
        schedulerEvent = eventWrapper;
        updateQueueWithNodeUpdate(eventWrapper);
      } else if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED
          && schedulerEvent instanceof AppRemovedSchedulerEvent) {
        // if the app still holds its AM container, update resource usage
        // information before the app is removed
        AppRemovedSchedulerEvent appRemoveEvent =
            (AppRemovedSchedulerEvent) schedulerEvent;
        ApplicationAttemptId appAttemptId =
            appRemoveEvent.getApplicationAttemptID();
        String queue = appQueueMap.get(appAttemptId);
        SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
        if (! app.getLiveContainers().isEmpty()) {  // the app has 0 or 1
          // the remaining live container should be the AM container
          RMContainer rmc = app.getLiveContainers().iterator().next();
          updateQueueMetrics(queue,
              rmc.getContainer().getResource().getMemory(),
              rmc.getContainer().getResource().getVirtualCores());
        }
      }

      handlerTimer = schedulerHandleTimer.time();
      operationTimer = schedulerHandleTimerMap
          .get(schedulerEvent.getType()).time();

      scheduler.handle(schedulerEvent);
    } finally {
      if (handlerTimer != null) handlerTimer.stop();
      if (operationTimer != null) operationTimer.stop();
      schedulerHandleCounter.inc();
      schedulerHandleCounterMap.get(schedulerEvent.getType()).inc();

      if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED
          && schedulerEvent instanceof AppRemovedSchedulerEvent) {
        SLSRunner.decreaseRemainingApps();
        AppRemovedSchedulerEvent appRemoveEvent =
            (AppRemovedSchedulerEvent) schedulerEvent;
        appQueueMap.remove(appRemoveEvent.getApplicationAttemptID());
      } else if (schedulerEvent.getType() == SchedulerEventType.APP_ADDED
          && schedulerEvent instanceof AppAddedSchedulerEvent) {
        AppAddedSchedulerEvent appAddEvent =
            (AppAddedSchedulerEvent) schedulerEvent;
        String queueName = appAddEvent.getQueue();
        appQueueMap.put(appAddEvent.getApplicationAttemptId(), queueName);
      }
    }
  }

  private void updateQueueWithNodeUpdate(
      NodeUpdateSchedulerEventWrapper eventWrapper) {
    RMNodeWrapper node = (RMNodeWrapper) eventWrapper.getRMNode();
    List<UpdatedContainerInfo> containerList = node.getContainerUpdates();
    for (UpdatedContainerInfo info : containerList) {
      for (ContainerStatus status : info.getCompletedContainers()) {
        ContainerId containerId = status.getContainerId();
        SchedulerAppReport app = scheduler.getSchedulerAppInfo(
            containerId.getApplicationAttemptId());

        if (app == null) {
          // this happens for the AM container: the app has already been
          // removed by the time the NM sends the release information
          continue;
        }

        String queue = appQueueMap.get(containerId.getApplicationAttemptId());
        int releasedMemory = 0, releasedVCores = 0;
        if (status.getExitStatus() == ContainerExitStatus.SUCCESS) {
          for (RMContainer rmc : app.getLiveContainers()) {
            if (rmc.getContainerId().equals(containerId)) {
              releasedMemory += rmc.getContainer().getResource().getMemory();
              releasedVCores += rmc.getContainer()
                  .getResource().getVirtualCores();
              break;
            }
          }
        } else if (status.getExitStatus() == ContainerExitStatus.ABORTED) {
          if (preemptionContainerMap.containsKey(containerId)) {
            Resource preResource = preemptionContainerMap.get(containerId);
            releasedMemory += preResource.getMemory();
            releasedVCores += preResource.getVirtualCores();
            preemptionContainerMap.remove(containerId);
          }
        }
        // update queue counters
        updateQueueMetrics(queue, releasedMemory, releasedVCores);
      }
    }
  }

  private void updateQueueWithAllocateRequest(Allocation allocation,
      ApplicationAttemptId attemptId,
      List<ResourceRequest> resourceRequests,
      List<ContainerId> containerIds) throws IOException {
    // update queue information
    Resource pendingResource = Resources.createResource(0, 0);
    Resource allocatedResource = Resources.createResource(0, 0);
    String queueName = appQueueMap.get(attemptId);
    // containers requested
    for (ResourceRequest request : resourceRequests) {
      if (request.getResourceName().equals(ResourceRequest.ANY)) {
        Resources.addTo(pendingResource,
            Resources.multiply(request.getCapability(),
                request.getNumContainers()));
      }
    }
    // containers allocated
    for (Container container : allocation.getContainers()) {
      Resources.addTo(allocatedResource, container.getResource());
      Resources.subtractFrom(pendingResource, container.getResource());
    }
    // containers released by the AM
    SchedulerAppReport report = scheduler.getSchedulerAppInfo(attemptId);
    for (ContainerId containerId : containerIds) {
      Container container = null;
      for (RMContainer c : report.getLiveContainers()) {
        if (c.getContainerId().equals(containerId)) {
          container = c.getContainer();
          break;
        }
      }
      if (container != null) {
        // released allocated containers
        Resources.subtractFrom(allocatedResource, container.getResource());
      } else {
        for (RMContainer c : report.getReservedContainers()) {
          if (c.getContainerId().equals(containerId)) {
            container = c.getContainer();
            break;
          }
        }
        if (container != null) {
          // released reserved containers
          Resources.subtractFrom(pendingResource, container.getResource());
        }
      }
    }
    // containers released/preempted by the scheduler
    Set<ContainerId> preemptionContainers = new HashSet<ContainerId>();
    if (allocation.getContainerPreemptions() != null) {
      preemptionContainers.addAll(allocation.getContainerPreemptions());
    }
    if (allocation.getStrictContainerPreemptions() != null) {
      preemptionContainers.addAll(allocation.getStrictContainerPreemptions());
    }
    if (! preemptionContainers.isEmpty()) {
      for (ContainerId containerId : preemptionContainers) {
        if (! preemptionContainerMap.containsKey(containerId)) {
          Container container = null;
          for (RMContainer c : report.getLiveContainers()) {
            if (c.getContainerId().equals(containerId)) {
              container = c.getContainer();
              break;
            }
          }
          if (container != null) {
            preemptionContainerMap.put(containerId, container.getResource());
          }
        }
      }
    }

    // update metrics
    SortedMap<String, Counter> counterMap = metrics.getCounters();
    String[] names = new String[]{
        "counter.queue." + queueName + ".pending.memory",
        "counter.queue." + queueName + ".pending.cores",
        "counter.queue." + queueName + ".allocated.memory",
        "counter.queue." + queueName + ".allocated.cores"};
    int[] values = new int[]{pendingResource.getMemory(),
        pendingResource.getVirtualCores(),
        allocatedResource.getMemory(), allocatedResource.getVirtualCores()};
    for (int i = names.length - 1; i >= 0; i--) {
      if (! counterMap.containsKey(names[i])) {
        metrics.counter(names[i]);
        counterMap = metrics.getCounters();
      }
      counterMap.get(names[i]).inc(values[i]);
    }

    queueLock.lock();
    try {
      if (! schedulerMetrics.isTracked(queueName)) {
        schedulerMetrics.trackQueue(queueName);
      }
    } finally {
      queueLock.unlock();
    }
  }

  private void tearDown() throws IOException {
    // close the job runtime writer
    if (jobRuntimeLogBW != null) {
      jobRuntimeLogBW.close();
    }
    // shut down the thread pool
    if (pool != null) pool.shutdown();
  }

  @SuppressWarnings("unchecked")
  private void initMetrics() throws Exception {
    metrics = new MetricRegistry();
    // configuration
    metricsOutputDir = conf.get(SLSConfiguration.METRICS_OUTPUT_DIR);
    int metricsWebAddressPort = conf.getInt(
        SLSConfiguration.METRICS_WEB_ADDRESS_PORT,
        SLSConfiguration.METRICS_WEB_ADDRESS_PORT_DEFAULT);
    // create SchedulerMetrics for current scheduler
    String schedulerMetricsType = conf.get(scheduler.getClass().getName());
    Class schedulerMetricsClass = schedulerMetricsType == null?
|
||||
defaultSchedulerMetricsMap.get(scheduler.getClass()) :
|
||||
Class.forName(schedulerMetricsType);
|
||||
schedulerMetrics = (SchedulerMetrics)ReflectionUtils
|
||||
.newInstance(schedulerMetricsClass, new Configuration());
|
||||
schedulerMetrics.init(scheduler, metrics);
|
||||
|
||||
// register various metrics
|
||||
registerJvmMetrics();
|
||||
registerClusterResourceMetrics();
|
||||
registerContainerAppNumMetrics();
|
||||
registerSchedulerMetrics();
|
||||
|
||||
// .csv output
|
||||
initMetricsCSVOutput();
|
||||
|
||||
// start web app to provide real-time tracking
|
||||
web = new SLSWebApp(this, metricsWebAddressPort);
|
||||
web.start();
|
||||
|
||||
// a thread to update histogram timer
|
||||
pool = new ScheduledThreadPoolExecutor(2);
|
||||
pool.scheduleAtFixedRate(new HistogramsRunnable(), 0, 1000,
|
||||
TimeUnit.MILLISECONDS);
|
||||
|
||||
// a thread to output metrics for real-tiem tracking
|
||||
pool.scheduleAtFixedRate(new MetricsLogRunnable(), 0, 1000,
|
||||
TimeUnit.MILLISECONDS);
|
||||
|
||||
// application running information
|
||||
jobRuntimeLogBW = new BufferedWriter(
|
||||
new FileWriter(metricsOutputDir + "/jobruntime.csv"));
|
||||
jobRuntimeLogBW.write("JobID,real_start_time,real_end_time," +
|
||||
"simulate_start_time,simulate_end_time" + EOL);
|
||||
jobRuntimeLogBW.flush();
|
||||
}
|
||||
|
||||
private void registerJvmMetrics() {
|
||||
// add JVM gauges
|
||||
metrics.register("variable.jvm.free.memory",
|
||||
new Gauge<Long>() {
|
||||
@Override
|
||||
public Long getValue() {
|
||||
return Runtime.getRuntime().freeMemory();
|
||||
}
|
||||
}
|
||||
);
|
||||
metrics.register("variable.jvm.max.memory",
|
||||
new Gauge<Long>() {
|
||||
@Override
|
||||
public Long getValue() {
|
||||
return Runtime.getRuntime().maxMemory();
|
||||
}
|
||||
}
|
||||
);
|
||||
metrics.register("variable.jvm.total.memory",
|
||||
new Gauge<Long>() {
|
||||
@Override
|
||||
public Long getValue() {
|
||||
return Runtime.getRuntime().totalMemory();
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
private void registerClusterResourceMetrics() {
|
||||
metrics.register("variable.cluster.allocated.memory",
|
||||
new Gauge<Integer>() {
|
||||
@Override
|
||||
public Integer getValue() {
|
||||
if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
|
||||
return 0;
|
||||
} else {
|
||||
return scheduler.getRootQueueMetrics().getAllocatedMB();
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
metrics.register("variable.cluster.allocated.vcores",
|
||||
new Gauge<Integer>() {
|
||||
@Override
|
||||
public Integer getValue() {
|
||||
if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
|
||||
return 0;
|
||||
} else {
|
||||
return scheduler.getRootQueueMetrics().getAllocatedVirtualCores();
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
metrics.register("variable.cluster.available.memory",
|
||||
new Gauge<Integer>() {
|
||||
@Override
|
||||
public Integer getValue() {
|
||||
if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
|
||||
return 0;
|
||||
} else {
|
||||
return scheduler.getRootQueueMetrics().getAvailableMB();
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
metrics.register("variable.cluster.available.vcores",
|
||||
new Gauge<Integer>() {
|
||||
@Override
|
||||
public Integer getValue() {
|
||||
if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
|
||||
return 0;
|
||||
} else {
|
||||
return scheduler.getRootQueueMetrics().getAvailableVirtualCores();
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
private void registerContainerAppNumMetrics() {
|
||||
metrics.register("variable.running.application",
|
||||
new Gauge<Integer>() {
|
||||
@Override
|
||||
public Integer getValue() {
|
||||
if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
|
||||
return 0;
|
||||
} else {
|
||||
return scheduler.getRootQueueMetrics().getAppsRunning();
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
metrics.register("variable.running.container",
|
||||
new Gauge<Integer>() {
|
||||
@Override
|
||||
public Integer getValue() {
|
||||
if(scheduler == null || scheduler.getRootQueueMetrics() == null) {
|
||||
return 0;
|
||||
} else {
|
||||
return scheduler.getRootQueueMetrics().getAllocatedContainers();
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
private void registerSchedulerMetrics() {
|
||||
samplerLock.lock();
|
||||
try {
|
||||
// counters for scheduler operations
|
||||
schedulerAllocateCounter = metrics.counter(
|
||||
"counter.scheduler.operation.allocate");
|
||||
schedulerHandleCounter = metrics.counter(
|
||||
"counter.scheduler.operation.handle");
|
||||
schedulerHandleCounterMap = new HashMap<SchedulerEventType, Counter>();
|
||||
for (SchedulerEventType e : SchedulerEventType.values()) {
|
||||
Counter counter = metrics.counter(
|
||||
"counter.scheduler.operation.handle." + e);
|
||||
schedulerHandleCounterMap.put(e, counter);
|
||||
}
|
||||
// timers for scheduler operations
|
||||
int timeWindowSize = conf.getInt(
|
||||
SLSConfiguration.METRICS_TIMER_WINDOW_SIZE,
|
||||
SLSConfiguration.METRICS_TIMER_WINDOW_SIZE_DEFAULT);
|
||||
schedulerAllocateTimer = new Timer(
|
||||
new SlidingWindowReservoir(timeWindowSize));
|
||||
schedulerHandleTimer = new Timer(
|
||||
new SlidingWindowReservoir(timeWindowSize));
|
||||
schedulerHandleTimerMap = new HashMap<SchedulerEventType, Timer>();
|
||||
for (SchedulerEventType e : SchedulerEventType.values()) {
|
||||
Timer timer = new Timer(new SlidingWindowReservoir(timeWindowSize));
|
||||
schedulerHandleTimerMap.put(e, timer);
|
||||
}
|
||||
// histogram for scheduler operations (Samplers)
|
||||
schedulerHistogramList = new ArrayList<Histogram>();
|
||||
histogramTimerMap = new HashMap<Histogram, Timer>();
|
||||
Histogram schedulerAllocateHistogram = new Histogram(
|
||||
new SlidingWindowReservoir(SAMPLING_SIZE));
|
||||
metrics.register("sampler.scheduler.operation.allocate.timecost",
|
||||
schedulerAllocateHistogram);
|
||||
schedulerHistogramList.add(schedulerAllocateHistogram);
|
||||
histogramTimerMap.put(schedulerAllocateHistogram, schedulerAllocateTimer);
|
||||
Histogram schedulerHandleHistogram = new Histogram(
|
||||
new SlidingWindowReservoir(SAMPLING_SIZE));
|
||||
metrics.register("sampler.scheduler.operation.handle.timecost",
|
||||
schedulerHandleHistogram);
|
||||
schedulerHistogramList.add(schedulerHandleHistogram);
|
||||
histogramTimerMap.put(schedulerHandleHistogram, schedulerHandleTimer);
|
||||
for (SchedulerEventType e : SchedulerEventType.values()) {
|
||||
Histogram histogram = new Histogram(
|
||||
new SlidingWindowReservoir(SAMPLING_SIZE));
|
||||
metrics.register(
|
||||
"sampler.scheduler.operation.handle." + e + ".timecost",
|
||||
histogram);
|
||||
schedulerHistogramList.add(histogram);
|
||||
histogramTimerMap.put(histogram, schedulerHandleTimerMap.get(e));
|
||||
}
|
||||
} finally {
|
||||
samplerLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
private void initMetricsCSVOutput() {
|
||||
int timeIntervalMS = conf.getInt(
|
||||
SLSConfiguration.METRICS_RECORD_INTERVAL_MS,
|
||||
SLSConfiguration.METRICS_RECORD_INTERVAL_MS_DEFAULT);
|
||||
File dir = new File(metricsOutputDir + "/metrics");
|
||||
if(! dir.exists()
|
||||
&& ! dir.mkdirs()) {
|
||||
LOG.error("Cannot create directory " + dir.getAbsoluteFile());
|
||||
}
|
||||
final CsvReporter reporter = CsvReporter.forRegistry(metrics)
|
||||
.formatFor(Locale.US)
|
||||
.convertRatesTo(TimeUnit.SECONDS)
|
||||
.convertDurationsTo(TimeUnit.MILLISECONDS)
|
||||
.build(new File(metricsOutputDir + "/metrics"));
|
||||
reporter.start(timeIntervalMS, TimeUnit.MILLISECONDS);
|
||||
}
|
||||
|
||||
class HistogramsRunnable implements Runnable {
|
||||
@Override
|
||||
public void run() {
|
||||
samplerLock.lock();
|
||||
try {
|
||||
for (Histogram histogram : schedulerHistogramList) {
|
||||
Timer timer = histogramTimerMap.get(histogram);
|
||||
histogram.update((int) timer.getSnapshot().getMean());
|
||||
}
|
||||
} finally {
|
||||
samplerLock.unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class MetricsLogRunnable implements Runnable {
|
||||
private boolean firstLine = true;
|
||||
public MetricsLogRunnable() {
|
||||
try {
|
||||
metricsLogBW = new BufferedWriter(
|
||||
new FileWriter(metricsOutputDir + "/realtimetrack.json"));
|
||||
metricsLogBW.write("[");
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
if(running) {
|
||||
// all WebApp to get real tracking json
|
||||
String metrics = web.generateRealTimeTrackingMetrics();
|
||||
// output
|
||||
try {
|
||||
if(firstLine) {
|
||||
metricsLogBW.write(metrics + EOL);
|
||||
firstLine = false;
|
||||
} else {
|
||||
metricsLogBW.write("," + metrics + EOL);
|
||||
}
|
||||
metricsLogBW.flush();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// the following functions are used by AMSimulator
|
||||
public void addAMRuntime(ApplicationId appId,
|
||||
long traceStartTimeMS, long traceEndTimeMS,
|
||||
long simulateStartTimeMS, long simulateEndTimeMS) {
|
||||
|
||||
try {
|
||||
// write job runtime information
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(appId).append(",").append(traceStartTimeMS).append(",")
|
||||
.append(traceEndTimeMS).append(",").append(simulateStartTimeMS)
|
||||
.append(",").append(simulateEndTimeMS);
|
||||
jobRuntimeLogBW.write(sb.toString() + EOL);
|
||||
jobRuntimeLogBW.flush();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private void updateQueueMetrics(String queue,
|
||||
int releasedMemory, int releasedVCores) {
|
||||
// update queue counters
|
||||
SortedMap<String, Counter> counterMap = metrics.getCounters();
|
||||
if (releasedMemory != 0) {
|
||||
String name = "counter.queue." + queue + ".allocated.memory";
|
||||
if (! counterMap.containsKey(name)) {
|
||||
metrics.counter(name);
|
||||
counterMap = metrics.getCounters();
|
||||
}
|
||||
counterMap.get(name).inc(-releasedMemory);
|
||||
}
|
||||
if (releasedVCores != 0) {
|
||||
String name = "counter.queue." + queue + ".allocated.cores";
|
||||
if (! counterMap.containsKey(name)) {
|
||||
metrics.counter(name);
|
||||
counterMap = metrics.getCounters();
|
||||
}
|
||||
counterMap.get(name).inc(-releasedVCores);
|
||||
}
|
||||
}
|
||||
|
||||
public void setQueueSet(Set<String> queues) {
|
||||
this.queueSet = queues;
|
||||
}
|
||||
|
||||
public Set<String> getQueueSet() {
|
||||
return this.queueSet;
|
||||
}
|
||||
|
||||
public void setTrackedAppSet(Set<String> apps) {
|
||||
this.trackedAppSet = apps;
|
||||
}
|
||||
|
||||
public Set<String> getTrackedAppSet() {
|
||||
return this.trackedAppSet;
|
||||
}
|
||||
|
||||
public MetricRegistry getMetrics() {
|
||||
return metrics;
|
||||
}
|
||||
|
||||
public SchedulerMetrics getSchedulerMetrics() {
|
||||
return schedulerMetrics;
|
||||
}
|
||||
|
||||
// API open to out classes
|
||||
public void addTrackedApp(ApplicationAttemptId appAttemptId,
|
||||
String oldAppId) {
|
||||
if (metricsON) {
|
||||
schedulerMetrics.trackApp(appAttemptId, oldAppId);
|
||||
}
|
||||
}
|
||||
|
||||
public void removeTrackedApp(ApplicationAttemptId appAttemptId,
|
||||
String oldAppId) {
|
||||
if (metricsON) {
|
||||
schedulerMetrics.untrackApp(appAttemptId, oldAppId);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Configuration getConf() {
|
||||
return conf;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reinitialize(Configuration entries, RMContext rmContext)
|
||||
throws IOException {
|
||||
scheduler.reinitialize(entries, rmContext);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void recover(RMStateStore.RMState rmState) throws Exception {
|
||||
scheduler.recover(rmState);
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueueInfo getQueueInfo(String s, boolean b, boolean b2)
|
||||
throws IOException {
|
||||
return scheduler.getQueueInfo(s, b, b2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<QueueUserACLInfo> getQueueUserAclInfo() {
|
||||
return scheduler.getQueueUserAclInfo();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Resource getMinimumResourceCapability() {
|
||||
return scheduler.getMinimumResourceCapability();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Resource getMaximumResourceCapability() {
|
||||
return scheduler.getMaximumResourceCapability();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumClusterNodes() {
|
||||
return scheduler.getNumClusterNodes();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SchedulerNodeReport getNodeReport(NodeId nodeId) {
|
||||
return scheduler.getNodeReport(nodeId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SchedulerAppReport getSchedulerAppInfo(
|
||||
ApplicationAttemptId attemptId) {
|
||||
return scheduler.getSchedulerAppInfo(attemptId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueueMetrics getRootQueueMetrics() {
|
||||
return scheduler.getRootQueueMetrics();
|
||||
}
|
||||
}
|
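The wrapper above publishes its per-queue bookkeeping as Metrics counters named "counter.queue.<queue>.{pending,allocated}.{memory,cores}". As a minimal sketch of reading them back (the queue name sls_queue_1 is taken from the sample configs later in this patch; wrapper stands for any initialized ResourceSchedulerWrapper):

    // read one of the per-queue counters maintained by updateQueueMetrics()
    MetricRegistry registry = wrapper.getMetrics();
    Counter allocatedMB = registry.getCounters()
        .get("counter.queue.sls_queue_1.allocated.memory");
    if (allocatedMB != null) {
      System.out.println("allocated MB: " + allocatedMB.getCount());
    }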
@ -0,0 +1,100 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.scheduler;

import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
    .ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
    .SchedulerAppReport;

import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;

public abstract class SchedulerMetrics {
  protected ResourceScheduler scheduler;
  protected Set<String> trackedQueues;
  protected MetricRegistry metrics;
  protected Set<String> appTrackedMetrics;
  protected Set<String> queueTrackedMetrics;

  public SchedulerMetrics() {
    appTrackedMetrics = new HashSet<String>();
    appTrackedMetrics.add("live.containers");
    appTrackedMetrics.add("reserved.containers");
    queueTrackedMetrics = new HashSet<String>();
  }

  public void init(ResourceScheduler scheduler, MetricRegistry metrics) {
    this.scheduler = scheduler;
    this.trackedQueues = new HashSet<String>();
    this.metrics = metrics;
  }

  public void trackApp(final ApplicationAttemptId appAttemptId,
      String oldAppId) {
    metrics.register("variable.app." + oldAppId + ".live.containers",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
            return app.getLiveContainers().size();
          }
        }
    );
    metrics.register("variable.app." + oldAppId + ".reserved.containers",
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
            return app.getReservedContainers().size();
          }
        }
    );
  }

  public void untrackApp(ApplicationAttemptId appAttemptId,
      String oldAppId) {
    for (String m : appTrackedMetrics) {
      metrics.remove("variable.app." + oldAppId + "." + m);
    }
  }

  public abstract void trackQueue(String queueName);

  public void untrackQueue(String queueName) {
    for (String m : queueTrackedMetrics) {
      metrics.remove("variable.queue." + queueName + "." + m);
    }
  }

  public boolean isTracked(String queueName) {
    return trackedQueues.contains(queueName);
  }

  public Set<String> getAppTrackedMetrics() {
    return appTrackedMetrics;
  }

  public Set<String> getQueueTrackedMetrics() {
    return queueTrackedMetrics;
  }
}
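trackQueue(String) is the only abstract method here; each supported scheduler gets a concrete subclass (FairSchedulerMetrics is referenced from SLSWebApp later in this patch). A minimal sketch of such a subclass — for illustration it reports root-queue totals through the ResourceScheduler API used elsewhere in this patch, where a real implementation would read per-queue usage:

    public class SimpleSchedulerMetrics extends SchedulerMetrics {
      @Override
      public void trackQueue(String queueName) {
        trackedQueues.add(queueName);
        // the name must follow the "variable.queue.<name>.<metric>"
        // convention so that untrackQueue() can remove it again
        queueTrackedMetrics.add("allocated.memory");
        metrics.register("variable.queue." + queueName + ".allocated.memory",
            new Gauge<Integer>() {
              @Override
              public Integer getValue() {
                // illustrative only: root-queue total, not per-queue usage
                return scheduler.getRootQueueMetrics().getAllocatedMB();
              }
            });
      }
    }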
@ -0,0 +1,183 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.sls.scheduler;

import java.io.IOException;
import java.text.MessageFormat;
import java.util.Queue;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.Delayed;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.yarn.exceptions.YarnException;

public class TaskRunner {
  public abstract static class Task implements Runnable, Delayed {
    private long start;
    private long end;
    private long nextRun;
    private long startTime;
    private long endTime;
    private long repeatInterval;
    private Queue<Task> queue;

    public Task() {}

    // values in milliseconds; start/end are milliseconds from now
    public void init(long startTime, long endTime, long repeatInterval) {
      if (endTime - startTime < 0) {
        throw new IllegalArgumentException(MessageFormat.format(
            "endTime[{0}] cannot be smaller than startTime[{1}]", endTime,
            startTime));
      }
      if (repeatInterval < 1) {
        throw new IllegalArgumentException(MessageFormat.format(
            "repeatInterval[{0}] cannot be less than 1", repeatInterval));
      }
      if ((endTime - startTime) % repeatInterval != 0) {
        throw new IllegalArgumentException(MessageFormat.format(
            "Invalid parameters: (endTime[{0}] - startTime[{1}]) " +
                "% repeatInterval[{2}] != 0",
            endTime, startTime, repeatInterval));
      }
      start = startTime;
      end = endTime;
      this.repeatInterval = repeatInterval;
    }

    private void timeRebase(long now) {
      startTime = now + start;
      endTime = now + end;
      this.nextRun = startTime;
    }

    // values in milliseconds; start is milliseconds from now
    // only executes firstStep()
    public void init(long startTime) {
      init(startTime, startTime, 1);
    }

    private void setQueue(Queue<Task> queue) {
      this.queue = queue;
    }

    @Override
    public final void run() {
      try {
        if (nextRun == startTime) {
          firstStep();
          nextRun += repeatInterval;
          if (nextRun <= endTime) {
            queue.add(this);
          }
        } else if (nextRun < endTime) {
          middleStep();
          nextRun += repeatInterval;
          queue.add(this);
        } else {
          lastStep();
        }
      } catch (YarnException e) {
        e.printStackTrace();
      } catch (IOException e) {
        e.printStackTrace();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    }

    @Override
    public long getDelay(TimeUnit unit) {
      return unit.convert(nextRun - System.currentTimeMillis(),
          TimeUnit.MILLISECONDS);
    }

    @Override
    public int compareTo(Delayed o) {
      if (!(o instanceof Task)) {
        throw new IllegalArgumentException("Parameter must be a Task instance");
      }
      Task other = (Task) o;
      return (int) Math.signum(nextRun - other.nextRun);
    }

    public abstract void firstStep()
        throws YarnException, IOException, InterruptedException;

    public abstract void middleStep()
        throws YarnException, InterruptedException, IOException;

    public abstract void lastStep() throws YarnException;

    public void setEndTime(long et) {
      endTime = et;
    }
  }

  private DelayQueue queue;
  private int threadPoolSize;
  private ThreadPoolExecutor executor;
  private long startTimeMS = 0;

  public TaskRunner() {
    queue = new DelayQueue();
  }

  // sets the size of the worker thread pool
  public void setQueueSize(int threadPoolSize) {
    this.threadPoolSize = threadPoolSize;
  }

  @SuppressWarnings("unchecked")
  public void start() {
    if (executor != null) {
      throw new IllegalStateException("Already started");
    }
    DelayQueue preStartQueue = queue;

    queue = new DelayQueue();
    executor = new ThreadPoolExecutor(threadPoolSize, threadPoolSize, 0,
        TimeUnit.MILLISECONDS, queue);
    executor.prestartAllCoreThreads();

    startTimeMS = System.currentTimeMillis();
    for (Object d : preStartQueue) {
      schedule((Task) d, startTimeMS);
    }
  }

  public void stop() {
    executor.shutdownNow();
  }

  @SuppressWarnings("unchecked")
  private void schedule(Task task, long timeNow) {
    task.timeRebase(timeNow);
    task.setQueue(queue);
    queue.add(task);
  }

  public void schedule(Task task) {
    schedule(task, System.currentTimeMillis());
  }

  public long getStartTimeMS() {
    return this.startTimeMS;
  }
}
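A Task runs firstStep() at its (rebased) start time, middleStep() every repeatInterval until the end time, and lastStep() at the end; re-queuing itself into the DelayQueue is what drives the clock forward. A small usage sketch with illustrative times only:

    public class TaskRunnerDemo {
      public static void main(String[] args) {
        TaskRunner runner = new TaskRunner();
        runner.setQueueSize(5); // worker thread count, name notwithstanding
        TaskRunner.Task tick = new TaskRunner.Task() {
          @Override public void firstStep()  { System.out.println("first");  }
          @Override public void middleStep() { System.out.println("middle"); }
          @Override public void lastStep()   { System.out.println("last");   }
        };
        tick.init(0, 10000, 1000); // start now, end after 10s, fire every 1s
        runner.schedule(tick);     // tasks may be queued before start()
        runner.start();            // rebases trace times onto the wall clock
      }
    }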
@ -0,0 +1,133 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.sls.utils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.rumen.JobTraceReader;
import org.apache.hadoop.tools.rumen.LoggedJob;
import org.apache.hadoop.tools.rumen.LoggedTask;
import org.apache.hadoop.tools.rumen.LoggedTaskAttempt;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.map.ObjectMapper;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import java.util.HashSet;
import java.util.Map;
import java.util.List;
import java.util.Iterator;

public class SLSUtils {

  public static String[] getRackHostName(String hostname) {
    // "/rackName/hostName" -> {"rackName", "hostName"}
    hostname = hostname.substring(1);
    return hostname.split("/");
  }

  /**
   * Parse the Rumen trace file and return each host name.
   */
  public static Set<String> parseNodesFromRumenTrace(String jobTrace)
      throws IOException {
    Set<String> nodeSet = new HashSet<String>();

    File fin = new File(jobTrace);
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "file:///");
    JobTraceReader reader = new JobTraceReader(
        new Path(fin.getAbsolutePath()), conf);
    try {
      LoggedJob job = null;
      while ((job = reader.getNext()) != null) {
        for (LoggedTask mapTask : job.getMapTasks()) {
          // select the last attempt
          LoggedTaskAttempt taskAttempt = mapTask.getAttempts()
              .get(mapTask.getAttempts().size() - 1);
          nodeSet.add(taskAttempt.getHostName().getValue());
        }
        for (LoggedTask reduceTask : job.getReduceTasks()) {
          LoggedTaskAttempt taskAttempt = reduceTask.getAttempts()
              .get(reduceTask.getAttempts().size() - 1);
          nodeSet.add(taskAttempt.getHostName().getValue());
        }
      }
    } finally {
      reader.close();
    }

    return nodeSet;
  }

  /**
   * Parse the SLS trace file and return each host name.
   */
  public static Set<String> parseNodesFromSLSTrace(String jobTrace)
      throws IOException {
    Set<String> nodeSet = new HashSet<String>();
    JsonFactory jsonF = new JsonFactory();
    ObjectMapper mapper = new ObjectMapper();
    Reader input = new FileReader(jobTrace);
    try {
      Iterator<Map> i = mapper.readValues(
          jsonF.createJsonParser(input), Map.class);
      while (i.hasNext()) {
        Map jsonE = i.next();
        List tasks = (List) jsonE.get("job.tasks");
        for (Object o : tasks) {
          Map jsonTask = (Map) o;
          String hostname = jsonTask.get("container.host").toString();
          nodeSet.add(hostname);
        }
      }
    } finally {
      input.close();
    }
    return nodeSet;
  }

  /**
   * Parse the input node file and return each host name.
   */
  public static Set<String> parseNodesFromNodeFile(String nodeFile)
      throws IOException {
    Set<String> nodeSet = new HashSet<String>();
    JsonFactory jsonF = new JsonFactory();
    ObjectMapper mapper = new ObjectMapper();
    Reader input = new FileReader(nodeFile);
    try {
      Iterator<Map> i = mapper.readValues(
          jsonF.createJsonParser(input), Map.class);
      while (i.hasNext()) {
        Map jsonE = i.next();
        String rack = "/" + jsonE.get("rack");
        List nodes = (List) jsonE.get("nodes");
        for (Object o : nodes) {
          Map jsonNode = (Map) o;
          nodeSet.add(rack + "/" + jsonNode.get("node"));
        }
      }
    } finally {
      input.close();
    }
    return nodeSet;
  }
}
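The two JSON parsers above only look at a handful of fields, so the smallest inputs they accept look like the following sketches (host and rack names are illustrative; parseNodesFromSLSTrace reads "job.tasks"/"container.host", parseNodesFromNodeFile reads "rack"/"nodes"/"node"):

    // one job object per value in an SLS trace
    { "job.tasks": [ { "container.host": "/default-rack/node1" },
                     { "container.host": "/default-rack/node2" } ] }

    // one rack object per value in a node file
    { "rack": "default-rack",
      "nodes": [ { "node": "node1" }, { "node": "node2" } ] }

Either way, the collected entries have the "/rackName/hostName" shape that getRackHostName() splits back into a {rackName, hostName} pair.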
@ -0,0 +1,527 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.sls.web;

import java.io.File;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
    .SchedulerEventType;
import org.mortbay.jetty.Handler;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.handler.AbstractHandler;
import org.mortbay.jetty.Request;

import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.hadoop.yarn.sls.scheduler.FairSchedulerMetrics;
import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper;
import org.apache.hadoop.yarn.sls.scheduler.SchedulerMetrics;
import com.codahale.metrics.Counter;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import org.mortbay.jetty.handler.ResourceHandler;

public class SLSWebApp extends HttpServlet {
  private static final long serialVersionUID = 1905162041950251407L;
  private transient Server server;
  private transient ResourceSchedulerWrapper wrapper;
  private transient MetricRegistry metrics;
  private transient SchedulerMetrics schedulerMetrics;
  // metrics objects
  private transient Gauge jvmFreeMemoryGauge;
  private transient Gauge jvmMaxMemoryGauge;
  private transient Gauge jvmTotalMemoryGauge;
  private transient Gauge numRunningAppsGauge;
  private transient Gauge numRunningContainersGauge;
  private transient Gauge allocatedMemoryGauge;
  private transient Gauge allocatedVCoresGauge;
  private transient Gauge availableMemoryGauge;
  private transient Gauge availableVCoresGauge;
  private transient Histogram allocateTimecostHistogram;
  private transient Histogram handleTimecostHistogram;
  private Map<SchedulerEventType, Histogram> handleOperTimecostHistogramMap;
  private Map<String, Counter> queueAllocatedMemoryCounterMap;
  private Map<String, Counter> queueAllocatedVCoresCounterMap;
  private int port;
  private int ajaxUpdateTimeMS = 1000;
  // html page templates
  private String simulateInfoTemplate;
  private String simulateTemplate;
  private String trackTemplate;

  {
    // load templates
    ClassLoader cl = Thread.currentThread().getContextClassLoader();
    try {
      simulateInfoTemplate = FileUtils.readFileToString(new File(
          cl.getResource("simulate.info.html.template").getFile()));
      simulateTemplate = FileUtils.readFileToString(new File(
          cl.getResource("simulate.html.template").getFile()));
      trackTemplate = FileUtils.readFileToString(new File(
          cl.getResource("track.html.template").getFile()));
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  public SLSWebApp(ResourceSchedulerWrapper wrapper, int metricsAddressPort) {
    this.wrapper = wrapper;
    metrics = wrapper.getMetrics();
    handleOperTimecostHistogramMap =
        new HashMap<SchedulerEventType, Histogram>();
    queueAllocatedMemoryCounterMap = new HashMap<String, Counter>();
    queueAllocatedVCoresCounterMap = new HashMap<String, Counter>();
    schedulerMetrics = wrapper.getSchedulerMetrics();
    port = metricsAddressPort;
  }

  public void start() throws Exception {
    // static files
    final ResourceHandler staticHandler = new ResourceHandler();
    staticHandler.setResourceBase("html");

    Handler handler = new AbstractHandler() {
      @Override
      public void handle(String target, HttpServletRequest request,
          HttpServletResponse response, int dispatch) {
        try {
          // time unit: seconds by default (divide milliseconds by 1000),
          // minutes when "u=m" is passed
          int timeunit = 1000;
          String timeunitLabel = "second";
          if (request.getParameter("u") != null &&
              request.getParameter("u").equalsIgnoreCase("m")) {
            timeunit = 1000 * 60;
            timeunitLabel = "minute";
          }

          // html page requests
          if (target.equals("/")) {
            printPageIndex(request, response);
          } else if (target.equals("/simulate")) {
            printPageSimulate(request, response, timeunit, timeunitLabel);
          } else if (target.equals("/track")) {
            printPageTrack(request, response, timeunit, timeunitLabel);
          } else if (target.startsWith("/js") || target.startsWith("/css")) {
            // js/css requests
            response.setCharacterEncoding("utf-8");
            staticHandler.handle(target, request, response, dispatch);
          } else if (target.equals("/simulateMetrics")) {
            // json requests
            printJsonMetrics(request, response);
          } else if (target.equals("/trackMetrics")) {
            printJsonTrack(request, response);
          }
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    };

    server = new Server(port);
    server.setHandler(handler);

    server.start();
  }

  public void stop() throws Exception {
    if (server != null) {
      server.stop();
    }
  }

  /**
   * index html page, shows simulation info
   * path "/"
   * @param request http request
   * @param response http response
   * @throws java.io.IOException
   */
  private void printPageIndex(HttpServletRequest request,
      HttpServletResponse response) throws IOException {
    response.setContentType("text/html");
    response.setStatus(HttpServletResponse.SC_OK);

    String simulateInfo;
    if (SLSRunner.simulateInfoMap.isEmpty()) {
      String empty = "<tr><td colspan='2' align='center'>" +
          "No information available</td></tr>";
      simulateInfo = MessageFormat.format(simulateInfoTemplate, empty);
    } else {
      StringBuilder info = new StringBuilder();
      for (Map.Entry<String, Object> entry :
          SLSRunner.simulateInfoMap.entrySet()) {
        info.append("<tr>");
        info.append("<td class='td1'>").append(entry.getKey()).append("</td>");
        info.append("<td class='td2'>").append(entry.getValue())
            .append("</td>");
        info.append("</tr>");
      }
      simulateInfo =
          MessageFormat.format(simulateInfoTemplate, info.toString());
    }
    response.getWriter().println(simulateInfo);

    ((Request) request).setHandled(true);
  }

  /**
   * simulate html page, shows several real-time charts
   * path "/simulate"
   * uses d3.js
   * @param request http request
   * @param response http response
   * @throws java.io.IOException
   */
  private void printPageSimulate(HttpServletRequest request,
      HttpServletResponse response, int timeunit,
      String timeunitLabel)
      throws IOException {
    response.setContentType("text/html");
    response.setStatus(HttpServletResponse.SC_OK);

    // queues {0}
    Set<String> queues = wrapper.getQueueSet();
    StringBuilder queueInfo = new StringBuilder();

    int i = 0;
    for (String queue : queues) {
      queueInfo.append("legends[4][").append(i).append("] = 'queue.")
          .append(queue).append(".allocated.memory';");
      queueInfo.append("legends[5][").append(i).append("] = 'queue.")
          .append(queue).append(".allocated.vcores';");
      i++;
    }

    // time unit label {1}
    // time unit {2}
    // ajax update time interval {3}
    String simulateInfo = MessageFormat.format(simulateTemplate,
        queueInfo.toString(), timeunitLabel, "" + timeunit,
        "" + ajaxUpdateTimeMS);
    response.getWriter().println(simulateInfo);

    ((Request) request).setHandled(true);
  }

  /**
   * html page for tracking one queue or job
   * path "/track"
   * uses d3.js
   * @param request http request
   * @param response http response
   * @throws java.io.IOException
   */
  private void printPageTrack(HttpServletRequest request,
      HttpServletResponse response, int timeunit,
      String timeunitLabel)
      throws IOException {
    response.setContentType("text/html");
    response.setStatus(HttpServletResponse.SC_OK);

    // tracked queues {0}
    StringBuilder trackedQueueInfo = new StringBuilder();
    Set<String> trackedQueues = wrapper.getQueueSet();
    for (String queue : trackedQueues) {
      trackedQueueInfo.append("<option value='Queue ").append(queue)
          .append("'>").append(queue).append("</option>");
    }

    // tracked apps {1}
    StringBuilder trackedAppInfo = new StringBuilder();
    Set<String> trackedApps = wrapper.getTrackedAppSet();
    for (String job : trackedApps) {
      trackedAppInfo.append("<option value='Job ").append(job)
          .append("'>").append(job).append("</option>");
    }

    // time unit label {2}
    // time unit {3}
    // ajax update time {4}
    // final html
    String trackInfo = MessageFormat.format(trackTemplate,
        trackedQueueInfo.toString(), trackedAppInfo.toString(),
        timeunitLabel, "" + timeunit, "" + ajaxUpdateTimeMS);
    response.getWriter().println(trackInfo);

    ((Request) request).setHandled(true);
  }

  /**
   * packages metrics information in a json and returns it
   * @param request http request
   * @param response http response
   * @throws java.io.IOException
   */
  private void printJsonMetrics(HttpServletRequest request,
      HttpServletResponse response)
      throws IOException {
    response.setContentType("text/json");
    response.setStatus(HttpServletResponse.SC_OK);

    response.getWriter().println(generateRealTimeTrackingMetrics());
    ((Request) request).setHandled(true);
  }

  public String generateRealTimeTrackingMetrics() {
    // JVM
    double jvmFreeMemoryGB, jvmMaxMemoryGB, jvmTotalMemoryGB;
    if (jvmFreeMemoryGauge == null &&
        metrics.getGauges().containsKey("variable.jvm.free.memory")) {
      jvmFreeMemoryGauge = metrics.getGauges().get("variable.jvm.free.memory");
    }
    if (jvmMaxMemoryGauge == null &&
        metrics.getGauges().containsKey("variable.jvm.max.memory")) {
      jvmMaxMemoryGauge = metrics.getGauges().get("variable.jvm.max.memory");
    }
    if (jvmTotalMemoryGauge == null &&
        metrics.getGauges().containsKey("variable.jvm.total.memory")) {
      jvmTotalMemoryGauge = metrics.getGauges()
          .get("variable.jvm.total.memory");
    }
    jvmFreeMemoryGB = jvmFreeMemoryGauge == null ? 0 :
        Double.parseDouble(jvmFreeMemoryGauge.getValue().toString())
            / 1024 / 1024 / 1024;
    jvmMaxMemoryGB = jvmMaxMemoryGauge == null ? 0 :
        Double.parseDouble(jvmMaxMemoryGauge.getValue().toString())
            / 1024 / 1024 / 1024;
    jvmTotalMemoryGB = jvmTotalMemoryGauge == null ? 0 :
        Double.parseDouble(jvmTotalMemoryGauge.getValue().toString())
            / 1024 / 1024 / 1024;

    // number of running applications/containers
    String numRunningApps, numRunningContainers;
    if (numRunningAppsGauge == null &&
        metrics.getGauges().containsKey("variable.running.application")) {
      numRunningAppsGauge =
          metrics.getGauges().get("variable.running.application");
    }
    if (numRunningContainersGauge == null &&
        metrics.getGauges().containsKey("variable.running.container")) {
      numRunningContainersGauge =
          metrics.getGauges().get("variable.running.container");
    }
    numRunningApps = numRunningAppsGauge == null ? "0" :
        numRunningAppsGauge.getValue().toString();
    numRunningContainers = numRunningContainersGauge == null ? "0" :
        numRunningContainersGauge.getValue().toString();

    // cluster allocated/available resources
    double allocatedMemoryGB, availableMemoryGB;
    double allocatedVCores, availableVCores;
    if (allocatedMemoryGauge == null &&
        metrics.getGauges()
            .containsKey("variable.cluster.allocated.memory")) {
      allocatedMemoryGauge = metrics.getGauges()
          .get("variable.cluster.allocated.memory");
    }
    if (allocatedVCoresGauge == null &&
        metrics.getGauges()
            .containsKey("variable.cluster.allocated.vcores")) {
      allocatedVCoresGauge = metrics.getGauges()
          .get("variable.cluster.allocated.vcores");
    }
    if (availableMemoryGauge == null &&
        metrics.getGauges()
            .containsKey("variable.cluster.available.memory")) {
      availableMemoryGauge = metrics.getGauges()
          .get("variable.cluster.available.memory");
    }
    if (availableVCoresGauge == null &&
        metrics.getGauges()
            .containsKey("variable.cluster.available.vcores")) {
      availableVCoresGauge = metrics.getGauges()
          .get("variable.cluster.available.vcores");
    }
    allocatedMemoryGB = allocatedMemoryGauge == null ? 0 :
        Double.parseDouble(allocatedMemoryGauge.getValue().toString()) / 1024;
    allocatedVCores = allocatedVCoresGauge == null ? 0 :
        Double.parseDouble(allocatedVCoresGauge.getValue().toString());
    availableMemoryGB = availableMemoryGauge == null ? 0 :
        Double.parseDouble(availableMemoryGauge.getValue().toString()) / 1024;
    availableVCores = availableVCoresGauge == null ? 0 :
        Double.parseDouble(availableVCoresGauge.getValue().toString());

    // scheduler operations; timer snapshots are in nanoseconds,
    // divide by 1000000 for milliseconds
    double allocateTimecost, handleTimecost;
    if (allocateTimecostHistogram == null &&
        metrics.getHistograms().containsKey(
            "sampler.scheduler.operation.allocate.timecost")) {
      allocateTimecostHistogram = metrics.getHistograms()
          .get("sampler.scheduler.operation.allocate.timecost");
    }
    if (handleTimecostHistogram == null &&
        metrics.getHistograms().containsKey(
            "sampler.scheduler.operation.handle.timecost")) {
      handleTimecostHistogram = metrics.getHistograms()
          .get("sampler.scheduler.operation.handle.timecost");
    }
    allocateTimecost = allocateTimecostHistogram == null ? 0.0 :
        allocateTimecostHistogram.getSnapshot().getMean() / 1000000;
    handleTimecost = handleTimecostHistogram == null ? 0.0 :
        handleTimecostHistogram.getSnapshot().getMean() / 1000000;
    // various handle operations
    Map<SchedulerEventType, Double> handleOperTimecostMap =
        new HashMap<SchedulerEventType, Double>();
    for (SchedulerEventType e : SchedulerEventType.values()) {
      String key = "sampler.scheduler.operation.handle." + e + ".timecost";
      if (!handleOperTimecostHistogramMap.containsKey(e) &&
          metrics.getHistograms().containsKey(key)) {
        handleOperTimecostHistogramMap.put(e, metrics.getHistograms().get(key));
      }
      double timecost = handleOperTimecostHistogramMap.containsKey(e) ?
          handleOperTimecostHistogramMap.get(e).getSnapshot().getMean()
              / 1000000 : 0;
      handleOperTimecostMap.put(e, timecost);
    }

    // allocated resource for each queue
    Map<String, Double> queueAllocatedMemoryMap = new HashMap<String, Double>();
    Map<String, Long> queueAllocatedVCoresMap = new HashMap<String, Long>();
    for (String queue : wrapper.getQueueSet()) {
      // memory
      String key = "counter.queue." + queue + ".allocated.memory";
      if (!queueAllocatedMemoryCounterMap.containsKey(queue) &&
          metrics.getCounters().containsKey(key)) {
        queueAllocatedMemoryCounterMap.put(queue,
            metrics.getCounters().get(key));
      }
      double queueAllocatedMemoryGB =
          queueAllocatedMemoryCounterMap.containsKey(queue) ?
              queueAllocatedMemoryCounterMap.get(queue).getCount() / 1024.0
              : 0;
      queueAllocatedMemoryMap.put(queue, queueAllocatedMemoryGB);
      // vCores
      key = "counter.queue." + queue + ".allocated.cores";
      if (!queueAllocatedVCoresCounterMap.containsKey(queue) &&
          metrics.getCounters().containsKey(key)) {
        queueAllocatedVCoresCounterMap.put(
            queue, metrics.getCounters().get(key));
      }
      long queueAllocatedVCores =
          queueAllocatedVCoresCounterMap.containsKey(queue) ?
              queueAllocatedVCoresCounterMap.get(queue).getCount() : 0;
      queueAllocatedVCoresMap.put(queue, queueAllocatedVCores);
    }

    // package results
    StringBuilder sb = new StringBuilder();
    sb.append("{");
    sb.append("\"time\":").append(System.currentTimeMillis())
        .append(",\"jvm.free.memory\":").append(jvmFreeMemoryGB)
        .append(",\"jvm.max.memory\":").append(jvmMaxMemoryGB)
        .append(",\"jvm.total.memory\":").append(jvmTotalMemoryGB)
        .append(",\"running.applications\":").append(numRunningApps)
        .append(",\"running.containers\":").append(numRunningContainers)
        .append(",\"cluster.allocated.memory\":").append(allocatedMemoryGB)
        .append(",\"cluster.allocated.vcores\":").append(allocatedVCores)
        .append(",\"cluster.available.memory\":").append(availableMemoryGB)
        .append(",\"cluster.available.vcores\":").append(availableVCores);

    for (String queue : wrapper.getQueueSet()) {
      sb.append(",\"queue.").append(queue).append(".allocated.memory\":")
          .append(queueAllocatedMemoryMap.get(queue));
      sb.append(",\"queue.").append(queue).append(".allocated.vcores\":")
          .append(queueAllocatedVCoresMap.get(queue));
    }
    // scheduler allocate & handle
    sb.append(",\"scheduler.allocate.timecost\":").append(allocateTimecost);
    sb.append(",\"scheduler.handle.timecost\":").append(handleTimecost);
    for (SchedulerEventType e : SchedulerEventType.values()) {
      sb.append(",\"scheduler.handle-").append(e).append(".timecost\":")
          .append(handleOperTimecostMap.get(e));
    }
    sb.append("}");
    return sb.toString();
  }

  /**
   * packages metrics information for one tracked queue/app;
   * only supports FairScheduler currently
   * @throws java.io.IOException
   */
  private void printJsonTrack(HttpServletRequest request,
      HttpServletResponse response) throws IOException {
    response.setContentType("text/json");
    response.setStatus(HttpServletResponse.SC_OK);

    StringBuilder sb = new StringBuilder();
    if (schedulerMetrics instanceof FairSchedulerMetrics) {
      // "t" is "Job <appId>" or "Queue <queueName>"; default to empty
      String para = request.getParameter("t");
      if (para == null) {
        para = "";
      }
      if (para.startsWith("Job ")) {
        String appId = para.substring("Job ".length());

        sb.append("{");
        sb.append("\"time\": ").append(System.currentTimeMillis()).append(",");
        sb.append("\"appId\": \"").append(appId).append("\"");
        for (String metric : this.schedulerMetrics.getAppTrackedMetrics()) {
          String key = "variable.app." + appId + "." + metric;
          sb.append(",\"").append(metric).append("\": ");
          if (metrics.getGauges().containsKey(key)) {
            double memoryGB =
                Double.parseDouble(
                    metrics.getGauges().get(key).getValue().toString())
                    / 1024;
            sb.append(memoryGB);
          } else {
            sb.append(-1);
          }
        }
        sb.append("}");

      } else if (para.startsWith("Queue ")) {
        String queueName = para.substring("Queue ".length());
        sb.append("{");
        sb.append("\"time\": ").append(System.currentTimeMillis()).append(",");
        sb.append("\"queueName\": \"").append(queueName).append("\"");
        for (String metric : this.schedulerMetrics.getQueueTrackedMetrics()) {
          String key = "variable.queue." + queueName + "." + metric;
          sb.append(",\"").append(metric).append("\": ");
          if (metrics.getGauges().containsKey(key)) {
            double memoryGB =
                Double.parseDouble(
                    metrics.getGauges().get(key).getValue().toString())
                    / 1024;
            sb.append(memoryGB);
          } else {
            sb.append(-1);
          }
        }
        sb.append("}");
      }
    }
    String output = sb.toString();
    if (output.isEmpty()) {
      output = "[]";
    }
    response.getWriter().println(output);
    ((Request) request).setHandled(true);
  }
}
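Besides the three HTML pages, the handler above serves two JSON endpoints: /simulateMetrics (the output of generateRealTimeTrackingMetrics()) and /trackMetrics?t=Queue <name> or t=Job <appId>. A sketch of polling the former from Java; the port is whatever SLSConfiguration.METRICS_WEB_ADDRESS_PORT was set to, 10001 here purely as an example:

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.URL;

    public class PollSlsMetrics {
      public static void main(String[] args) throws Exception {
        URL url = new URL("http://localhost:10001/simulateMetrics");
        BufferedReader in = new BufferedReader(
            new InputStreamReader(url.openStream(), "UTF-8"));
        String line;
        while ((line = in.readLine()) != null) {
          System.out.println(line); // one JSON object, as assembled above
        }
        in.close();
      }
    }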
@ -0,0 +1,67 @@
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!--
  This file contains queue allocations for the Capacity Scheduler.
  Its format is explained in the Capacity Scheduler documentation at
  http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html.
  The documentation also includes a sample config file.
-->

<configuration>
  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>sls_queue_1,sls_queue_2,sls_queue_3</value>
    <description>The queues at this level (root is the root queue).
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.sls_queue_1.capacity</name>
    <value>25</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.sls_queue_1.maximum-capacity</name>
    <value>100</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.sls_queue_2.capacity</name>
    <value>25</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.sls_queue_2.maximum-capacity</name>
    <value>100</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.sls_queue_3.capacity</name>
    <value>50</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.sls_queue_3.maximum-capacity</name>
    <value>100</value>
  </property>

  <property>
    <name>yarn.scheduler.capacity.maximum-applications</name>
    <value>1000</value>
    <description>Maximum number of applications in the system that
      can be concurrently active, both running and pending.</description>
  </property>
</configuration>
@ -0,0 +1,50 @@
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!--
  This file contains pool and user allocations for the Fair Scheduler.
  Its format is explained in the Fair Scheduler documentation at
  http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html.
  The documentation also includes a sample config file.
-->

<allocations>
  <user name="jenkins">
    <!-- Limit on running jobs for the user across all pools. If more
      jobs than this are submitted, only the first <maxRunningJobs> will
      be scheduled at any given time. Defaults to infinity or the
      userMaxJobsDefault value set below. -->
    <maxRunningJobs>1000</maxRunningJobs>
  </user>
  <userMaxAppsDefault>1000</userMaxAppsDefault>
  <queue name="sls_queue_1">
    <minResources>1024 mb, 1 vcores</minResources>
    <schedulingMode>fair</schedulingMode>
    <weight>0.25</weight>
    <minSharePreemptionTimeout>2</minSharePreemptionTimeout>
  </queue>
  <queue name="sls_queue_2">
    <minResources>1024 mb, 1 vcores</minResources>
    <schedulingMode>fair</schedulingMode>
    <weight>0.25</weight>
    <minSharePreemptionTimeout>2</minSharePreemptionTimeout>
  </queue>
  <queue name="sls_queue_3">
    <minResources>1024 mb, 1 vcores</minResources>
    <weight>0.5</weight>
    <schedulingMode>fair</schedulingMode>
    <minSharePreemptionTimeout>2</minSharePreemptionTimeout>
  </queue>
</allocations>
@ -0,0 +1,47 @@
|
||||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
<!--
|
||||
This file contains pool and user allocations for the Fair Scheduler.
|
||||
Its format is explained in the Fair Scheduler documentation at
|
||||
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html.
|
||||
The documentation also includes a sample config file.
|
||||
-->
|
||||
|
||||
<configuration>
|
||||
<property>
|
||||
<description>Absolute path to allocation file. An allocation file is an XML
|
||||
manifest describing queues and their properties, in addition to certain
|
||||
policy defaults. This file must be in XML format as described in
|
||||
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html.
|
||||
</description>
|
||||
<name>yarn.scheduler.fair.allocation.file</name>
|
||||
<value>fair-scheduler-allocation.xml</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>Whether to use preemption. Note that preemption is experimental
|
||||
in the current version. Defaults to false.</description>
|
||||
<name>yarn.scheduler.fair.preemption</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>Whether to allow multiple container assignments in one
|
||||
heartbeat. Defaults to false.</description>
|
||||
<name>yarn.scheduler.fair.assignmultiple</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
@ -0,0 +1,19 @@
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License. See accompanying LICENSE file.
|
||||
#
|
||||
log4j.appender.test=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.test.Target=System.out
|
||||
log4j.appender.test.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.test.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
|
||||
|
||||
log4j.logger=NONE, test
|
81
hadoop-tools/hadoop-sls/src/main/sample-conf/sls-runner.xml
Normal file
@ -0,0 +1,81 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
<configuration>
|
||||
|
||||
<!-- SLSRunner configuration -->
|
||||
<property>
|
||||
<name>yarn.sls.runner.pool.size</name>
|
||||
<value>100</value>
|
||||
</property>
|
||||
|
||||
<!-- Nodes configuration -->
|
||||
<property>
|
||||
<name>yarn.sls.nm.memory.mb</name>
|
||||
<value>10240</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>yarn.sls.nm.vcores</name>
|
||||
<value>10</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>yarn.sls.nm.heartbeat.interval.ms</name>
|
||||
<value>1000</value>
|
||||
</property>
|
||||
|
||||
<!-- Apps configuration -->
|
||||
<property>
|
||||
<name>yarn.sls.am.heartbeat.interval.ms</name>
|
||||
<value>1000</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>yarn.sls.am.type.mapreduce</name>
|
||||
<value>org.apache.hadoop.yarn.sls.appmaster.MRAMSimulator</value>
|
||||
</property>
|
||||
|
||||
<!-- Containers configuration -->
|
||||
<property>
|
||||
<name>yarn.sls.container.memory.mb</name>
|
||||
<value>1024</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>yarn.sls.container.vcores</name>
|
||||
<value>1</value>
|
||||
</property>
|
||||
|
||||
<!-- metrics -->
|
||||
<property>
|
||||
<name>yarn.sls.metrics.switch</name>
|
||||
<value>ON</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>yarn.sls.metrics.web.address.port</name>
|
||||
<value>10001</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler</name>
|
||||
<value>org.apache.hadoop.yarn.sls.scheduler.FifoSchedulerMetrics</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</name>
|
||||
<value>org.apache.hadoop.yarn.sls.scheduler.FairSchedulerMetrics</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</name>
|
||||
<value>org.apache.hadoop.yarn.sls.scheduler.CapacitySchedulerMetrics</value>
|
||||
</property>
|
||||
|
||||
</configuration>
|
60
hadoop-tools/hadoop-sls/src/main/sample-conf/yarn-site.xml
Normal file
@ -0,0 +1,60 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
<configuration>
|
||||
<property>
|
||||
<name>yarn.resourcemanager.scheduler.class</name>
|
||||
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
|
||||
<!-- <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler</value> -->
|
||||
<!-- <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value> -->
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>The address of the RM web application.</description>
|
||||
<name>yarn.resourcemanager.webapp.address</name>
|
||||
<value>localhost:18088</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>yarn.resourcemanager.resource-tracker.address</name>
|
||||
<value>localhost:18031</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>The address of the scheduler interface.</description>
|
||||
<name>yarn.resourcemanager.scheduler.address</name>
|
||||
<value>localhost:18030</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>The address of the applications manager interface in the RM.</description>
|
||||
<name>yarn.resourcemanager.address</name>
|
||||
<value>localhost:18032</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>The address of the RM admin interface.</description>
|
||||
<name>yarn.resourcemanager.admin.address</name>
|
||||
<value>localhost:18033</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>Set to false to avoid the IP check.</description>
|
||||
<name>hadoop.security.token.service.use_ip</name>
|
||||
<value>false</value>
|
||||
</property>
|
||||
|
||||
</configuration>
|
@ -0,0 +1,440 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License.

---
Yarn Scheduler Load Simulator (SLS)
---
---
${maven.build.timestamp}

Yarn Scheduler Load Simulator (SLS)

\[ {{{./index.html}Go Back}} \]

%{toc|section=1|fromDepth=0}

* Overview

** Overview

The Yarn scheduler is a fertile area of interest with different
implementations, e.g., the Fifo, Capacity and Fair schedulers. Meanwhile,
several optimizations have been made to improve scheduler performance for
different scenarios and workloads. Each scheduler algorithm has its own set
of features, and drives scheduling decisions by many factors, such as
fairness, capacity guarantee, resource availability, etc. It is very
important to evaluate a scheduler algorithm thoroughly before deploying it
to a production cluster. Unfortunately, evaluating a scheduler algorithm is
currently non-trivial: evaluating in a real cluster is time-consuming and
costly, and it is also very hard to find a large-enough cluster. Hence, a
simulator that can predict how well a scheduler algorithm works for a
specific workload would be quite useful.

The Yarn Scheduler Load Simulator (SLS) is such a tool, which can simulate
large-scale Yarn clusters and application loads on a single machine. This
simulator would be invaluable in furthering Yarn by providing a tool for
researchers and developers to prototype new scheduler features and predict
their behavior and performance with a reasonable amount of confidence,
thereby aiding rapid innovation.

The simulator exercises the real Yarn <<<ResourceManager>>>, removing the
network factor by simulating <<<NodeManagers>>> and <<<ApplicationMasters>>>
and by handling and dispatching <<<NM>>>/<<<AM>>> heartbeat events from
within the same JVM. To keep track of scheduler behavior and performance, a
scheduler wrapper wraps the real scheduler.

The size of the cluster and the application load can be loaded from
configuration files, which are generated directly from job history files by
adopting {{{https://hadoop.apache.org/docs/stable/rumen.html}Apache Rumen}}.

The simulator produces real-time metrics while executing, including:

* Resource usage for the whole cluster and for each queue, which can be used
to configure cluster and queue capacities.

* The detailed application execution trace (recorded in relation to
simulated time), which can be analyzed to understand/validate the scheduler
behavior (individual jobs' turnaround time, throughput, fairness, capacity
guarantee, etc.).

* Several key metrics of the scheduler algorithm, such as the time cost of
each scheduler operation (allocate, handle, etc.), which can be used by
Hadoop developers to find code hotspots and scalability limits.

** Goals

* Exercise the scheduler at scale without a real cluster, using real job
traces.

* Be able to simulate real workloads.

** Architecture

The following figure illustrates the implementation architecture of the
simulator.

[images/sls_arch.png] The architecture of the simulator

The simulator takes workload traces as input, and fetches the cluster and
application information. For each NM and AM, the simulator builds a dedicated
simulator to model its execution; all NM/AM simulators run in a thread pool.
The simulator reuses the Yarn ResourceManager, and builds a wrapper around
the scheduler. The Scheduler Wrapper tracks scheduler behavior and generates
several logs, which are the outputs of the simulator and can be further
analyzed. The NM/AM simulators follow a simple heartbeat-driven task pattern,
sketched below.

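The pattern is visible in the <<<TaskRunner>>> tests included in this patch:
each simulated entity is a <<<TaskRunner.Task>>> whose
<<<firstStep>>>/<<<middleStep>>>/<<<lastStep>>> callbacks fire once at its
start time, once per interval, and once at its end time. A minimal,
hypothetical sketch of the pattern (the class and its comments are
illustrative, not part of the patch; the <<<TaskRunner>>> API used is the
one exercised by <<<TestTaskRunner>>> below):

+----+
import org.apache.hadoop.yarn.sls.scheduler.TaskRunner;

// Hypothetical heartbeat-driven task, mirroring how NM/AM simulators run.
public class HeartbeatTaskSketch extends TaskRunner.Task {

  public HeartbeatTaskSketch(long startMs, long endMs, long intervalMs) {
    super.init(startMs, endMs, intervalMs);
  }

  @Override
  public void firstStep() {
    // fires once at startMs, e.g. register the simulated NM/AM with the RM
  }

  @Override
  public void middleStep() {
    // fires every intervalMs, e.g. send one heartbeat, process the response
  }

  @Override
  public void lastStep() {
    // fires once at endMs, e.g. deregister and release resources
  }

  public static void main(String[] args) throws Exception {
    TaskRunner runner = new TaskRunner();
    runner.setQueueSize(10);    // thread pool, cf. yarn.sls.runner.pool.size
    runner.start();
    runner.schedule(new HeartbeatTaskSketch(0, 10000, 1000));
  }
}
+----+
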
** Usecases

* Engineering

* Verify correctness of the scheduler algorithm under load.

* Cheap/practical way of finding code hotspots and the critical path.

* Validate the impact of changes and new features.

* Determine what drives the scheduler scalability limits.

[]

* QA

* Validate scheduler behavior for "large" clusters and several workload
profiles.

* Solutions/Sales.

* Sizing model for predefined/typical workloads.

* Cluster sizing tool using real customer data (job traces).

* Determine minimum SLAs under a particular workload.

* Usage

This section shows how to use the simulator. Here let <<<$HADOOP_ROOT>>>
represent the Hadoop install directory. If you build Hadoop yourself,
<<<$HADOOP_ROOT>>> is <<<hadoop-dist/target/hadoop-$VERSION>>>. The simulator
is located at <<<$HADOOP_ROOT/share/hadoop/tools/sls>>>. The folder
<<<sls>>> contains four directories: <<<bin>>>, <<<html>>>,
<<<sample-conf>>>, and <<<sample-data>>>.

* <<<bin>>>: contains running scripts for the simulator.

* <<<html>>>: contains several html/css/js files needed for real-time
tracking.

* <<<sample-conf>>>: specifies the simulator configurations.

* <<<sample-data>>>: provides an example rumen trace, which can be used to
generate inputs for the simulator.

[]

The following sections describe how to use the simulator step by step.
Before starting, make sure that the <<<hadoop>>> command is included in your
<<<$PATH>>> environment variable.

** Step 1: Configure Hadoop and the simulator

Before we start, make sure Hadoop and the simulator are configured well.
All configuration files for Hadoop and the simulator should be placed in the
directory <<<$HADOOP_ROOT/etc/hadoop>>>, where the <<<ResourceManager>>>
and the Yarn scheduler load their configurations. The directory
<<<$HADOOP_ROOT/share/hadoop/tools/sls/sample-conf/>>> provides several
example configurations that can be used to start a demo.

For the configuration of Hadoop and the Yarn scheduler, users can refer to
Yarn's website
({{{http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/}
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/}}).

The simulator loads its configuration information from the file
<<<$HADOOP_ROOT/etc/hadoop/sls-runner.xml>>>.

Here we illustrate each configuration parameter in <<<sls-runner.xml>>>.
Note that <<<$HADOOP_ROOT/share/hadoop/tools/sls/sample-conf/sls-runner.xml>>>
contains all the default values for these configuration parameters.

* <<<yarn.sls.runner.pool.size>>>

The simulator uses a thread pool to simulate <<<NM>>> and <<<AM>>> running,
and this parameter specifies the number of threads in the pool.

* <<<yarn.sls.nm.memory.mb>>>

The total memory for each <<<NMSimulator>>>.

* <<<yarn.sls.nm.vcores>>>

The total vCores for each <<<NMSimulator>>>.

* <<<yarn.sls.nm.heartbeat.interval.ms>>>

The heartbeat interval for each <<<NMSimulator>>>.

* <<<yarn.sls.am.heartbeat.interval.ms>>>

The heartbeat interval for each <<<AMSimulator>>>.

* <<<yarn.sls.am.type.mapreduce>>>

The <<<AMSimulator>>> implementation for MapReduce-like applications.
Users can specify implementations for other types of applications.

* <<<yarn.sls.container.memory.mb>>>

The memory required for each container simulator.

* <<<yarn.sls.container.vcores>>>

The vCores required for each container simulator.

* <<<yarn.sls.metrics.switch>>>

The simulator introduces {{{http://metrics.codahale.com/}Metrics}} to measure
the behaviors of critical components and operations. This parameter specifies
whether Metrics collection is enabled (<<<ON>>>) or disabled (<<<OFF>>>).

* <<<yarn.sls.metrics.web.address.port>>>

The port used by the simulator to provide real-time tracking. The default
value is 10001.

* <<<org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler>>>

The scheduler-metrics implementation for the Fifo Scheduler.

* <<<org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler>>>

The scheduler-metrics implementation for the Fair Scheduler.

* <<<org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler>>>

The scheduler-metrics implementation for the Capacity Scheduler.

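The parameters above are ordinary Hadoop configuration entries, so they can
be inspected with the standard <<<Configuration>>> API. A minimal sketch
(the property names come from the sample <<<sls-runner.xml>>> shown earlier;
the default values passed to the getters here are illustrative):

+----+
import org.apache.hadoop.conf.Configuration;

public class SlsRunnerConfSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false); // skip *-default.xml
    conf.addResource("sls-runner.xml");            // must be on the classpath

    int poolSize = conf.getInt("yarn.sls.runner.pool.size", 10);
    int nmMemoryMB = conf.getInt("yarn.sls.nm.memory.mb", 10240);
    int nmVCores = conf.getInt("yarn.sls.nm.vcores", 10);
    long nmHeartbeatMs =
        conf.getLong("yarn.sls.nm.heartbeat.interval.ms", 1000);

    System.out.printf("pool=%d, nm=%dMB/%d vcores, heartbeat=%dms%n",
        poolSize, nmMemoryMB, nmVCores, nmHeartbeatMs);
  }
}
+----+
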
** Step 2: Run the simulator

The simulator supports two types of input files: rumen traces and its own
input traces. The script to start the simulator is <<<slsrun.sh>>>.

+----+
$ $HADOOP_ROOT/share/hadoop/tools/sls/bin/slsrun.sh
    --input-rumen|--input-sls=<TRACE_FILE1,TRACE_FILE2,...>
    --output-dir=<SLS_SIMULATION_OUTPUT_DIRECTORY> [--nodes=<SLS_NODES_FILE>]
    [--track-jobs=<JOBID1,JOBID2,...>] [--print-simulation]
+----+

* <<<--input-rumen>>>: The input rumen trace files. Users can input multiple
files, separated by commas. One example trace is provided in
<<<$HADOOP_ROOT/share/hadoop/tools/sls/sample-data/2jobs2min-rumen-jh.json>>>.

* <<<--input-sls>>>: The simulator's own input file format. The simulator
also provides a tool to convert rumen traces to sls traces
(<<<rumen2sls.sh>>>). Refer to the appendix for an example of an sls input
json file.

* <<<--output-dir>>>: The output directory for generated running logs and
metrics.

* <<<--nodes>>>: The cluster topology. By default, the simulator uses the
topology fetched from the input json files. Users can specify a new topology
by setting this parameter. Refer to the appendix for the topology file
format.

* <<<--track-jobs>>>: The particular jobs that will be tracked while the
simulator is running, separated by commas.

* <<<--print-simulation>>>: Whether to print out simulation information
before the simulator runs, including the number of nodes, applications, and
tasks, and information for each application.

In comparison to the rumen format, the sls format is much simpler and users
can easily generate various workloads. The simulator also provides a tool to
convert rumen traces to sls traces.

+----+
$ $HADOOP_ROOT/share/hadoop/tools/sls/bin/rumen2sls.sh
    --rumen-file=<RUMEN_FILE>
    --output-dir=<SLS_OUTPUT_DIRECTORY>
    [--output-prefix=<SLS_FILE_PREFIX>]
+----+

* <<<--rumen-file>>>: The rumen format file. One example trace is provided
in the directory <<<sample-data>>>.

* <<<--output-dir>>>: The output directory of the generated simulation
traces. Two files will be generated in this output directory: one trace file
containing all the job and task information, and another file describing the
topology information.

* <<<--output-prefix>>>: The prefix of the generated files. The default
value is "sls", and the two generated files are <<<sls-jobs.json>>> and
<<<sls-nodes.json>>>.

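Because the sls job format is a flat list of key/value fields (see the
appendix), synthetic workloads can also be generated with plain string
building, without going through the rumen converter. A minimal, hypothetical
generator sketch (the file name and the field values here are illustrative):

+----+
import java.io.PrintWriter;

public class SlsTraceWriterSketch {
  public static void main(String[] args) throws Exception {
    try (PrintWriter out = new PrintWriter("sls-jobs.json", "UTF-8")) {
      // one synthetic single-task job in the sls input format
      out.println("{");
      out.println("  \"am.type\" : \"mapreduce\",");
      out.println("  \"job.start.ms\" : 0,");
      out.println("  \"job.end.ms\" : 30000,");
      out.println("  \"job.queue.name\" : \"sls_queue_1\",");
      out.println("  \"job.id\" : \"job_1\",");
      out.println("  \"job.user\" : \"default\",");
      out.println("  \"job.tasks\" : [ {");
      out.println("    \"container.host\" : \"/default-rack/node1\",");
      out.println("    \"container.start.ms\" : 1000,");
      out.println("    \"container.end.ms\" : 25000,");
      out.println("    \"container.priority\" : 20,");
      out.println("    \"container.type\" : \"map\"");
      out.println("  } ]");
      out.println("}");
    }
  }
}
+----+
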
* Metrics

The Yarn Scheduler Load Simulator has integrated
{{{http://metrics.codahale.com/}Metrics}} to measure the behaviors of
critical components and operations, including running applications and
containers, cluster available resources, scheduler operation time cost, etc.
If the switch <<<yarn.sls.metrics.switch>>> is set to <<<ON>>>,
<<<Metrics>>> runs and outputs its logs to the <<<--output-dir>>> directory
specified by the user. Users can track this information while the simulator
is running, and can also analyze these logs afterwards to evaluate the
scheduler performance.

** Real-time Tracking

The simulator provides an interface for tracking its execution in real time.
Users can go to <<<http://host:port/simulate>>> to track the whole run,
and <<<http://host:port/track>>> to track a particular job or queue. Here
<<<host>>> is the machine where we run the simulator, and <<<port>>> is the
value configured by <<<yarn.sls.metrics.web.address.port>>> (the default
value is 10001).

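The tracking pages are fed by a small JSON endpoint: the <<<simulate>>> page
periodically polls <<<simulateMetrics>>> (see <<<simulate.html.template>>>
later in this patch) and appends each returned sample to its charts. The
same endpoint can be polled directly, for example to archive raw samples; a
minimal sketch (the host and port are assumed to be the defaults discussed
above):

+----+
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

public class MetricsPollerSketch {
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://localhost:10001/simulateMetrics");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    try (BufferedReader in = new BufferedReader(
        new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
      String line;
      while ((line = in.readLine()) != null) {
        System.out.println(line); // one JSON sample of the current metrics
      }
    } finally {
      conn.disconnect();
    }
  }
}
+----+
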
Here we'll illustrate each chart shown on the webpage.

The first figure describes the number of running applications and containers.

[images/sls_running_apps_containers.png] Number of running applications/containers

The second figure describes the allocated and available resources (memory)
in the cluster.

[images/sls_cluster_memory.png] Cluster Resource (Memory)

The third figure describes the allocated resource for each queue. Here we
have three queues: sls_queue_1, sls_queue_2, and sls_queue_3. The first two
queues are configured with a 25% share, while the last one has a 50% share.

[images/sls_queue_allocated_memory.png] Queue Allocated Resource (Memory)

The fourth figure describes the time cost of each scheduler operation.

[images/sls_scheduler_operation_timecost.png] Scheduler Operation Time Cost

Finally, we measure the memory used by the simulator.

[images/sls_JVM.png] JVM Memory

The simulator also provides an interface for tracking particular jobs and
queues. Go to <<<http://<Host>:<Port>/track>>> to get this information.

Here the first figure illustrates the resource usage information for queue
<<<sls_queue_3>>>.

[images/sls_track_queue.png] Tracking Queue <<<sls_queue_3>>>

The second figure illustrates the resource usage information for job
<<<job_1369942127770_0653>>>.

[images/sls_track_job.png] Tracking Job <<<job_1369942127770_0653>>>

** Offline Analysis

After the simulator finishes, all logs are saved in the output directory
specified by <<<--output-dir>>> in
<<<$HADOOP_ROOT/share/hadoop/tools/sls/bin/slsrun.sh>>>.

* File <<<realtimetrack.json>>>: records all real-time tracking logs every
second.

* File <<<jobruntime.csv>>>: records all jobs' start and end times in the
simulator (a parsing sketch follows at the end of this section).

* Folder <<<metrics>>>: logs generated by the Metrics.

[]

Users can also reproduce those real-time tracking charts in offline mode:
simply load <<<realtimetrack.json>>> into
<<<$HADOOP_ROOT/share/hadoop/tools/sls/html/showSimulationTrace.html>>>.
Due to browser security restrictions, the files <<<realtimetrack.json>>> and
<<<showSimulationTrace.html>>> must be placed in the same directory.

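The exact column layout of <<<jobruntime.csv>>> is not spelled out here, so
the following parsing sketch assumes each data row ends with the job's
simulated start and end timestamps in milliseconds; adjust the indices to
match the actual file:

+----+
import java.io.BufferedReader;
import java.io.FileReader;

public class JobRuntimeStatsSketch {
  public static void main(String[] args) throws Exception {
    long totalRuntimeMs = 0;
    int jobs = 0;
    try (BufferedReader in =
             new BufferedReader(new FileReader("jobruntime.csv"))) {
      String line;
      while ((line = in.readLine()) != null) {
        String[] f = line.split(",");
        if (f.length < 2) {
          continue;
        }
        long start, end;
        try {
          // assumed layout: ...,simulateStartTimeMs,simulateEndTimeMs
          start = Long.parseLong(f[f.length - 2].trim());
          end = Long.parseLong(f[f.length - 1].trim());
        } catch (NumberFormatException e) {
          continue; // header or malformed row
        }
        totalRuntimeMs += end - start;
        jobs++;
      }
    }
    if (jobs > 0) {
      System.out.printf("jobs=%d, average runtime=%.1fs%n",
          jobs, totalRuntimeMs / (jobs * 1000.0));
    }
  }
}
+----+
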
* Appendix

** Resources

{{{https://issues.apache.org/jira/browse/YARN-1021}YARN-1021}} is the main
JIRA that introduces the Yarn Scheduler Load Simulator to the Hadoop Yarn
project.

** SLS JSON input file format

Here we provide an example of the sls json file format, which contains 2
jobs. The first job has 3 map tasks and the second one has 2 map tasks.

+----+
{
  "am.type" : "mapreduce",
  "job.start.ms" : 0,
  "job.end.ms" : 95375,
  "job.queue.name" : "sls_queue_1",
  "job.id" : "job_1",
  "job.user" : "default",
  "job.tasks" : [ {
    "container.host" : "/default-rack/node1",
    "container.start.ms" : 6664,
    "container.end.ms" : 23707,
    "container.priority" : 20,
    "container.type" : "map"
  }, {
    "container.host" : "/default-rack/node3",
    "container.start.ms" : 6665,
    "container.end.ms" : 21593,
    "container.priority" : 20,
    "container.type" : "map"
  }, {
    "container.host" : "/default-rack/node2",
    "container.start.ms" : 68770,
    "container.end.ms" : 86613,
    "container.priority" : 20,
    "container.type" : "map"
  } ]
}
{
  "am.type" : "mapreduce",
  "job.start.ms" : 105204,
  "job.end.ms" : 197256,
  "job.queue.name" : "sls_queue_2",
  "job.id" : "job_2",
  "job.user" : "default",
  "job.tasks" : [ {
    "container.host" : "/default-rack/node1",
    "container.start.ms" : 111822,
    "container.end.ms" : 133985,
    "container.priority" : 20,
    "container.type" : "map"
  }, {
    "container.host" : "/default-rack/node2",
    "container.start.ms" : 111788,
    "container.end.ms" : 131377,
    "container.priority" : 20,
    "container.type" : "map"
  } ]
}
+----+

** Simulator input topology file format

Here is an example input topology file which has 3 nodes organized in 1 rack.

+----+
{
  "rack" : "default-rack",
  "nodes" : [ {
    "node" : "node1"
  }, {
    "node" : "node2"
  }, {
    "node" : "node3"
  }]
}
+----+
30
hadoop-tools/hadoop-sls/src/site/resources/css/site.css
Normal file
@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#banner {
|
||||
height: 93px;
|
||||
background: none;
|
||||
}
|
||||
|
||||
#bannerLeft img {
|
||||
margin-left: 30px;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
#bannerRight img {
|
||||
margin: 17px;
|
||||
}
|
||||
|
BIN
hadoop-tools/hadoop-sls/src/site/resources/images/sls_JVM.png
Normal file
BIN
hadoop-tools/hadoop-sls/src/site/resources/images/sls_arch.png
Normal file
[Eight additional new binary image files (the chart screenshots referenced in the documentation above); diff-viewer size metadata omitted.]
@ -0,0 +1,46 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.sls;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.UUID;
|
||||
|
||||
public class TestSLSRunner {
|
||||
|
||||
@Test
|
||||
@SuppressWarnings("all")
|
||||
public void testSimulatorRunning() throws Exception {
|
||||
File tempDir = new File("target", UUID.randomUUID().toString());
|
||||
|
||||
// start the simulator
|
||||
File slsOutputDir = new File(tempDir.getAbsolutePath() + "/slsoutput/");
|
||||
String args[] = new String[]{
|
||||
"-inputrumen", "src/main/data/2jobs2min-rumen-jh.json",
|
||||
"-output", slsOutputDir.getAbsolutePath()};
|
||||
SLSRunner.main(args);
|
||||
|
||||
// wait for 45 seconds before stop
|
||||
Thread.sleep(45 * 1000);
|
||||
SLSRunner.getRunner().stop();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,247 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.yarn.sls.scheduler;
|
||||
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class TestTaskRunner {
|
||||
private TaskRunner runner;
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
runner = new TaskRunner();
|
||||
runner.setQueueSize(5);
|
||||
}
|
||||
|
||||
@After
|
||||
public void cleanUp() {
|
||||
runner.stop();
|
||||
}
|
||||
|
||||
public static class SingleTask extends TaskRunner.Task {
|
||||
public static CountDownLatch latch = new CountDownLatch(1);
|
||||
public static boolean first;
|
||||
|
||||
public SingleTask(long startTime) {
|
||||
super.init(startTime);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void firstStep() {
|
||||
if (first) {
|
||||
Assert.fail();
|
||||
}
|
||||
first = true;
|
||||
latch.countDown();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void middleStep() {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lastStep() {
|
||||
Assert.fail();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleTask() throws Exception {
|
||||
runner.start();
|
||||
runner.schedule(new SingleTask(0));
|
||||
SingleTask.latch.await(5000, TimeUnit.MILLISECONDS);
|
||||
Assert.assertTrue(SingleTask.first);
|
||||
}
|
||||
|
||||
public static class DualTask extends TaskRunner.Task {
|
||||
public static CountDownLatch latch = new CountDownLatch(1);
|
||||
public static boolean first;
|
||||
public static boolean last;
|
||||
|
||||
public DualTask(long startTime, long endTime, long interval) {
|
||||
super.init(startTime, endTime, interval);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void firstStep() {
|
||||
if (first) {
|
||||
Assert.fail();
|
||||
}
|
||||
first = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void middleStep() {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lastStep() {
|
||||
if (last) {
|
||||
Assert.fail();
|
||||
}
|
||||
last = true;
|
||||
latch.countDown();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDualTask() throws Exception {
|
||||
runner.start();
|
||||
runner.schedule(new DualTask(0, 10, 10));
|
||||
DualTask.latch.await(5000, TimeUnit.MILLISECONDS);
|
||||
Assert.assertTrue(DualTask.first);
|
||||
Assert.assertTrue(DualTask.last);
|
||||
}
|
||||
|
||||
public static class TriTask extends TaskRunner.Task {
|
||||
public static CountDownLatch latch = new CountDownLatch(1);
|
||||
public static boolean first;
|
||||
public static boolean middle;
|
||||
public static boolean last;
|
||||
|
||||
public TriTask(long startTime, long endTime, long interval) {
|
||||
super.init(startTime, endTime, interval);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void firstStep() {
|
||||
if (first) {
|
||||
Assert.fail();
|
||||
}
|
||||
first = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void middleStep() {
|
||||
if (middle) {
|
||||
Assert.fail();
|
||||
}
|
||||
middle = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lastStep() {
|
||||
if (last) {
|
||||
Assert.fail();
|
||||
}
|
||||
last = true;
|
||||
latch.countDown();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTriTask() throws Exception {
|
||||
runner.start();
|
||||
runner.schedule(new TriTask(0, 10, 5));
|
||||
TriTask.latch.await(5000, TimeUnit.MILLISECONDS);
|
||||
Assert.assertTrue(TriTask.first);
|
||||
Assert.assertTrue(TriTask.middle);
|
||||
Assert.assertTrue(TriTask.last);
|
||||
}
|
||||
|
||||
public static class MultiTask extends TaskRunner.Task {
|
||||
public static CountDownLatch latch = new CountDownLatch(1);
|
||||
public static boolean first;
|
||||
public static int middle;
|
||||
public static boolean last;
|
||||
|
||||
public MultiTask(long startTime, long endTime, long interval) {
|
||||
super.init(startTime, endTime, interval);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void firstStep() {
|
||||
if (first) {
|
||||
Assert.fail();
|
||||
}
|
||||
first = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void middleStep() {
|
||||
middle++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lastStep() {
|
||||
if (last) {
|
||||
Assert.fail();
|
||||
}
|
||||
last = true;
|
||||
latch.countDown();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiTask() throws Exception {
|
||||
runner.start();
|
||||
runner.schedule(new MultiTask(0, 20, 5));
|
||||
MultiTask.latch.await(5000, TimeUnit.MILLISECONDS);
|
||||
Assert.assertTrue(MultiTask.first);
|
||||
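    // ticks fire at t=0,5,10,15,20; the first and last map to firstStep/lastStep, leaving (20 - 0) / 5 - 2 + 1 = 3 middleStep calls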
Assert.assertEquals((20 - 0) / 5 - 2 + 1, MultiTask.middle);
|
||||
Assert.assertTrue(MultiTask.last);
|
||||
}
|
||||
|
||||
|
||||
public static class PreStartTask extends TaskRunner.Task {
|
||||
public static CountDownLatch latch = new CountDownLatch(1);
|
||||
public static boolean first;
|
||||
|
||||
public PreStartTask(long startTime) {
|
||||
super.init(startTime);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void firstStep() {
|
||||
if (first) {
|
||||
Assert.fail();
|
||||
}
|
||||
first = true;
|
||||
latch.countDown();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void middleStep() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lastStep() {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPreStartQueueing() throws Exception {
|
||||
runner.schedule(new PreStartTask(210));
|
||||
Thread.sleep(210);
|
||||
runner.start();
|
||||
long startedAt = System.currentTimeMillis();
|
||||
PreStartTask.latch.await(5000, TimeUnit.MILLISECONDS);
|
||||
long runAt = System.currentTimeMillis();
|
||||
Assert.assertTrue(PreStartTask.first);
|
||||
Assert.assertTrue(runAt - startedAt >= 200);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.sls.utils;
|
||||
|
||||
import junit.framework.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestSLSUtils {
|
||||
|
||||
@Test
|
||||
public void testGetRackHostname() {
|
||||
String str = "/rack1/node1";
|
||||
String rackHostname[] = SLSUtils.getRackHostName(str);
|
||||
Assert.assertEquals(rackHostname[0], "rack1");
|
||||
Assert.assertEquals(rackHostname[1], "node1");
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,121 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.sls.web;
|
||||
|
||||
import junit.framework.Assert;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.yarn.sls.SLSRunner;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.text.MessageFormat;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
public class TestSLSWebApp {
|
||||
|
||||
@Test
|
||||
public void testSimulateInfoPageHtmlTemplate() throws Exception {
|
||||
String simulateInfoTemplate = FileUtils.readFileToString(
|
||||
new File("src/main/html/simulate.info.html.template"));
|
||||
|
||||
SLSRunner.simulateInfoMap.put("Number of racks", 10);
|
||||
SLSRunner.simulateInfoMap.put("Number of nodes", 100);
|
||||
SLSRunner.simulateInfoMap.put("Node memory (MB)", 1024);
|
||||
SLSRunner.simulateInfoMap.put("Node VCores", 1);
|
||||
SLSRunner.simulateInfoMap.put("Number of applications", 100);
|
||||
SLSRunner.simulateInfoMap.put("Number of tasks", 1000);
|
||||
SLSRunner.simulateInfoMap.put("Average tasks per applicaion", 10);
|
||||
SLSRunner.simulateInfoMap.put("Number of queues", 4);
|
||||
SLSRunner.simulateInfoMap.put("Average applications per queue", 25);
|
||||
SLSRunner.simulateInfoMap.put("Estimated simulate time (s)", 10000);
|
||||
|
||||
StringBuilder info = new StringBuilder();
|
||||
for (Map.Entry<String, Object> entry :
|
||||
SLSRunner.simulateInfoMap.entrySet()) {
|
||||
info.append("<tr>");
|
||||
info.append("<td class='td1'>" + entry.getKey() + "</td>");
|
||||
info.append("<td class='td2'>" + entry.getValue() + "</td>");
|
||||
info.append("</tr>");
|
||||
}
|
||||
|
||||
String simulateInfo =
|
||||
MessageFormat.format(simulateInfoTemplate, info.toString());
|
||||
Assert.assertTrue("The simulate info html page should not be empty",
|
||||
simulateInfo.length() > 0);
|
||||
for (Map.Entry<String, Object> entry :
|
||||
SLSRunner.simulateInfoMap.entrySet()) {
|
||||
Assert.assertTrue("The simulate info html page should have information "
|
||||
+ "of " + entry.getKey(), simulateInfo.contains("<td class='td1'>"
|
||||
+ entry.getKey() + "</td><td class='td2'>"
|
||||
+ entry.getValue() + "</td>"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimulatePageHtmlTemplate() throws Exception {
|
||||
String simulateTemplate = FileUtils.readFileToString(
|
||||
new File("src/main/html/simulate.html.template"));
|
||||
|
||||
Set<String> queues = new HashSet<String>();
|
||||
queues.add("sls_queue_1");
|
||||
queues.add("sls_queue_2");
|
||||
queues.add("sls_queue_3");
|
||||
String queueInfo = "";
|
||||
int i = 0;
|
||||
for (String queue : queues) {
|
||||
queueInfo += "legends[4][" + i + "] = 'queue" + queue
|
||||
+ ".allocated.memory'";
|
||||
queueInfo += "legends[5][" + i + "] = 'queue" + queue
|
||||
+ ".allocated.vcores'";
|
||||
i ++;
|
||||
}
|
||||
String simulateInfo = MessageFormat.format(simulateTemplate,
|
||||
queueInfo, "s", 1000, 1000);
|
||||
Assert.assertTrue("The simulate page html page should not be empty",
|
||||
simulateInfo.length() > 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTrackPageHtmlTemplate() throws Exception {
|
||||
String trackTemplate = FileUtils.readFileToString(
|
||||
new File("src/main/html/track.html.template"));
|
||||
String trackedQueueInfo = "";
|
||||
Set<String> trackedQueues = new HashSet<String>();
|
||||
trackedQueues.add("sls_queue_1");
|
||||
trackedQueues.add("sls_queue_2");
|
||||
trackedQueues.add("sls_queue_3");
|
||||
for(String queue : trackedQueues) {
|
||||
trackedQueueInfo += "<option value='Queue " + queue + "'>"
|
||||
+ queue + "</option>";
|
||||
}
|
||||
String trackedAppInfo = "";
|
||||
Set<String> trackedApps = new HashSet<String>();
|
||||
trackedApps.add("app_1");
|
||||
trackedApps.add("app_2");
|
||||
for(String job : trackedApps) {
|
||||
trackedAppInfo += "<option value='Job " + job + "'>" + job + "</option>";
|
||||
}
|
||||
String trackInfo = MessageFormat.format(trackTemplate, trackedQueueInfo,
|
||||
trackedAppInfo, "s", 1000, 1000);
|
||||
Assert.assertTrue("The queue/app tracking html page should not be empty",
|
||||
trackInfo.length() > 0);
|
||||
}
|
||||
}
|
@ -0,0 +1,50 @@
|
||||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
<!--
|
||||
This file contains pool and user allocations for the Fair Scheduler.
|
||||
Its format is explained in the Fair Scheduler documentation at
|
||||
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html
|
||||
The documentation also includes a sample config file.
|
||||
-->
|
||||
|
||||
<allocations>
|
||||
<user name="jenkins">
|
||||
<!-- Limit on running jobs for the user across all pools. If more
|
||||
jobs than this are submitted, only the first <maxRunningJobs> will
|
||||
be scheduled at any given time. Defaults to infinity or the
|
||||
userMaxJobsDefault value set below. -->
|
||||
<maxRunningJobs>1000</maxRunningJobs>
|
||||
</user>
|
||||
<userMaxAppsDefault>1000</userMaxAppsDefault>
|
||||
<queue name="sls_queue_1">
|
||||
<minResources>1024 mb, 1 vcores</minResources>
|
||||
<schedulingMode>fair</schedulingMode>
|
||||
<weight>0.25</weight>
|
||||
<minSharePreemptionTimeout>2</minSharePreemptionTimeout>
|
||||
</queue>
|
||||
<queue name="sls_queue_2">
|
||||
<minResources>1024 mb, 1 vcores</minResources>
|
||||
<schedulingMode>fair</schedulingMode>
|
||||
<weight>0.25</weight>
|
||||
<minSharePreemptionTimeout>2</minSharePreemptionTimeout>
|
||||
</queue>
|
||||
<queue name="sls_queue_3">
|
||||
<minResources>1024 mb, 1 vcores</minResources>
|
||||
<weight>0.5</weight>
|
||||
<schedulingMode>fair</schedulingMode>
|
||||
<minSharePreemptionTimeout>2</minSharePreemptionTimeout>
|
||||
</queue>
|
||||
</allocations>
|
@ -0,0 +1,47 @@
|
||||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
<!--
|
||||
This file contains pool and user allocations for the Fair Scheduler.
|
||||
Its format is explained in the Fair Scheduler documentation at
|
||||
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html.
|
||||
The documentation also includes a sample config file.
|
||||
-->
|
||||
|
||||
<configuration>
|
||||
<property>
|
||||
<description>Absolute path to allocation file. An allocation file is an XML
|
||||
manifest describing queues and their properties, in addition to certain
|
||||
policy defaults. This file must be in XML format as described in
|
||||
http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/FairScheduler.html.
|
||||
</description>
|
||||
<name>yarn.scheduler.fair.allocation.file</name>
|
||||
<value>src/test/resources/fair-scheduler-allocation.xml</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>Whether to use preemption. Note that preemption is experimental
|
||||
in the current version. Defaults to false.</description>
|
||||
<name>yarn.scheduler.fair.preemption</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>Whether to allow multiple container assignments in one
|
||||
heartbeat. Defaults to false.</description>
|
||||
<name>yarn.scheduler.fair.assignmultiple</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
</configuration>
|
@ -0,0 +1,278 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
|
||||
<link rel="stylesheet" href="css/bootstrap-responsive.min.css">
|
||||
<style type="text/css">
|
||||
body '{' font: 20px sans-serif; '}'
|
||||
.axis path,
|
||||
.axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges; '}'
|
||||
.axis text '{' font-family: sans-serif; font-size: 20px; '}'
|
||||
.line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
|
||||
.legend '{'
|
||||
padding: 5px;
|
||||
font: 18px sans-serif;
|
||||
background: yellow;
|
||||
box-shadow: 2px 2px 1px #888;
|
||||
'}'
|
||||
.title '{' font: 24px sans-serif; '}'
|
||||
.divborder '{'
|
||||
border-width: 1px;
|
||||
border-style: solid;
|
||||
border-color: black;
|
||||
margin-top:10px
|
||||
'}'
|
||||
</style>
|
||||
<script src="js/thirdparty/d3.v3.js"></script>
|
||||
<script src="js/thirdparty/jquery.js"></script>
|
||||
<script src="js/thirdparty/bootstrap.min.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<div class="row">
|
||||
<div class="span10 offset2"><br>
|
||||
<input type="button" style="float: right;" value="Stop"
|
||||
onClick="stop()" />
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="divborder span8" style="margin-left:50px" id="area1"></div>
|
||||
<div class="divborder span8" id="area2"></div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="divborder span8" style="margin-left:50px" id="area3"></div>
|
||||
<div class="divborder span8" id="area4"></div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="divborder span8" style="margin-left:50px" id="area5"></div>
|
||||
<div class="divborder span8" id="area6"></div>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="divborder span8" style="margin-left:50px" id="area7"></div>
|
||||
<div class="span8" id="area8"></div>
|
||||
</div><br/><br/>
|
||||
|
||||
<script>
|
||||
var basetime = 0;
|
||||
var running = 1;
|
||||
var data = [];
|
||||
var width, height;
|
||||
var legends = [];
|
||||
var titles = [];
|
||||
var yLabels = [];
|
||||
var isAreas = [];
|
||||
var svgs = [];
|
||||
var xs = [];
|
||||
var ys = [];
|
||||
var xAxiss = [];
|
||||
var yAxiss = [];
|
||||
var lineAreas = [];
|
||||
var stacks = [];
|
||||
|
||||
// legends
|
||||
legends[0] = [''running.applications'', ''running.containers''];
|
||||
legends[1] = [''jvm.free.memory'', ''jvm.max.memory'', ''jvm.total.memory''];
|
||||
legends[2] = [''cluster.allocated.memory'', ''cluster.available.memory''];
|
||||
legends[3] = [''cluster.allocated.vcores'', ''cluster.available.vcores''];
|
||||
legends[4] = [];
|
||||
legends[5] = [];
|
||||
{0}
|
||||
legends[6] = [''scheduler.allocate.timecost'',
|
||||
''scheduler.handle-NODE_ADDED.timecost'',
|
||||
''scheduler.handle-NODE_REMOVED.timecost'',
|
||||
''scheduler.handle-NODE_UPDATE.timecost'',
|
||||
''scheduler.handle-APP_ADDED.timecost'',
|
||||
''scheduler.handle-APP_REMOVED.timecost'',
|
||||
''scheduler.handle-CONTAINER_EXPIRED.timecost''];
|
||||
|
||||
// title
|
||||
titles[0] = ''Cluster running applications & containers'';
|
||||
titles[1] = ''JVM memory'';
|
||||
titles[2] = ''Cluster allocated & available memory'';
|
||||
titles[3] = ''Cluster allocated & available vcores'';
|
||||
titles[4] = ''Queue allocated memory'';
|
||||
titles[5] = ''Queue allocated vcores'';
|
||||
titles[6] = ''Scheduler allocate & handle operation timecost'';
|
||||
|
||||
// ylabels
|
||||
yLabels[0] = ''Number'';
|
||||
yLabels[1] = ''Memory (GB)'';
|
||||
yLabels[2] = ''Memory (GB)'';
|
||||
yLabels[3] = ''Number'';
|
||||
yLabels[4] = ''Memory (GB)'';
|
||||
yLabels[5] = ''Number'';
|
||||
yLabels[6] = ''Timecost (ms)'';
|
||||
|
||||
// is area?
|
||||
isAreas = [0, 0, 0, 0, 1, 1, 0];
|
||||
|
||||
// draw all charts
|
||||
for (var i = 0; i < 7; i ++) '{'
|
||||
drawEachChart(i);
|
||||
'}'
|
||||
|
||||
// draw each chart
|
||||
function drawEachChart(index) '{'
|
||||
var margin = '{'top: 50, right: 250, bottom: 50, left: 70'}';
|
||||
width = 750 - margin.left - margin.right;
|
||||
height = 420 - margin.top - margin.bottom;
|
||||
|
||||
xs[index] = d3.scale.linear().range([0, width]);
|
||||
ys[index] = d3.scale.linear().range([height, 0]);
|
||||
xAxiss[index] = d3.svg.axis().scale(xs[index]).orient(''bottom'');
|
||||
yAxiss[index] = d3.svg.axis().scale(ys[index]).orient(''left'');
|
||||
|
||||
if (isAreas[index] == 1)'{'
|
||||
lineAreas[index] = d3.svg.area()
|
||||
.x(function(d) '{' return xs[index](d.time); '}')
|
||||
.y0(function(d) '{' return ys[index](d.y0); '}')
|
||||
.y1(function(d) '{' return ys[index](d.y0 + d.y); '}');
|
||||
|
||||
stacks[index] = d3.layout.stack()
|
||||
.values(function(d) '{' return d.values; '}');
|
||||
'}' else '{'
|
||||
lineAreas[index] = d3.svg.line()
    .interpolate(''basis'')
    .x(function(d) '{' return xs[index](d.time); '}')
    .y(function(d) '{' return ys[index](d.value); '}');
'}'

svgs[index] = d3.select(''#area'' + (index + 1)).append(''svg'')
    .attr(''width'', width + margin.left + margin.right)
    .attr(''height'', height + margin.top + margin.bottom)
    .append(''g'')
    .attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');

// x, y and title
svgs[index].append(''text'')
    .attr(''transform'', ''translate('' + (width / 2) + '' ,'' +
        (height + margin.bottom - 10 ) + '')'')
    .style(''text-anchor'', ''middle'')
    .text(''Time ({1})'');

svgs[index].append(''text'')
    .attr(''transform'', ''rotate(-90)'')
    .attr(''y'', 0 - margin.left)
    .attr(''x'', 0 - (height / 2))
    .attr(''dy'', ''1em'')
    .style(''text-anchor'', ''middle'')
    .text(yLabels[index]);

svgs[index].append(''text'')
    .attr(''x'', (width / 2))
    .attr(''y'', 10 - (margin.top / 2))
    .attr(''text-anchor'', ''middle'')
    .text(titles[index]);
'}'

// request data
function requestData() '{'
  $.ajax('{'url: ''simulateMetrics'',
    success: function(point) '{'
      // update data
      if (basetime == 0) basetime = point.time;
      point.time = (point.time - basetime) / {2};
      data.push(point);

      // clear old
      for (var i = 0; i < 7; i++) '{'
        svgs[i].selectAll(''g.tick'').remove();
        svgs[i].selectAll(''g'').remove();
        var color = d3.scale.category10();
        color.domain(d3.keys(data[0]).filter(function(key) '{'
          return $.inArray(key, legends[i]) !== -1;
        '}'));

        var values;
        if (isAreas[i] == 1) '{'
          values = stacks[i](color.domain().map(function(name) '{'
            return '{'
              name: name,
              values: data.map(function(d) '{'
                return '{'time: d.time, y: d[name]'}';
              '}')
            '}'
          '}'));
          xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
          ys[i].domain([
            d3.min(values, function(c) '{' return 0; '}'),
            d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
              function(v) '{' return v.y + v.y0; '}'); '}')
          ]);
        '}' else '{'
          values = color.domain().map(function(name) '{'
            return '{'
              name: name,
              values: data.map(function(d) '{'
                return '{'time: d.time, value: d[name]'}';
              '}')
            '}'
          '}');
          xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
          ys[i].domain([
            d3.min(values, function(c) '{' return 0; '}'),
            d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
              function(v) '{' return v.value; '}'); '}')
          ]);
        '}'

        svgs[i].append(''g'').attr(''class'', ''x axis'')
          .attr(''transform'', ''translate(0,'' + height + '')'').call(xAxiss[i]);

        svgs[i].append(''g'').attr(''class'', ''y axis'').call(yAxiss[i]);

        var value = svgs[i].selectAll(''.path'')
          .data(values).enter().append(''g'').attr(''class'', ''line'');

        if (isAreas[i] == 1) '{'
          value.append(''path'').attr(''class'', ''area'')
            .attr(''d'', function(d) '{' return lineAreas[i](d.values); '}')
            .style(''fill'', function(d) '{' return color(d.name); '}');
        '}' else '{'
          value.append(''path'').attr(''class'', ''line'')
            .attr(''d'', function(d) '{' return lineAreas[i](d.values); '}')
            .style(''stroke'', function(d) '{' return color(d.name); '}');
        '}'

        // legend
        var legend = svgs[i].append(''g'')
          .attr(''class'', ''legend'')
          .attr(''x'', width + 5)
          .attr(''y'', 25)
          .attr(''height'', 120)
          .attr(''width'', 140);
        legend.selectAll(''g'').data(legends[i])
          .enter()
          .append(''g'')
          .each(function(d, i) '{'
            var g = d3.select(this);
            g.append(''rect'')
              .attr(''x'', width + 5)
              .attr(''y'', i * 20)
              .attr(''width'', 10)
              .attr(''height'', 10)
              .style(''fill'', color(d));
            g.append(''text'')
              .attr(''x'', width + 25)
              .attr(''y'', i * 20 + 8)
              .attr(''height'', 30)
              .attr(''width'', 250)
              .style(''fill'', color(d))
              .text(d);
          '}');
      '}'

      if (running == 1)
        setTimeout(requestData, {3});
    '}',
    cache: false
  '}');
'}'

// stop
function stop() '{'
  running = 0;
'}'
requestData();
</script>
</body>
</html>
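A note on the template syntax above: the doubled single quotes ('') and the quoted braces ('{' and '}') are java.text.MessageFormat escapes, since the whole file is a format pattern. The numbered slots are filled in server-side before the page is served: {1} is the time-unit label, {2} the divisor applied to raw timestamps, and {3} the poll interval in milliseconds. A minimal sketch of that substitution, assuming only the MessageFormat pattern syntax (the class name and argument values below are illustrative, not the committed servlet code):

import java.text.MessageFormat;

public class SimulatePageSketch {
  public static void main(String[] args) {
    // One fragment of the pattern above: '' unescapes to a single quote,
    // and {1} is an argument slot.
    String fragment = ".text(''Time ({1})'');";
    // Argument 0 is unused by this fragment but must still be supplied.
    String filled = MessageFormat.format(fragment, "", "s");
    System.out.println(filled);  // prints: .text('Time (s)');
  }
}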
@ -0,0 +1,50 @@
<html>
<head>
<meta charset="utf-8">
<style type="text/css">
  .td1 '{'
    border-width: 1px;
    padding: 8px;
    border-style: solid;
    border-color: #666666;
    background-color: #dedede;
    width: 50%;
  '}'
  table.gridtable '{'
    font-family: verdana,arial,sans-serif;
    font-size: 11px;
    color: #333333;
    border-width: 1px;
    border-color: #666666;
    border-collapse: collapse;
    margin-top: 80px;
  '}'
  .td2 '{'
    border-width: 1px;
    padding: 8px;
    border-style: solid;
    border-color: #666666;
    background-color: #ffffff;
    width: 50%;
  '}'
</style>
</head>
<body>
<table class="gridtable" align="center" width="400px">
  <tr>
    <td colspan="2" class="td2" align="center">
      <b>SLS Simulate Information</b>
    </td>
  </tr>
  {0}
  <tr>
    <td align="center" height="80px">
      <a href="simulate">Simulation Charts</a>
    </td>
    <td align="center">
      <a href="track">Tracked Jobs & Queues</a>
    </td>
  </tr>
</table>
</body>
</html>
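The {0} slot in this info page stands for server-generated table rows. A hypothetical helper, sketched only from the placeholder and the .td1/.td2 CSS classes above (the class, method, and statistic names are illustrative):

import java.text.MessageFormat;

public class InfoPageSketch {
  // Renders the page by substituting {0} with one <tr> per statistic.
  static String render(String template, int numNodes, int numApps) {
    StringBuilder rows = new StringBuilder();
    rows.append("<tr><td class=\"td1\">Number of nodes</td>")
        .append("<td class=\"td2\">").append(numNodes).append("</td></tr>");
    rows.append("<tr><td class=\"td1\">Number of applications</td>")
        .append("<td class=\"td2\">").append(numApps).append("</td></tr>");
    return MessageFormat.format(template, rows.toString());
  }
}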
hadoop-tools/hadoop-sls/src/test/resources/sls-runner.xml (new file, 81 lines)
@ -0,0 +1,81 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<configuration>

  <!-- SLSRunner configuration -->
  <property>
    <name>yarn.sls.runner.pool.size</name>
    <value>100</value>
  </property>

  <!-- Nodes configuration -->
  <property>
    <name>yarn.sls.nm.memory.mb</name>
    <value>10240</value>
  </property>
  <property>
    <name>yarn.sls.nm.vcores</name>
    <value>10</value>
  </property>
  <property>
    <name>yarn.sls.nm.heartbeat.interval.ms</name>
    <value>1000</value>
  </property>

  <!-- Apps configuration -->
  <property>
    <name>yarn.sls.am.heartbeat.interval.ms</name>
    <value>1000</value>
  </property>
  <property>
    <name>yarn.sls.am.type.mapreduce</name>
    <value>org.apache.hadoop.yarn.sls.appmaster.MRAMSimulator</value>
  </property>

  <!-- Containers configuration -->
  <property>
    <name>yarn.sls.container.memory.mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.sls.container.vcores</name>
    <value>1</value>
  </property>

  <!-- metrics -->
  <property>
    <name>yarn.sls.metrics.switch</name>
    <value>ON</value>
  </property>
  <property>
    <name>yarn.sls.metrics.web.address.port</name>
    <value>10001</value>
  </property>
  <property>
    <name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler</name>
    <value>org.apache.hadoop.yarn.sls.scheduler.FifoSchedulerMetrics</value>
  </property>
  <property>
    <name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</name>
    <value>org.apache.hadoop.yarn.sls.scheduler.FairSchedulerMetrics</value>
  </property>
  <property>
    <name>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</name>
    <value>org.apache.hadoop.yarn.sls.scheduler.CapacitySchedulerMetrics</value>
  </property>

</configuration>
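These properties size the simulator (runner thread pool, simulated NM and container resources, heartbeat intervals) and, in the last three entries, map each scheduler class name to the metrics wrapper that records its behavior. A sketch of reading the file with the stock Hadoop Configuration API, assuming sls-runner.xml is on the classpath (the surrounding class and the default values passed to getInt are illustrative):

import org.apache.hadoop.conf.Configuration;

public class SlsConfSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.addResource("sls-runner.xml");

    int poolSize = conf.getInt("yarn.sls.runner.pool.size", 10);
    int nmMemoryMB = conf.getInt("yarn.sls.nm.memory.mb", 10240);

    // The scheduler-to-metrics entries are keyed by the scheduler's own
    // class name, so the lookup uses that name directly.
    String scheduler =
        "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler";
    String metricsClass = conf.get(scheduler);
    System.out.println(poolSize + " " + nmMemoryMB + " " + metricsClass);
  }
}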
hadoop-tools/hadoop-sls/src/test/resources/track.html.template (new file, 193 lines)
@ -0,0 +1,193 @@
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
<link rel="stylesheet" href="css/bootstrap-responsive.min.css">
<style type="text/css">
  body '{' font: 20px sans-serif;'}'
  .axis path,
  .axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges;'}'
  .axis text '{' font-family: sans-serif; font-size: 20px; '}'
  .line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
  .legend '{' padding: 5px; font: 18px sans-serif; background: yellow;
    box-shadow: 2px 2px 1px #888;'}'
  .title '{' font: 24px sans-serif; '}'
  .divborder '{' border-width: 1px; border-style: solid; border-color: black;
    margin-top: 10px '}'
</style>
<script src="js/thirdparty/d3.v3.js"></script>
<script src="js/thirdparty/jquery.js"></script>
<script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
<div class="row">
  <div class="offset4 span8"><br/><br/><br/>
    Select Tracked Job/Queue:
    <select id="trackedSelect" onchange="redrawChart()">
      <option>----Queue----</option>
      {0}
      <option>----Job----</option>
      {1}
    </select>
    <input type="button" style="float: right;" value="Stop"
        onClick="stop()" />
  </div>
</div>
<div class="row">
  <div class="divborder span9 offset4" id="area1"></div>
</div>
<script>
// global variables
var basetime = 0;
var running = 1;
var para = '''';
var data = [];
var path, line, svg;
var x, y;
var width, height;
var xAxis, yAxis;
var legends = [''usage.memory'', ''demand.memory'', ''maxshare.memory'',
    ''minshare.memory'', ''fairshare.memory''];

// stop function
function stop() '{'
  running = 0;
'}'

// select changed event
function redrawChart() '{'
  var value = $(''#trackedSelect'').val();
  if (value.substring(0, ''Job ''.length) === ''Job ''
      || value.substring(0, ''Queue ''.length) === ''Queue '') '{'
    para = value;
    running = 0;
    basetime = 0;
    data = [];
    $(''#area1'').empty();
    drawChart(''Tracking '' + value);
    running = 1;
    requestData();
  }
}

// draw chart
function drawChart(title) '{'
  // location
  var margin = '{'top: 50, right: 150, bottom: 50, left: 80'}';
  width = 800 - margin.left - margin.right;
  height = 420 - margin.top - margin.bottom;
  x = d3.scale.linear().range([0, width]);
  y = d3.scale.linear().range([height, 0]);
  xAxis = d3.svg.axis().scale(x).orient(''bottom'');
  yAxis = d3.svg.axis().scale(y).orient(''left'');
  // lines
  line = d3.svg.line().interpolate(''basis'')
    .x(function(d) '{' return x(d.time); })
    .y(function(d) '{' return y(d.value); });
  // create chart
  svg = d3.select(''#area1'').append(''svg'')
    .attr(''width'', width + margin.left + margin.right)
    .attr(''height'', height + margin.top + margin.bottom)
    .append(''g'')
    .attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');
  // axis labels
  svg.append(''text'')
    .attr(''transform'', ''translate('' + (width / 2) + '','' + (height + margin.bottom - 5 ) + '')'')
    .style(''text-anchor'', ''middle'')
    .text(''Time ({2})'');
  svg.append(''text'')
    .attr(''transform'', ''rotate(-90)'')
    .attr(''y'', 0 - margin.left)
    .attr(''x'', 0 - (height / 2))
    .attr(''dy'', ''1em'')
    .style(''text-anchor'', ''middle'')
    .text(''Memory (GB)'');
  // title
  svg.append(''text'')
    .attr(''x'', (width / 2))
    .attr(''y'', 10 - (margin.top / 2))
    .attr(''text-anchor'', ''middle'')
    .text(title);
'}'

// request data
function requestData() '{'
  $.ajax('{'url: ''trackMetrics?t='' + para,
    success: function(point) '{'
      // clear old
      svg.selectAll(''g.tick'').remove();
      svg.selectAll(''g'').remove();

      if (basetime == 0) basetime = point.time;
      point.time = (point.time - basetime) / {3};
      data.push(point);

      var color = d3.scale.category10();
      color.domain(d3.keys(data[0]).filter(function(key) '{'
        return $.inArray(key, legends) !== -1;
      '}'));

      var values = color.domain().map(function(name) '{'
        return '{'
          name: name,
          values: data.map(function(d) '{'
            return '{' time: d.time, value: d[name]'}';
          '}')
        '}';
      '}');

      // set x/y range
      x.domain(d3.extent(data, function(d) '{' return d.time; '}'));
      y.domain([
        d3.min(values, function(c) '{' return 0 '}'),
        d3.max(values, function(c) '{' return 1.1 * d3.max(c.values, function(v) '{' return v.value; '}'); '}')
      ]);

      svg.append(''g'').attr(''class'', ''x axis'')
        .attr(''transform'', ''translate(0,'' + height + '')'').call(xAxis);
      svg.append(''g'').attr(''class'', ''y axis'').call(yAxis);
      var value = svg.selectAll(''.path'')
        .data(values).enter().append(''g'').attr(''class'', ''line'');

      value.append(''path'').attr(''class'', ''line'')
        .attr(''d'', function(d) '{' return line(d.values); '}')
        .style(''stroke'', function(d) '{' return color(d.name); '}');

      // legend
      var legend = svg.append(''g'')
        .attr(''class'', ''legend'')
        .attr(''x'', width + 5)
        .attr(''y'', 25)
        .attr(''height'', 120)
        .attr(''width'', 180);

      legend.selectAll(''g'').data(legends)
        .enter()
        .append(''g'')
        .each(function(d, i) '{'
          var g = d3.select(this);
          g.append(''rect'')
            .attr(''x'', width + 5)
            .attr(''y'', i * 20)
            .attr(''width'', 10)
            .attr(''height'', 10)
            .style(''fill'', color(d));

          g.append(''text'')
            .attr(''x'', width + 25)
            .attr(''y'', i * 20 + 8)
            .attr(''height'', 30)
            .attr(''width'', 250)
            .style(''fill'', color(d))
            .text(d);
        '}');

      if (running == 1)
        setTimeout(requestData, {4});
    '}',
    cache: false
  '}');
'}'
</script>
</body>
</html>
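The page above polls trackMetrics?t=&lt;tracked name&gt; and expects each response to be one flat JSON object: a time field plus one numeric field per legend entry (usage.memory, demand.memory, and so on). A sketch of that response shape, inferred from the JavaScript rather than from the committed servlet code (class and method names are illustrative):

import java.util.Locale;

public class TrackPointSketch {
  // Builds one polling response; field names mirror the legends array above.
  // Locale.US keeps the decimal separator a dot, as JSON requires.
  static String toJson(long time, double usage, double demand,
                       double maxShare, double minShare, double fairShare) {
    return String.format(Locale.US,
        "{\"time\": %d, \"usage.memory\": %.2f, \"demand.memory\": %.2f,"
            + " \"maxshare.memory\": %.2f, \"minshare.memory\": %.2f,"
            + " \"fairshare.memory\": %.2f}",
        time, usage, demand, maxShare, minShare, fairShare);
  }

  public static void main(String[] args) {
    System.out.println(toJson(System.currentTimeMillis(), 2.0, 4.0, 10.0, 1.0, 5.0));
  }
}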
hadoop-tools/hadoop-sls/src/test/resources/yarn-site.xml (new file, 58 lines)
@ -0,0 +1,58 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<configuration>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
  </property>

  <property>
    <description>The address of the RM web application.</description>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>localhost:18088</value>
  </property>

  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>localhost:18031</value>
  </property>

  <property>
    <description>The address of the scheduler interface.</description>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>localhost:18030</value>
  </property>

  <property>
    <description>The address of the applications manager interface in the RM.</description>
    <name>yarn.resourcemanager.address</name>
    <value>localhost:18032</value>
  </property>

  <property>
    <description>The address of the RM admin interface.</description>
    <name>yarn.resourcemanager.admin.address</name>
    <value>localhost:18033</value>
  </property>

  <property>
    <description>Set to false to avoid the IP check.</description>
    <name>hadoop.security.token.service.use_ip</name>
    <value>false</value>
  </property>

</configuration>
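This test configuration pins the RM to the FairScheduler and moves every RM endpoint off its default port (for example 18088 instead of 8088 for the web UI), so a simulation does not collide with a locally running cluster. A quick way to confirm what the resource resolves to, assuming it is on the test classpath (the class below is illustrative):

import org.apache.hadoop.conf.Configuration;

public class YarnSiteCheckSketch {
  public static void main(String[] args) {
    // Load only this resource, without the usual *-default.xml files.
    Configuration conf = new Configuration(false);
    conf.addResource("yarn-site.xml");
    // Prints the FairScheduler class configured above.
    System.out.println(conf.get("yarn.resourcemanager.scheduler.class"));
    // Prints localhost:18088, the non-default web UI address.
    System.out.println(conf.get("yarn.resourcemanager.webapp.address"));
  }
}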
@ -83,6 +83,11 @@
    <scope>compile</scope>
    <version>${project.version}</version>
  </dependency>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-sls</artifactId>
    <scope>compile</scope>
  </dependency>
</dependencies>

<build>

@ -41,6 +41,7 @@
  <module>hadoop-extras</module>
  <module>hadoop-pipes</module>
  <module>hadoop-openstack</module>
  <module>hadoop-sls</module>
</modules>

<build>

@ -24,6 +24,8 @@ Release 2.3.0 - UNRELEASED
    YARN-649. Added a new NM web-service to serve container logs in plain text
    over HTTP. (Sandy Ryza via vinodkv)

    YARN-1021. Yarn Scheduler Load Simulator. (ywskycn via tucu)

  IMPROVEMENTS

    YARN-905. Add state filters to nodes CLI (Wei Yan via Sandy Ryza)