diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 308014eb07..7f50b75fc3 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -70,6 +70,9 @@ Release 0.23.0 - Unreleased MAPREDUCE-2037. Capture intermediate progress, CPU and memory usage for tasks. (Dick King via acmurthy) + MAPREDUCE-2930. Added the ability to be able to generate graphs from the + state-machine definitions. (Binglin Chang via vinodkv) + IMPROVEMENTS MAPREDUCE-2187. Reporter sends progress during sort/merge. (Anupam Seth via diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml index 17d4b8b0e5..66ac197d84 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml @@ -113,4 +113,41 @@ + + + + visualize + + false + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2 + + + compile + + java + + + test + org.apache.hadoop.yarn.util.VisualizeStateMachine + + MapReduce + org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl, + org.apache.hadoop.mapreduce.v2.app.job.impl.TaskImpl, + org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl + MapReduce.gv + + + + + + + + + diff --git a/hadoop-mapreduce-project/hadoop-yarn/README b/hadoop-mapreduce-project/hadoop-yarn/README index 8c4f43454e..713871ab76 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/README +++ b/hadoop-mapreduce-project/hadoop-yarn/README @@ -30,7 +30,6 @@ clean and test: mvn clean install run selected test after compile: mvn test -Dtest=TestClassName (combined: mvn clean install -Dtest=TestClassName) create runnable binaries after install: mvn assembly:assembly (combined: mvn clean install assembly:assembly) - Eclipse Projects ---------------- 
http://maven.apache.org/guides/mini/guide-ide-eclipse.html @@ -71,3 +70,16 @@ hadoop-yarn-server - Implementation of the hadoop-yarn-api hadoop-yarn-server-common - APIs shared between resourcemanager and nodemanager hadoop-yarn-server-nodemanager (TaskTracker replacement) hadoop-yarn-server-resourcemanager (JobTracker replacement) + +Utilities for understanding the code +------------------------------------ +Almost all of the yarn components as well as the mapreduce framework use +state-machines for all the data objects. To understand those central pieces of +the code, a visual representation of the state-machines helps a lot. You can first +convert the state-machines into Graphviz (.gv) format by +running: + mvn compile -Pvisualize +Then you can use the dot program for generating directed graphs and convert the above +.gv files to images. The graphviz package has the needed dot program and related +utilities. For example, to generate png files you can run: + dot -Tpng NodeManager.gv > NodeManager.png diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java index 2a5244d651..71c829ac58 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java @@ -20,10 +20,14 @@ import java.util.EnumMap; import java.util.HashMap; +import java.util.Iterator; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.Stack; +import org.apache.hadoop.yarn.util.Graph; + /** * State machine topology. * This object is semantically immutable. 
If you have a @@ -441,4 +445,39 @@ public synchronized STATE doTransition(EVENTTYPE eventType, EVENT event) return currentState; } } + + /** + * Generate a graph represents the state graph of this StateMachine + * @param name graph name + * @return Graph object generated + */ + public Graph generateStateGraph(String name) { + maybeMakeStateMachineTable(); + Graph g = new Graph(name); + for (STATE startState : stateMachineTable.keySet()) { + Map> transitions + = stateMachineTable.get(startState); + for (Entry> entry : + transitions.entrySet()) { + Transition transition = entry.getValue(); + if (transition instanceof StateMachineFactory.SingleInternalArc) { + StateMachineFactory.SingleInternalArc sa + = (StateMachineFactory.SingleInternalArc) transition; + Graph.Node fromNode = g.getNode(startState.toString()); + Graph.Node toNode = g.getNode(sa.postState.toString()); + fromNode.addEdge(toNode, entry.getKey().toString()); + } else if (transition instanceof StateMachineFactory.MultipleInternalArc) { + StateMachineFactory.MultipleInternalArc ma + = (StateMachineFactory.MultipleInternalArc) transition; + Iterator iter = ma.validPostStates.iterator(); + while (iter.hasNext()) { + Graph.Node fromNode = g.getNode(startState.toString()); + Graph.Node toNode = g.getNode(iter.next().toString()); + fromNode.addEdge(toNode, entry.getKey().toString()); + } + } + } + } + return g; + } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Graph.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Graph.java new file mode 100644 index 0000000000..aa3604fa87 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Graph.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
/**
 * A small in-memory directed graph that can be rendered as Graphviz DOT text.
 *
 * <p>A graph has a name, a set of nodes identified by string ids, and an
 * optional set of sub-graphs. A graph without a parent is rendered as a
 * {@code digraph}; a graph with a parent is rendered as a
 * {@code subgraph cluster_<name>} block inside it.
 *
 * <p>Nodes and sub-graphs are kept in insertion order, so the generated text
 * is the same on every run for the same sequence of calls.
 *
 * <p>This class does no synchronization of its own.
 */
public class Graph {

  /** A directed edge between two nodes, carrying a text label. */
  public class Edge {
    Node from;
    Node to;
    String label;

    /**
     * @param from source node
     * @param to target node
     * @param info label text of the edge
     */
    public Edge(Node from, Node to, String info) {
      this.from = from;
      this.to = to;
      this.label = info;
    }

    /**
     * @param rhs edge to compare with
     * @return true when both edges connect the very same node objects in the
     *         same direction; labels are not compared
     */
    public boolean sameAs(Edge rhs) {
      return this.from == rhs.from && this.to == rhs.to;
    }

    /**
     * @param rhs edge whose label is appended
     * @return a new edge with this edge's end points and the label
     *         {@code this.label + "," + rhs.label}
     */
    public Edge combine(Edge rhs) {
      return new Edge(this.from, this.to, this.label + "," + rhs.label);
    }
  }

  /** A node of the enclosing graph, with its incoming and outgoing edges. */
  public class Node {
    Graph parent;
    String id;
    List<Edge> ins;
    List<Edge> outs;

    /**
     * Creates a node bound to the enclosing graph. The node is not
     * registered with the graph; use {@link Graph#getNode(String)} for that.
     *
     * @param id node id
     */
    public Node(String id) {
      this.id = id;
      this.parent = Graph.this;
      this.ins = new ArrayList<Edge>();
      this.outs = new ArrayList<Edge>();
    }

    /** @return the graph this node was created in */
    public Graph getParent() {
      return parent;
    }

    /**
     * Adds an edge from this node to {@code to}. The edge is recorded in
     * this node's outgoing list and in {@code to}'s incoming list.
     *
     * @param to target node
     * @param info edge label
     * @return this node, so calls can be chained
     */
    public Node addEdge(Node to, String info) {
      Edge e = new Edge(this, to, info);
      outs.add(e);
      to.ins.add(e);
      return this;
    }

    /** @return the enclosing graph's name, a dot, and this node's id */
    public String getUniqueId() {
      return Graph.this.name + "." + id;
    }
  }

  private String name;
  private Graph parent;
  // Keyed by node id: constant-time lookup in getNode() and, unlike an
  // identity-hashed HashSet, a stable (insertion) iteration order.
  private final java.util.Map<String, Node> nodes =
      new java.util.LinkedHashMap<String, Node>();
  // Insertion-ordered for the same reason.
  private final Set<Graph> subgraphs = new java.util.LinkedHashSet<Graph>();

  /**
   * @param name graph name
   * @param parent parent graph, or null for a top-level graph
   */
  public Graph(String name, Graph parent) {
    this.name = name;
    this.parent = parent;
  }

  /** Creates a top-level graph with the given name. */
  public Graph(String name) {
    this(name, null);
  }

  /** Creates a top-level graph named {@code "graph"}. */
  public Graph() {
    this("graph", null);
  }

  public String getName() {
    return name;
  }

  public Graph getParent() {
    return parent;
  }

  /** Creates a node with the given id and registers it with this graph. */
  private Node newNode(String id) {
    Node ret = new Node(id);
    nodes.put(id, ret);
    return ret;
  }

  /**
   * Returns the node registered under {@code id}, creating and registering
   * it first when there is none yet.
   *
   * @param id node id
   * @return the existing or newly created node
   */
  public Node getNode(String id) {
    Node existing = nodes.get(id);
    return existing != null ? existing : newNode(id);
  }

  /**
   * Creates a new graph whose parent is this graph and adds it as a
   * sub-graph.
   *
   * @param name name of the sub-graph
   * @return the new sub-graph
   */
  public Graph newSubGraph(String name) {
    Graph ret = new Graph(name, this);
    subgraphs.add(ret);
    return ret;
  }

  /**
   * Adds an existing graph as a sub-graph and makes this graph its parent.
   *
   * @param graph graph to adopt
   */
  public void addSubGraph(Graph graph) {
    subgraphs.add(graph);
    graph.parent = this;
  }

  /**
   * Quotes text as a DOT double-quoted string.
   *
   * <p>Double quotes and backslashes are backslash-escaped, and line breaks
   * are written as the two-character sequences {@code \n} / {@code \r}.
   * All other characters, including non-ASCII ones, are passed through
   * unchanged: DOT has no {@code \}{@code uXXXX} escape, so Java-style
   * unicode escaping would show up literally in the rendered label.
   */
  private static String quote(String text) {
    StringBuilder sb = new StringBuilder(text.length() + 2);
    sb.append('"');
    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      switch (c) {
        case '"':
          sb.append("\\\"");
          break;
        case '\\':
          sb.append("\\\\");
          break;
        case '\n':
          sb.append("\\n");
          break;
        case '\r':
          sb.append("\\r");
          break;
        default:
          sb.append(c);
      }
    }
    sb.append('"');
    return sb.toString();
  }

  /**
   * Quotes a label, first putting a line break after every comma when the
   * label contains a comma and is longer than 14 characters, so that long
   * combined edge labels are stacked instead of rendered on one line.
   */
  private static String wrapSafeString(String label) {
    if (label.indexOf(',') >= 0 && label.length() > 14) {
      label = label.replace(",", ",\n");
    }
    return quote(label);
  }

  /**
   * Renders this graph, its sub-graphs and its nodes as DOT text.
   *
   * <p>Parallel edges between the same two nodes are merged into one edge
   * with a comma-separated label (see {@link #combineEdges(List)}).
   *
   * @param indent prefix written in front of every node and edge line;
   *        sub-graphs are rendered with one extra space of indent
   * @return the DOT text, ending with a closing brace and a newline
   */
  public String generateGraphViz(String indent) {
    StringBuilder sb = new StringBuilder();
    if (this.parent == null) {
      sb.append("digraph ").append(name).append(" {\n");
      sb.append(String.format(
          "graph [ label=%s, fontsize=24, fontname=Helvetica];\n",
          wrapSafeString(name)));
      sb.append("node [fontsize=12, fontname=Helvetica];\n");
      sb.append("edge [fontsize=9, fontcolor=blue, fontname=Arial];\n");
    } else {
      sb.append("subgraph cluster_").append(name)
          .append(" {\nlabel=").append(quote(name)).append("\n");
    }
    for (Graph g : subgraphs) {
      sb.append(g.generateGraphViz(indent + " "));
      sb.append("\n");
    }
    for (Node n : nodes.values()) {
      // The label is quoted like every other string so that ids containing
      // spaces or punctuation still produce valid DOT.
      sb.append(String.format(
          "%s%s [ label = %s ];\n",
          indent,
          wrapSafeString(n.getUniqueId()),
          quote(n.id)));
      for (Edge e : combineEdges(n.outs)) {
        sb.append(String.format(
            "%s%s -> %s [ label = %s ];\n",
            indent,
            wrapSafeString(e.from.getUniqueId()),
            wrapSafeString(e.to.getUniqueId()),
            wrapSafeString(e.label)));
      }
    }
    sb.append("}\n");
    return sb.toString();
  }

  /** Renders this graph as DOT text with no indent. */
  public String generateGraphViz() {
    return generateGraphViz("");
  }

  /**
   * Writes the DOT text of this graph to a file, encoded as UTF-8.
   *
   * @param filepath path of the file to create or overwrite
   * @throws IOException if the file cannot be opened or written
   */
  public void save(String filepath) throws IOException {
    // Render first so the target file is only opened once there is
    // something to write.
    String dot = generateGraphViz();
    java.io.Writer out = new java.io.OutputStreamWriter(
        new java.io.FileOutputStream(filepath), "UTF-8");
    try {
      out.write(dot);
    } finally {
      // Always release the file handle, also when write() fails.
      out.close();
    }
  }

  /**
   * Merges edges that connect the same two nodes in the same direction.
   *
   * @param edges edges to merge; the list itself is not modified
   * @return a new list in which each (from, to) pair occurs once, in order
   *         of first appearance, with the merged labels joined by commas
   */
  public static List<Edge> combineEdges(List<Edge> edges) {
    List<Edge> ret = new ArrayList<Edge>();
    for (Edge edge : edges) {
      boolean found = false;
      for (int i = 0; i < ret.size(); i++) {
        Edge current = ret.get(i);
        if (edge.sameAs(current)) {
          ret.set(i, current.combine(edge));
          found = true;
          break;
        }
      }
      if (!found) {
        ret.add(edge);
      }
    }
    return ret;
  }
}
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.util; + +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.yarn.state.StateMachineFactory; + +public class VisualizeStateMachine { + + /** + * @param classes list of classes which have static field + * stateMachineFactory of type StateMachineFactory + * @return graph represent this StateMachine + */ + public static Graph getGraphFromClasses(String graphName, List classes) + throws Exception { + Graph ret = null; + if (classes.size() != 1) { + ret = new Graph(graphName); + } + for (String className : classes) { + Class clz = Class.forName(className); + Field factoryField = clz.getDeclaredField("stateMachineFactory"); + factoryField.setAccessible(true); + StateMachineFactory factory = (StateMachineFactory) factoryField.get(null); + if (classes.size() == 1) { + return factory.generateStateGraph(graphName); + } + String gname = clz.getSimpleName(); + if (gname.endsWith("Impl")) { + gname = gname.substring(0, gname.length()-4); + } + ret.addSubGraph(factory.generateStateGraph(gname)); + } + return ret; + } + + public static void main(String [] args) throws Exception { + if (args.length < 3) { + System.err.printf("Usage: %s \n", + VisualizeStateMachine.class.getName()); + System.exit(1); + } + String [] classes = args[1].split(","); + ArrayList validClasses = new ArrayList(); + for (String c : classes) { + String vc = c.trim(); + if (vc.length()>0) { + validClasses.add(vc); + } + } + Graph g = getGraphFromClasses(args[0], validClasses); + g.save(args[2]); + } +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml index 98959644cf..1d7b9cb2d1 100644 --- 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml @@ -103,6 +103,39 @@ true + + visualize + + false + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2 + + + compile + + java + + + org.apache.hadoop.yarn.util.VisualizeStateMachine + + NodeManager + org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl, + org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl, + org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.LocalizedResource + NodeManager.gv + + + + + + + + diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml index b4c398f70f..e676485e92 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml @@ -98,4 +98,41 @@ + + + + visualize + + false + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2 + + + compile + + java + + + org.apache.hadoop.yarn.util.VisualizeStateMachine + + ResourceManager + org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl, + org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl, + org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl, + org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl + ResourceManager.gv + + + + + + + + +