diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 56c4f262ad..fb561e5c2e 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -58,6 +58,9 @@ Trunk (unreleased changes) MAPREDUCE-2384. The job submitter should make sure to validate jobs before creation of necessary files. (harsh) + MAPREDUCE-4371. Check for cyclic dependencies in Jobcontrol job DAG + (madhukara phatak via bobby) + BUG FIXES MAPREDUCE-4356. [Rumen] Provide access to the method diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/jobcontrol/JobControl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/jobcontrol/JobControl.java index a2bc70aaf5..3857359c8a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/jobcontrol/JobControl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/jobcontrol/JobControl.java @@ -24,6 +24,8 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import java.util.HashMap; +import java.util.HashSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -202,6 +204,9 @@ synchronized public boolean allFinished() { * Submit the jobs in ready state */ public void run() { + if (isCircular(jobsInProgress)) { + throw new IllegalArgumentException("job control has circular dependency"); + } try { this.runnerState = ThreadState.RUNNING; while (true) { @@ -281,4 +286,64 @@ synchronized private void failAllJobs(Throwable t) { } } } + + /** + * Uses topological sorting algorithm for finding circular dependency + */ + private boolean isCircular(final List<ControlledJob> jobList) { + boolean cyclePresent = false; + HashSet<ControlledJob> 
SourceSet = new HashSet<ControlledJob>(); + HashMap<ControlledJob, List<ControlledJob>> processedMap = + new HashMap<ControlledJob, List<ControlledJob>>(); + for (ControlledJob n : jobList) { + processedMap.put(n, new ArrayList<ControlledJob>()); + } + for (ControlledJob n : jobList) { + if (!hasInComingEdge(n, jobList, processedMap)) { + SourceSet.add(n); + } + } + while (!SourceSet.isEmpty()) { + ControlledJob controlledJob = SourceSet.iterator().next(); + SourceSet.remove(controlledJob); + if (controlledJob.getDependentJobs() != null) { + for (int i = 0; i < controlledJob.getDependentJobs().size(); i++) { + ControlledJob depenControlledJob = + controlledJob.getDependentJobs().get(i); + processedMap.get(controlledJob).add(depenControlledJob); + if (!hasInComingEdge(controlledJob, jobList, processedMap)) { + SourceSet.add(depenControlledJob); + } + } + } + } + + for (ControlledJob controlledJob : jobList) { + if (controlledJob.getDependentJobs() != null + && controlledJob.getDependentJobs().size() != processedMap.get( + controlledJob).size()) { + cyclePresent = true; + LOG.error("Job control has circular dependency for the job " + + controlledJob.getJobName()); + break; + } + } + return cyclePresent; + } + + private boolean hasInComingEdge(ControlledJob controlledJob, + List<ControlledJob> controlledJobList, + HashMap<ControlledJob, List<ControlledJob>> processedMap) { + boolean hasIncomingEdge = false; + for (ControlledJob k : controlledJobList) { + if (k != controlledJob && k.getDependentJobs() != null + && !processedMap.get(k).contains(controlledJob) + && k.getDependentJobs().contains(controlledJob)) { + hasIncomingEdge = true; + break; + } + } + return hasIncomingEdge; + + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/jobcontrol/TestJobControl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/jobcontrol/TestJobControl.java new file mode 100644 index 0000000000..41a4eaf64a --- /dev/null +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/jobcontrol/TestJobControl.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.lib.jobcontrol; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.junit.Test; + +public class TestJobControl { + + /* + * Tests for the circular dependency between the jobs in job control. 
If there + * is a circular dependency, an IllegalArgumentException will be thrown + */ + @Test + public void testCircularDependency() throws IOException { + ControlledJob job1 = new ControlledJob(new Configuration()); + job1.setJobName("job1"); + ControlledJob job2 = new ControlledJob(new Configuration()); + job2.setJobName("job2"); + ControlledJob job3 = new ControlledJob(new Configuration()); + job3.setJobName("job3"); + job1.addDependingJob(job2); + job2.addDependingJob(job3); + job3.addDependingJob(job1); + JobControl jobControl = new JobControl("test"); + jobControl.addJob(job1); + jobControl.addJob(job2); + jobControl.addJob(job3); + + try { + jobControl.run(); + } catch (Exception e) { + assertTrue(e instanceof IllegalArgumentException); + } + + } +} +