From c0737b0bdb53c72a036644ad48f57dd8d3f58b36 Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Mon, 10 Jun 2013 21:39:34 +0000 Subject: [PATCH] MAPREDUCE-5308. Shuffling to memory can get out-of-sync when fetching multiple compressed map outputs. Contributed by Nathan Roberts git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1491611 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 6 ++ .../task/reduce/InMemoryMapOutput.java | 13 ++++ .../mapreduce/task/reduce/TestFetcher.java | 78 ++++++++++++++++++- 3 files changed, 95 insertions(+), 2 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index ffbb975992..ca03caa139 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -502,6 +502,9 @@ Release 2.1.0-beta - UNRELEASED MAPREDUCE-5301. Updated MR code to work with YARN-635 changes of renaming YarnRemoteException to YarnException. (Siddharth Seth via vinodkv) + MAPREDUCE-5308. Shuffling to memory can get out-of-sync when fetching + multiple compressed map outputs (Nathan Roberts via jlowe) + BREAKDOWN OF HADOOP-8562 SUBTASKS MAPREDUCE-4739. Some MapReduce tests fail to find winutils. @@ -1108,6 +1111,9 @@ Release 0.23.9 - UNRELEASED BUG FIXES + MAPREDUCE-5308. Shuffling to memory can get out-of-sync when fetching + multiple compressed map outputs (Nathan Roberts via jlowe) + Release 0.23.8 - 2013-06-05 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/InMemoryMapOutput.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/InMemoryMapOutput.java index 87e9268c31..24fb3bbaca 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/InMemoryMapOutput.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/InMemoryMapOutput.java @@ -99,6 +99,19 @@ public void shuffle(MapHost host, InputStream input, reporter.progress(); LOG.info("Read " + memory.length + " bytes from map-output for " + getMapId()); + + /** + * We've gotten the amount of data we were expecting. Verify the + * decompressor has nothing more to offer. This action also forces the + * decompressor to read any trailing bytes that weren't critical + * for decompression, which is necessary to keep the stream + * in sync. + */ + if (input.read() >= 0 ) { + throw new IOException("Unexpected extra bytes from input stream for " + + getMapId()); + } + } catch (IOException ioe) { // Close the streams IOUtils.cleanup(LOG, input); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java index 04c6478e9c..570bdadd92 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java @@ -37,6 +37,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.Counters; +import org.apache.hadoop.mapred.IFileOutputStream; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.TaskAttemptID; @@ -233,6 +234,80 @@ public void testCopyFromHostWait() throws Exception { verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map1ID)); verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID)); } + @SuppressWarnings("unchecked") + @Test + public void testCopyFromHostExtraBytes() throws Exception { + LOG.info("testCopyFromHostWaitExtraBytes"); + JobConf job = new JobConf(); + TaskAttemptID id = TaskAttemptID.forName("attempt_0_1_r_1_1"); + ShuffleScheduler ss = mock(ShuffleScheduler.class); + MergeManagerImpl mm = mock(MergeManagerImpl.class); + InMemoryMapOutput immo = mock(InMemoryMapOutput.class); + + Reporter r = mock(Reporter.class); + ShuffleClientMetrics metrics = mock(ShuffleClientMetrics.class); + ExceptionReporter except = mock(ExceptionReporter.class); + SecretKey key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0}); + HttpURLConnection connection = mock(HttpURLConnection.class); + + Counters.Counter allErrs = mock(Counters.Counter.class); + when(r.getCounter(anyString(), anyString())) + .thenReturn(allErrs); + + Fetcher underTest = new FakeFetcher(job, id, ss, mm, + r, metrics, except, key, connection); + + + MapHost host = new MapHost("localhost", "http://localhost:8080/"); + + ArrayList maps = new ArrayList(1); + TaskAttemptID map1ID = TaskAttemptID.forName("attempt_0_1_m_1_1"); + maps.add(map1ID); + TaskAttemptID map2ID = TaskAttemptID.forName("attempt_0_1_m_2_1"); + maps.add(map2ID); + when(ss.getMapsForHost(host)).thenReturn(maps); + + String encHash = "vFE234EIFCiBgYs2tCXY/SjT8Kg="; + String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key); + + when(connection.getResponseCode()).thenReturn(200); + when(connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH)) + .thenReturn(replyHash); + ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 14, 10, 1); + + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bout); + IFileOutputStream ios = new IFileOutputStream(dos); + header.write(dos); + ios.write("MAPDATA123".getBytes()); + ios.finish(); + + ShuffleHeader header2 = new ShuffleHeader(map2ID.toString(), 14, 10, 1); + IFileOutputStream ios2 = new IFileOutputStream(dos); + header2.write(dos); + ios2.write("MAPDATA456".getBytes()); + ios2.finish(); + + ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray()); + when(connection.getInputStream()).thenReturn(in); + // 8 < 10 therefore there appear to be extra bytes in the IFileInputStream + InMemoryMapOutput mapOut = new InMemoryMapOutput(job, map1ID, mm, 8, null, true ); + InMemoryMapOutput mapOut2 = new InMemoryMapOutput(job, map2ID, mm, 10, null, true ); + + when(mm.reserve(eq(map1ID), anyLong(), anyInt())).thenReturn(mapOut); + when(mm.reserve(eq(map2ID), anyLong(), anyInt())).thenReturn(mapOut2); + + + underTest.copyFromHost(host); + + + verify(allErrs).increment(1); + verify(ss).copyFailed(map1ID, host, true, false); + verify(ss, never()).copyFailed(map2ID, host, true, false); + + verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map1ID)); + verify(ss).putBackKnownMapOutput(any(MapHost.class), eq(map2ID)); + } @SuppressWarnings("unchecked") @Test(timeout=10000) @@ -265,7 +340,6 @@ public void testCopyFromHostCompressFailure() throws Exception { TaskAttemptID map2ID = TaskAttemptID.forName("attempt_0_1_m_2_1"); maps.add(map2ID); when(ss.getMapsForHost(host)).thenReturn(maps); - String encHash = "vFE234EIFCiBgYs2tCXY/SjT8Kg="; String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key); @@ -292,4 +366,4 @@ public void testCopyFromHostCompressFailure() throws Exception { encHash); verify(ss, times(1)).copyFailed(map1ID, host, true, false); } -} \ No newline at end of file +}