MAPREDUCE-7277. IndexCache totalMemoryUsed differs from cache contents. Contributed by Jon Eagles (jeagles).

This commit is contained in:
Eric E Payne 2020-04-27 19:10:00 +00:00
parent 18d7dfbf35
commit e2322e1117
2 changed files with 95 additions and 56 deletions

View File

@ -72,11 +72,12 @@ public IndexRecord getIndexInformation(String mapId, int reduce,
try { try {
info.wait(); info.wait();
} catch (InterruptedException e) { } catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new IOException("Interrupted waiting for construction", e); throw new IOException("Interrupted waiting for construction", e);
} }
} }
} }
LOG.debug("IndexCache HIT: MapId " + mapId + " found"); LOG.debug("IndexCache HIT: MapId {} found", mapId);
} }
if (info.mapSpillRecord.size() == 0 || if (info.mapSpillRecord.size() == 0 ||
@ -106,63 +107,91 @@ private IndexInformation readIndexFileToCache(Path indexFileName,
try { try {
info.wait(); info.wait();
} catch (InterruptedException e) { } catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new IOException("Interrupted waiting for construction", e); throw new IOException("Interrupted waiting for construction", e);
} }
} }
} }
LOG.debug("IndexCache HIT: MapId " + mapId + " found"); LOG.debug("IndexCache HIT: MapId {} found", mapId);
return info; return info;
} }
LOG.debug("IndexCache MISS: MapId " + mapId + " not found") ; LOG.debug("IndexCache MISS: MapId {} not found", mapId);
SpillRecord tmp = null; SpillRecord tmp = null;
boolean success = false;
try { try {
tmp = new SpillRecord(indexFileName, conf, expectedIndexOwner); tmp = new SpillRecord(indexFileName, conf, expectedIndexOwner);
} catch (Throwable e) { success = true;
} catch (Throwable e) {
tmp = new SpillRecord(0); tmp = new SpillRecord(0);
cache.remove(mapId); cache.remove(mapId);
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
throw new IOException("Error Reading IndexFile", e); throw new IOException("Error Reading IndexFile", e);
} finally { } finally {
synchronized (newInd) { synchronized (newInd) {
newInd.mapSpillRecord = tmp; newInd.mapSpillRecord = tmp;
if (success) {
// Only add mapId to the queue for successful read and after added to
// the cache. Once in the queue, it is now eligible for removal once
// construction is finished.
queue.add(mapId);
if (totalMemoryUsed.addAndGet(newInd.getSize()) > totalMemoryAllowed) {
freeIndexInformation();
}
}
newInd.notifyAll(); newInd.notifyAll();
} }
} }
queue.add(mapId);
if (totalMemoryUsed.addAndGet(newInd.getSize()) > totalMemoryAllowed) {
freeIndexInformation();
}
return newInd; return newInd;
} }
/** /**
* This method removes the map from the cache if index information for this * This method removes the map from the cache if it is present in the queue.
* map is loaded(size>0), index information entry in cache will not be
* removed if it is in the loading phrase(size=0), this prevents corruption
* of totalMemoryUsed. It should be called when a map output on this tracker
* is discarded.
* @param mapId The taskID of this map. * @param mapId The taskID of this map.
*/ */
public void removeMap(String mapId) { public void removeMap(String mapId) throws IOException {
IndexInformation info = cache.get(mapId); // Successfully removing the mapId from the queue enters into a contract
if (info == null || isUnderConstruction(info)) { // that this thread will remove the corresponding mapId from the cache.
if (!queue.remove(mapId)) {
LOG.debug("Map ID {} not found in queue", mapId);
return; return;
} }
info = cache.remove(mapId); removeMapInternal(mapId);
if (info != null) { }
totalMemoryUsed.addAndGet(-info.getSize());
if (!queue.remove(mapId)) { /** This method should only be called upon successful removal of mapId from
LOG.warn("Map ID" + mapId + " not found in queue!!"); * the queue. The mapId will be removed from the cache and totalUsedMemory
* will be decremented.
* @param mapId the cache item to be removed
* @throws IOException
*/
private void removeMapInternal(String mapId) throws IOException {
IndexInformation info = cache.remove(mapId);
if (info == null) {
// Inconsistent state as presence in queue implies presence in cache
LOG.warn("Map ID " + mapId + " not found in cache");
return;
}
try {
synchronized(info) {
while (isUnderConstruction(info)) {
info.wait();
}
totalMemoryUsed.getAndAdd(-info.getSize());
} }
} else { } catch (InterruptedException e) {
LOG.info("Map ID " + mapId + " not found in cache"); totalMemoryUsed.getAndAdd(-info.getSize());
Thread.currentThread().interrupt();
throw new IOException("Interrupted waiting for construction", e);
} }
} }
/** /**
* This method checks if cache and totolMemoryUsed is consistent. * This method checks if cache and totalMemoryUsed is consistent.
* It is only used for unit test. * It is only used for unit test.
* @return True if cache and totolMemoryUsed is consistent * @return True if cache and totalMemoryUsed is consistent
*/ */
boolean checkTotalMemoryUsed() { boolean checkTotalMemoryUsed() {
int totalSize = 0; int totalSize = 0;
@ -175,13 +204,13 @@ boolean checkTotalMemoryUsed() {
/** /**
* Bring memory usage below totalMemoryAllowed. * Bring memory usage below totalMemoryAllowed.
*/ */
private synchronized void freeIndexInformation() { private synchronized void freeIndexInformation() throws IOException {
while (totalMemoryUsed.get() > totalMemoryAllowed) { while (totalMemoryUsed.get() > totalMemoryAllowed) {
String s = queue.remove(); if(queue.isEmpty()) {
IndexInformation info = cache.remove(s); break;
if (info != null) {
totalMemoryUsed.addAndGet(-info.getSize());
} }
String mapId = queue.remove();
removeMapInternal(mapId);
} }
} }

View File

@ -21,6 +21,7 @@
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.util.Random; import java.util.Random;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.zip.CRC32; import java.util.zip.CRC32;
import java.util.zip.CheckedOutputStream; import java.util.zip.CheckedOutputStream;
@ -216,23 +217,32 @@ public void testRemoveMap() throws Exception {
final String user = final String user =
UserGroupInformation.getCurrentUser().getShortUserName(); UserGroupInformation.getCurrentUser().getShortUserName();
writeFile(fs, big, bytesPerFile, partsPerMap); writeFile(fs, big, bytesPerFile, partsPerMap);
// Capture if any runtime exception occurred
AtomicBoolean failed = new AtomicBoolean();
// run multiple times // run multiple times
for (int i = 0; i < 20; ++i) { for (int i = 0; i < 20; ++i) {
Thread getInfoThread = new Thread() { Thread getInfoThread = new Thread() {
@Override @Override
public void run() { public void run() {
try { try {
cache.getIndexInformation("bigIndex", partsPerMap, big, user); cache.getIndexInformation("bigIndex", 0, big, user);
} catch (Exception e) { } catch (Exception e) {
// should not be here // should not be here
failed.set(true);
} }
} }
}; };
Thread removeMapThread = new Thread() { Thread removeMapThread = new Thread() {
@Override @Override
public void run() { public void run() {
cache.removeMap("bigIndex"); try {
cache.removeMap("bigIndex");
} catch (Exception e) {
// should not be here
failed.set(true);
}
} }
}; };
if (i%2==0) { if (i%2==0) {
@ -244,6 +254,7 @@ public void run() {
} }
getInfoThread.join(); getInfoThread.join();
removeMapThread.join(); removeMapThread.join();
assertFalse("An unexpected exception", failed.get());
assertTrue(cache.checkTotalMemoryUsed()); assertTrue(cache.checkTotalMemoryUsed());
} }
} }
@ -261,6 +272,9 @@ public void testCreateRace() throws Exception {
UserGroupInformation.getCurrentUser().getShortUserName(); UserGroupInformation.getCurrentUser().getShortUserName();
writeFile(fs, racy, bytesPerFile, partsPerMap); writeFile(fs, racy, bytesPerFile, partsPerMap);
// Capture if any runtime exception occurred
AtomicBoolean failed = new AtomicBoolean();
// run multiple instances // run multiple instances
Thread[] getInfoThreads = new Thread[50]; Thread[] getInfoThreads = new Thread[50];
for (int i = 0; i < 50; i++) { for (int i = 0; i < 50; i++) {
@ -268,10 +282,15 @@ public void testCreateRace() throws Exception {
@Override @Override
public void run() { public void run() {
try { try {
cache.getIndexInformation("racyIndex", partsPerMap, racy, user); while (!Thread.currentThread().isInterrupted()) {
cache.removeMap("racyIndex"); cache.getIndexInformation("racyIndex", 0, racy, user);
cache.removeMap("racyIndex");
}
} catch (Exception e) { } catch (Exception e) {
// should not be here if (!Thread.currentThread().isInterrupted()) {
// should not be here
failed.set(true);
}
} }
} }
}; };
@ -281,20 +300,12 @@ public void run() {
getInfoThreads[i].start(); getInfoThreads[i].start();
} }
final Thread mainTestThread = Thread.currentThread(); // The duration to keep the threads testing
Thread.sleep(5000);
Thread timeoutThread = new Thread() {
@Override
public void run() {
try {
Thread.sleep(15000);
mainTestThread.interrupt();
} catch (InterruptedException ie) {
// we are done;
}
}
};
for (int i = 0; i < 50; i++) {
getInfoThreads[i].interrupt();
}
for (int i = 0; i < 50; i++) { for (int i = 0; i < 50; i++) {
try { try {
getInfoThreads[i].join(); getInfoThreads[i].join();
@ -303,10 +314,9 @@ public void run() {
fail("Unexpectedly long delay during concurrent cache entry creations"); fail("Unexpectedly long delay during concurrent cache entry creations");
} }
} }
// stop the timeoutThread. If we get interrupted before stopping, there assertFalse("An unexpected exception", failed.get());
// must be something wrong, although it wasn't a deadlock. No need to assertTrue("Total memory used does not represent contents of the cache",
// catch and swallow. cache.checkTotalMemoryUsed());
timeoutThread.interrupt();
} }
private static void checkRecord(IndexRecord rec, long fill) { private static void checkRecord(IndexRecord rec, long fill) {