MAPREDUCE-6197. Cache MapOutputLocations in ShuffleHandler. Contributed by Junping Du
This commit is contained in:
parent
b2c596cdda
commit
d8107fcd1c
@ -46,6 +46,7 @@
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
import java.util.concurrent.ThreadFactory;
|
import java.util.concurrent.ThreadFactory;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
@ -131,6 +132,12 @@
|
|||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Charsets;
|
import com.google.common.base.Charsets;
|
||||||
|
import com.google.common.cache.CacheBuilder;
|
||||||
|
import com.google.common.cache.CacheLoader;
|
||||||
|
import com.google.common.cache.LoadingCache;
|
||||||
|
import com.google.common.cache.RemovalListener;
|
||||||
|
import com.google.common.cache.RemovalNotification;
|
||||||
|
import com.google.common.cache.Weigher;
|
||||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||||
import com.google.protobuf.ByteString;
|
import com.google.protobuf.ByteString;
|
||||||
|
|
||||||
@ -156,6 +163,9 @@ public class ShuffleHandler extends AuxiliaryService {
|
|||||||
protected static final Version CURRENT_VERSION_INFO =
|
protected static final Version CURRENT_VERSION_INFO =
|
||||||
Version.newInstance(1, 0);
|
Version.newInstance(1, 0);
|
||||||
|
|
||||||
|
private static final String DATA_FILE_NAME = "file.out";
|
||||||
|
private static final String INDEX_FILE_NAME = "file.out.index";
|
||||||
|
|
||||||
private int port;
|
private int port;
|
||||||
private ChannelFactory selector;
|
private ChannelFactory selector;
|
||||||
private final ChannelGroup accepted = new DefaultChannelGroup();
|
private final ChannelGroup accepted = new DefaultChannelGroup();
|
||||||
@ -294,12 +304,12 @@ private static class ReduceContext {
|
|||||||
private ChannelHandlerContext ctx;
|
private ChannelHandlerContext ctx;
|
||||||
private String user;
|
private String user;
|
||||||
private Map<String, Shuffle.MapOutputInfo> infoMap;
|
private Map<String, Shuffle.MapOutputInfo> infoMap;
|
||||||
private String outputBasePathStr;
|
private String jobId;
|
||||||
|
|
||||||
public ReduceContext(List<String> mapIds, int rId,
|
public ReduceContext(List<String> mapIds, int rId,
|
||||||
ChannelHandlerContext context, String usr,
|
ChannelHandlerContext context, String usr,
|
||||||
Map<String, Shuffle.MapOutputInfo> mapOutputInfoMap,
|
Map<String, Shuffle.MapOutputInfo> mapOutputInfoMap,
|
||||||
String outputBasePath) {
|
String jobId) {
|
||||||
|
|
||||||
this.mapIds = mapIds;
|
this.mapIds = mapIds;
|
||||||
this.reduceId = rId;
|
this.reduceId = rId;
|
||||||
@ -319,7 +329,7 @@ public ReduceContext(List<String> mapIds, int rId,
|
|||||||
this.ctx = context;
|
this.ctx = context;
|
||||||
this.user = usr;
|
this.user = usr;
|
||||||
this.infoMap = mapOutputInfoMap;
|
this.infoMap = mapOutputInfoMap;
|
||||||
this.outputBasePathStr = outputBasePath;
|
this.jobId = jobId;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getReduceId() {
|
public int getReduceId() {
|
||||||
@ -338,8 +348,8 @@ public Map<String, Shuffle.MapOutputInfo> getInfoMap() {
|
|||||||
return infoMap;
|
return infoMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getOutputBasePathStr() {
|
public String getJobId() {
|
||||||
return outputBasePathStr;
|
return jobId;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getMapIds() {
|
public List<String> getMapIds() {
|
||||||
@ -780,18 +790,63 @@ public ChannelPipeline getPipeline() throws Exception {
|
|||||||
|
|
||||||
class Shuffle extends SimpleChannelUpstreamHandler {
|
class Shuffle extends SimpleChannelUpstreamHandler {
|
||||||
|
|
||||||
|
private static final int MAX_WEIGHT = 10 * 1024 * 1024;
|
||||||
|
private static final int EXPIRE_AFTER_ACCESS_MINUTES = 5;
|
||||||
|
private static final int ALLOWED_CONCURRENCY = 16;
|
||||||
private final Configuration conf;
|
private final Configuration conf;
|
||||||
private final IndexCache indexCache;
|
private final IndexCache indexCache;
|
||||||
private final LocalDirAllocator lDirAlloc =
|
private final LocalDirAllocator lDirAlloc =
|
||||||
new LocalDirAllocator(YarnConfiguration.NM_LOCAL_DIRS);
|
new LocalDirAllocator(YarnConfiguration.NM_LOCAL_DIRS);
|
||||||
private int port;
|
private int port;
|
||||||
|
private final LoadingCache<AttemptPathIdentifier, AttemptPathInfo> pathCache =
|
||||||
|
CacheBuilder.newBuilder().expireAfterAccess(EXPIRE_AFTER_ACCESS_MINUTES,
|
||||||
|
TimeUnit.MINUTES).softValues().concurrencyLevel(ALLOWED_CONCURRENCY).
|
||||||
|
removalListener(
|
||||||
|
new RemovalListener<AttemptPathIdentifier, AttemptPathInfo>() {
|
||||||
|
@Override
|
||||||
|
public void onRemoval(RemovalNotification<AttemptPathIdentifier,
|
||||||
|
AttemptPathInfo> notification) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("PathCache Eviction: " + notification.getKey() +
|
||||||
|
", Reason=" + notification.getCause());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
).maximumWeight(MAX_WEIGHT).weigher(
|
||||||
|
new Weigher<AttemptPathIdentifier, AttemptPathInfo>() {
|
||||||
|
@Override
|
||||||
|
public int weigh(AttemptPathIdentifier key,
|
||||||
|
AttemptPathInfo value) {
|
||||||
|
return key.jobId.length() + key.user.length() +
|
||||||
|
key.attemptId.length()+
|
||||||
|
value.indexPath.toString().length() +
|
||||||
|
value.dataPath.toString().length();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
).build(new CacheLoader<AttemptPathIdentifier, AttemptPathInfo>() {
|
||||||
|
@Override
|
||||||
|
public AttemptPathInfo load(AttemptPathIdentifier key) throws
|
||||||
|
Exception {
|
||||||
|
String base = getBaseLocation(key.jobId, key.user);
|
||||||
|
String attemptBase = base + key.attemptId;
|
||||||
|
Path indexFileName = lDirAlloc.getLocalPathToRead(
|
||||||
|
attemptBase + "/" + INDEX_FILE_NAME, conf);
|
||||||
|
Path mapOutputFileName = lDirAlloc.getLocalPathToRead(
|
||||||
|
attemptBase + "/" + DATA_FILE_NAME, conf);
|
||||||
|
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Loaded : " + key + " via loader");
|
||||||
|
}
|
||||||
|
return new AttemptPathInfo(indexFileName, mapOutputFileName);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
public Shuffle(Configuration conf) {
|
public Shuffle(Configuration conf) {
|
||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
indexCache = new IndexCache(new JobConf(conf));
|
indexCache = new IndexCache(new JobConf(conf));
|
||||||
this.port = conf.getInt(SHUFFLE_PORT_CONFIG_KEY, DEFAULT_SHUFFLE_PORT);
|
this.port = conf.getInt(SHUFFLE_PORT_CONFIG_KEY, DEFAULT_SHUFFLE_PORT);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setPort(int port) {
|
public void setPort(int port) {
|
||||||
this.port = port;
|
this.port = port;
|
||||||
}
|
}
|
||||||
@ -908,13 +963,8 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent evt)
|
|||||||
Channel ch = evt.getChannel();
|
Channel ch = evt.getChannel();
|
||||||
String user = userRsrc.get(jobId);
|
String user = userRsrc.get(jobId);
|
||||||
|
|
||||||
// $x/$user/appcache/$appId/output/$mapId
|
|
||||||
// TODO: Once Shuffle is out of NM, this can use MR APIs to convert
|
|
||||||
// between App and Job
|
|
||||||
String outputBasePathStr = getBaseLocation(jobId, user);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
populateHeaders(mapIds, outputBasePathStr, user, reduceId, request,
|
populateHeaders(mapIds, jobId, user, reduceId, request,
|
||||||
response, keepAliveParam, mapOutputInfoMap);
|
response, keepAliveParam, mapOutputInfoMap);
|
||||||
} catch(IOException e) {
|
} catch(IOException e) {
|
||||||
ch.write(response);
|
ch.write(response);
|
||||||
@ -926,7 +976,7 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent evt)
|
|||||||
ch.write(response);
|
ch.write(response);
|
||||||
//Initialize one ReduceContext object per messageReceived call
|
//Initialize one ReduceContext object per messageReceived call
|
||||||
ReduceContext reduceContext = new ReduceContext(mapIds, reduceId, ctx,
|
ReduceContext reduceContext = new ReduceContext(mapIds, reduceId, ctx,
|
||||||
user, mapOutputInfoMap, outputBasePathStr);
|
user, mapOutputInfoMap, jobId);
|
||||||
for (int i = 0; i < Math.min(maxSessionOpenFiles, mapIds.size()); i++) {
|
for (int i = 0; i < Math.min(maxSessionOpenFiles, mapIds.size()); i++) {
|
||||||
ChannelFuture nextMap = sendMap(reduceContext);
|
ChannelFuture nextMap = sendMap(reduceContext);
|
||||||
if(nextMap == null) {
|
if(nextMap == null) {
|
||||||
@ -957,9 +1007,8 @@ public ChannelFuture sendMap(ReduceContext reduceContext)
|
|||||||
try {
|
try {
|
||||||
MapOutputInfo info = reduceContext.getInfoMap().get(mapId);
|
MapOutputInfo info = reduceContext.getInfoMap().get(mapId);
|
||||||
if (info == null) {
|
if (info == null) {
|
||||||
info = getMapOutputInfo(reduceContext.getOutputBasePathStr() +
|
info = getMapOutputInfo(mapId, reduceContext.getReduceId(),
|
||||||
mapId, mapId, reduceContext.getReduceId(),
|
reduceContext.getJobId(), reduceContext.getUser());
|
||||||
reduceContext.getUser());
|
|
||||||
}
|
}
|
||||||
nextMap = sendMapOutput(
|
nextMap = sendMapOutput(
|
||||||
reduceContext.getCtx(),
|
reduceContext.getCtx(),
|
||||||
@ -1003,46 +1052,58 @@ private String getBaseLocation(String jobId, String user) {
|
|||||||
return baseStr;
|
return baseStr;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected MapOutputInfo getMapOutputInfo(String base, String mapId,
|
protected MapOutputInfo getMapOutputInfo(String mapId, int reduce,
|
||||||
int reduce, String user) throws IOException {
|
String jobId, String user) throws IOException {
|
||||||
// Index file
|
AttemptPathInfo pathInfo;
|
||||||
Path indexFileName =
|
try {
|
||||||
lDirAlloc.getLocalPathToRead(base + "/file.out.index", conf);
|
AttemptPathIdentifier identifier = new AttemptPathIdentifier(
|
||||||
IndexRecord info =
|
jobId, user, mapId);
|
||||||
indexCache.getIndexInformation(mapId, reduce, indexFileName, user);
|
pathInfo = pathCache.get(identifier);
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
Path mapOutputFileName =
|
LOG.debug("Retrieved pathInfo for " + identifier +
|
||||||
lDirAlloc.getLocalPathToRead(base + "/file.out", conf);
|
" check for corresponding loaded messages to determine whether" +
|
||||||
if (LOG.isDebugEnabled()) {
|
" it was loaded or cached");
|
||||||
LOG.debug(base + " : " + mapOutputFileName + " : " + indexFileName);
|
}
|
||||||
|
} catch (ExecutionException e) {
|
||||||
|
if (e.getCause() instanceof IOException) {
|
||||||
|
throw (IOException) e.getCause();
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException(e.getCause());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
MapOutputInfo outputInfo = new MapOutputInfo(mapOutputFileName, info);
|
|
||||||
|
IndexRecord info =
|
||||||
|
indexCache.getIndexInformation(mapId, reduce, pathInfo.indexPath, user);
|
||||||
|
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("getMapOutputInfo: jobId=" + jobId + ", mapId=" + mapId +
|
||||||
|
",dataFile=" + pathInfo.dataPath + ", indexFile=" +
|
||||||
|
pathInfo.indexPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
MapOutputInfo outputInfo = new MapOutputInfo(pathInfo.dataPath, info);
|
||||||
return outputInfo;
|
return outputInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void populateHeaders(List<String> mapIds, String outputBaseStr,
|
protected void populateHeaders(List<String> mapIds, String jobId,
|
||||||
String user, int reduce, HttpRequest request, HttpResponse response,
|
String user, int reduce, HttpRequest request, HttpResponse response,
|
||||||
boolean keepAliveParam, Map<String, MapOutputInfo> mapOutputInfoMap)
|
boolean keepAliveParam, Map<String, MapOutputInfo> mapOutputInfoMap)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
long contentLength = 0;
|
long contentLength = 0;
|
||||||
for (String mapId : mapIds) {
|
for (String mapId : mapIds) {
|
||||||
String base = outputBaseStr + mapId;
|
MapOutputInfo outputInfo = getMapOutputInfo(mapId, reduce, jobId, user);
|
||||||
MapOutputInfo outputInfo = getMapOutputInfo(base, mapId, reduce, user);
|
|
||||||
if (mapOutputInfoMap.size() < mapOutputMetaInfoCacheSize) {
|
if (mapOutputInfoMap.size() < mapOutputMetaInfoCacheSize) {
|
||||||
mapOutputInfoMap.put(mapId, outputInfo);
|
mapOutputInfoMap.put(mapId, outputInfo);
|
||||||
}
|
}
|
||||||
// Index file
|
|
||||||
Path indexFileName =
|
|
||||||
lDirAlloc.getLocalPathToRead(base + "/file.out.index", conf);
|
|
||||||
IndexRecord info =
|
|
||||||
indexCache.getIndexInformation(mapId, reduce, indexFileName, user);
|
|
||||||
ShuffleHeader header =
|
ShuffleHeader header =
|
||||||
new ShuffleHeader(mapId, info.partLength, info.rawLength, reduce);
|
new ShuffleHeader(mapId, outputInfo.indexRecord.partLength,
|
||||||
|
outputInfo.indexRecord.rawLength, reduce);
|
||||||
DataOutputBuffer dob = new DataOutputBuffer();
|
DataOutputBuffer dob = new DataOutputBuffer();
|
||||||
header.write(dob);
|
header.write(dob);
|
||||||
|
|
||||||
contentLength += info.partLength;
|
contentLength += outputInfo.indexRecord.partLength;
|
||||||
contentLength += dob.getLength();
|
contentLength += dob.getLength();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1215,4 +1276,64 @@ public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static class AttemptPathInfo {
|
||||||
|
// TODO Change this over to just store local dir indices, instead of the
|
||||||
|
// entire path. Far more efficient.
|
||||||
|
private final Path indexPath;
|
||||||
|
private final Path dataPath;
|
||||||
|
|
||||||
|
public AttemptPathInfo(Path indexPath, Path dataPath) {
|
||||||
|
this.indexPath = indexPath;
|
||||||
|
this.dataPath = dataPath;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static class AttemptPathIdentifier {
|
||||||
|
private final String jobId;
|
||||||
|
private final String user;
|
||||||
|
private final String attemptId;
|
||||||
|
|
||||||
|
public AttemptPathIdentifier(String jobId, String user, String attemptId) {
|
||||||
|
this.jobId = jobId;
|
||||||
|
this.user = user;
|
||||||
|
this.attemptId = attemptId;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (o == null || getClass() != o.getClass()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
AttemptPathIdentifier that = (AttemptPathIdentifier) o;
|
||||||
|
|
||||||
|
if (!attemptId.equals(that.attemptId)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!jobId.equals(that.jobId)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
int result = jobId.hashCode();
|
||||||
|
result = 31 * result + attemptId.hashCode();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "AttemptPathIdentifier{" +
|
||||||
|
"attemptId='" + attemptId + '\'' +
|
||||||
|
", jobId='" + jobId + '\'' +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -110,8 +110,8 @@ protected void verifyRequest(String appid, ChannelHandlerContext ctx,
|
|||||||
throws IOException {
|
throws IOException {
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
protected MapOutputInfo getMapOutputInfo(String base, String mapId,
|
protected MapOutputInfo getMapOutputInfo(String mapId, int reduce,
|
||||||
int reduce, String user) throws IOException {
|
String jobId, String user) throws IOException {
|
||||||
// Do nothing.
|
// Do nothing.
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@ -230,8 +230,8 @@ protected Shuffle getShuffle(Configuration conf) {
|
|||||||
// replace the shuffle handler with one stubbed for testing
|
// replace the shuffle handler with one stubbed for testing
|
||||||
return new Shuffle(conf) {
|
return new Shuffle(conf) {
|
||||||
@Override
|
@Override
|
||||||
protected MapOutputInfo getMapOutputInfo(String base, String mapId,
|
protected MapOutputInfo getMapOutputInfo(String mapId, int reduce,
|
||||||
int reduce, String user) throws IOException {
|
String jobId, String user) throws IOException {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
@ -325,8 +325,8 @@ protected Shuffle getShuffle(final Configuration conf) {
|
|||||||
// replace the shuffle handler with one stubbed for testing
|
// replace the shuffle handler with one stubbed for testing
|
||||||
return new Shuffle(conf) {
|
return new Shuffle(conf) {
|
||||||
@Override
|
@Override
|
||||||
protected MapOutputInfo getMapOutputInfo(String base, String mapId,
|
protected MapOutputInfo getMapOutputInfo(String mapId, int reduce,
|
||||||
int reduce, String user) throws IOException {
|
String jobId, String user) throws IOException {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
@ -534,8 +534,8 @@ protected Shuffle getShuffle(Configuration conf) {
|
|||||||
// replace the shuffle handler with one stubbed for testing
|
// replace the shuffle handler with one stubbed for testing
|
||||||
return new Shuffle(conf) {
|
return new Shuffle(conf) {
|
||||||
@Override
|
@Override
|
||||||
protected MapOutputInfo getMapOutputInfo(String base, String mapId,
|
protected MapOutputInfo getMapOutputInfo(String mapId, int reduce,
|
||||||
int reduce, String user) throws IOException {
|
String jobId, String user) throws IOException {
|
||||||
// Do nothing.
|
// Do nothing.
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user