HDFS-12255. Block Storage: Cblock should generated unique trace ID for the ops. Contributed by Mukul Kumar Singh.
This commit is contained in:
parent
e61530f5d9
commit
6a16d7c7ab
@ -93,9 +93,9 @@ public class BlockWriterTask implements Runnable {
|
||||
long endTime = Time.monotonicNow();
|
||||
Preconditions.checkState(data.length > 0, "Block data is zero length");
|
||||
startTime = Time.monotonicNow();
|
||||
// BUG: fix the trace ID.
|
||||
ContainerProtocolCalls.writeSmallFile(client, containerName,
|
||||
Long.toString(block.getBlockID()), data, "");
|
||||
Long.toString(block.getBlockID()), data,
|
||||
flusher.getTraceID(new File(dbPath), block.getBlockID()));
|
||||
endTime = Time.monotonicNow();
|
||||
flusher.getTargetMetrics().updateContainerWriteLatency(
|
||||
endTime - startTime);
|
||||
|
@ -28,6 +28,7 @@ import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.ozone.OzoneConsts;
|
||||
import org.apache.hadoop.scm.XceiverClientManager;
|
||||
import org.apache.hadoop.scm.container.common.helpers.Pipeline;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.apache.hadoop.utils.LevelDBStore;
|
||||
import org.iq80.leveldb.Options;
|
||||
import org.slf4j.Logger;
|
||||
@ -37,6 +38,8 @@ import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.net.InetAddress;
|
||||
import java.net.UnknownHostException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.ReadableByteChannel;
|
||||
import java.nio.file.Files;
|
||||
@ -45,6 +48,7 @@ import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
@ -103,6 +107,7 @@ public class ContainerCacheFlusher implements Runnable {
|
||||
private AtomicBoolean shutdown;
|
||||
private final long levelDBCacheSize;
|
||||
private final int maxRetryCount;
|
||||
private final String tracePrefix;
|
||||
|
||||
private final ConcurrentMap<String, FinishCounter> finishCountMap;
|
||||
|
||||
@ -158,6 +163,7 @@ public class ContainerCacheFlusher implements Runnable {
|
||||
this.maxRetryCount =
|
||||
config.getInt(CBlockConfigKeys.DFS_CBLOCK_CACHE_MAX_RETRY_KEY,
|
||||
CBlockConfigKeys.DFS_CBLOCK_CACHE_MAX_RETRY_DEFAULT);
|
||||
this.tracePrefix = getTracePrefix();
|
||||
}
|
||||
|
||||
private void checkExistingLog(String prefixFileName, File dbPath) {
|
||||
@ -436,6 +442,40 @@ public class ContainerCacheFlusher implements Runnable {
|
||||
LOG.info("Exiting flusher");
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to get the local host IP Address as trace prefix
|
||||
* for creating trace IDs, otherwise uses a random UUID for it.
|
||||
*/
|
||||
private static String getTracePrefix() {
|
||||
String tmp;
|
||||
try {
|
||||
tmp = InetAddress.getLocalHost().getHostAddress();
|
||||
} catch (UnknownHostException ex) {
|
||||
tmp = UUID.randomUUID().toString();
|
||||
LOG.error("Unable to read the host address. Using a GUID for " +
|
||||
"hostname:{} ", tmp, ex);
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
|
||||
/**
|
||||
* We create a trace ID to make it easy to debug issues.
|
||||
* A trace ID is in IPAddress:UserName:VolumeName:blockID:second format.
|
||||
*
|
||||
* This will get written down on the data node if we get any failures, so
|
||||
* with this trace ID we can correlate cBlock failures across machines.
|
||||
*
|
||||
* @param blockID - Block ID
|
||||
* @return trace ID
|
||||
*/
|
||||
public String getTraceID(File dbPath, long blockID) {
|
||||
String volumeName = dbPath.getName();
|
||||
String userName = dbPath.getParentFile().getName();
|
||||
// mapping to seconds to make the string smaller.
|
||||
return tracePrefix + ":" + userName + ":" + volumeName
|
||||
+ ":" + blockID + ":" + Time.monotonicNow() / 1000;
|
||||
}
|
||||
|
||||
/**
|
||||
* Keeps a Reference counted DB that we close only when the total Reference
|
||||
* has gone to zero.
|
||||
|
@ -174,9 +174,9 @@ public class AsyncBlockWriter {
|
||||
long startTime = Time.monotonicNow();
|
||||
client = parentCache.getClientManager()
|
||||
.acquireClient(parentCache.getPipeline(block.getBlockID()));
|
||||
// BUG: fix the trace ID.
|
||||
ContainerProtocolCalls.writeSmallFile(client, containerName,
|
||||
Long.toString(block.getBlockID()), block.getData().array(), "");
|
||||
Long.toString(block.getBlockID()), block.getData().array(),
|
||||
parentCache.getTraceID(block.getBlockID()));
|
||||
long endTime = Time.monotonicNow();
|
||||
if (parentCache.isTraceEnabled()) {
|
||||
String datahash = DigestUtils.sha256Hex(block.getData().array());
|
||||
|
@ -27,23 +27,19 @@ import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.scm.XceiverClientManager;
|
||||
import org.apache.hadoop.scm.container.common.helpers.Pipeline;
|
||||
import org.apache.hadoop.cblock.jscsiHelper.CBlockTargetMetrics;
|
||||
import org.apache.hadoop.util.Time;
|
||||
import org.apache.hadoop.utils.LevelDBStore;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.InetAddress;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.UnknownHostException;
|
||||
import java.nio.file.FileStore;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.apache.hadoop.cblock.CBlockConfigKeys
|
||||
.DFS_CBLOCK_DISK_CACHE_PATH_DEFAULT;
|
||||
@ -74,7 +70,7 @@ public class CBlockLocalCache implements CacheModule {
|
||||
private final LevelDBStore cacheDB;
|
||||
|
||||
/**
|
||||
* Asyncblock writer updates the cacheDB and writes the blocks async to
|
||||
* AsyncBlock writer updates the cacheDB and writes the blocks async to
|
||||
* remote containers.
|
||||
*/
|
||||
private final AsyncBlockWriter blockWriter;
|
||||
@ -85,17 +81,8 @@ public class CBlockLocalCache implements CacheModule {
|
||||
* update the cacheDB.
|
||||
*/
|
||||
private final SyncBlockReader blockReader;
|
||||
/**
|
||||
* We create a trace ID to make it easy to debug issues.
|
||||
* A trace ID is in the following format. IPAddress:VolumeName:blockID:second
|
||||
* <p>
|
||||
* This will get written down on the data node if we get any failures, so
|
||||
* with this trace ID we can correlate cBlock failures across machines.
|
||||
*/
|
||||
private final String userName;
|
||||
private final String volumeName;
|
||||
private final String ipAddressString;
|
||||
private final String tracePrefix;
|
||||
|
||||
/**
|
||||
* From a block ID we are able to get the pipeline by indexing this array.
|
||||
@ -160,8 +147,6 @@ public class CBlockLocalCache implements CacheModule {
|
||||
cacheDB = flusher.getCacheDB(dbPath.toString());
|
||||
this.containerList = containerPipelines.toArray(new
|
||||
Pipeline[containerPipelines.size()]);
|
||||
this.ipAddressString = getHostIP();
|
||||
this.tracePrefix = ipAddressString + ":" + this.volumeName;
|
||||
this.volumeSize = volumeSize;
|
||||
|
||||
blockWriter = new AsyncBlockWriter(conf, this);
|
||||
@ -325,22 +310,6 @@ public class CBlockLocalCache implements CacheModule {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tries to get the local host IP Address for creating trace IDs.
|
||||
*/
|
||||
private String getHostIP() {
|
||||
String tmp;
|
||||
try {
|
||||
tmp = InetAddress.getLocalHost().toString();
|
||||
} catch (UnknownHostException ex) {
|
||||
tmp = UUID.randomUUID().toString();
|
||||
LOG.error("Unable to read the host address. Using a GUID for " +
|
||||
"hostname:{} ", tmp, ex);
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the local cache DB.
|
||||
*
|
||||
@ -397,18 +366,8 @@ public class CBlockLocalCache implements CacheModule {
|
||||
return containerList[containerIdx];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a traceID based in Block ID.
|
||||
* The format is HostIP:VolumeName:BlockID:timeStamp, in case of error this
|
||||
* will be logged on the container side.
|
||||
*
|
||||
* @param blockID - Block ID
|
||||
* @return trace ID
|
||||
*/
|
||||
String getTraceID(long blockID) {
|
||||
// mapping to seconds to make the string smaller.
|
||||
return this.tracePrefix + ":" + blockID + ":"
|
||||
+ Time.monotonicNow() / 1000;
|
||||
return flusher.getTraceID(dbPath, blockID);
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
x
Reference in New Issue
Block a user