HDFS-12255. Block Storage: Cblock should generated unique trace ID for the ops. Contributed by Mukul Kumar Singh.
This commit is contained in:
parent
e61530f5d9
commit
6a16d7c7ab
@ -93,9 +93,9 @@ public void run() {
|
|||||||
long endTime = Time.monotonicNow();
|
long endTime = Time.monotonicNow();
|
||||||
Preconditions.checkState(data.length > 0, "Block data is zero length");
|
Preconditions.checkState(data.length > 0, "Block data is zero length");
|
||||||
startTime = Time.monotonicNow();
|
startTime = Time.monotonicNow();
|
||||||
// BUG: fix the trace ID.
|
|
||||||
ContainerProtocolCalls.writeSmallFile(client, containerName,
|
ContainerProtocolCalls.writeSmallFile(client, containerName,
|
||||||
Long.toString(block.getBlockID()), data, "");
|
Long.toString(block.getBlockID()), data,
|
||||||
|
flusher.getTraceID(new File(dbPath), block.getBlockID()));
|
||||||
endTime = Time.monotonicNow();
|
endTime = Time.monotonicNow();
|
||||||
flusher.getTargetMetrics().updateContainerWriteLatency(
|
flusher.getTargetMetrics().updateContainerWriteLatency(
|
||||||
endTime - startTime);
|
endTime - startTime);
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
import org.apache.hadoop.ozone.OzoneConsts;
|
import org.apache.hadoop.ozone.OzoneConsts;
|
||||||
import org.apache.hadoop.scm.XceiverClientManager;
|
import org.apache.hadoop.scm.XceiverClientManager;
|
||||||
import org.apache.hadoop.scm.container.common.helpers.Pipeline;
|
import org.apache.hadoop.scm.container.common.helpers.Pipeline;
|
||||||
|
import org.apache.hadoop.util.Time;
|
||||||
import org.apache.hadoop.utils.LevelDBStore;
|
import org.apache.hadoop.utils.LevelDBStore;
|
||||||
import org.iq80.leveldb.Options;
|
import org.iq80.leveldb.Options;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -37,6 +38,8 @@
|
|||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.net.InetAddress;
|
||||||
|
import java.net.UnknownHostException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.channels.ReadableByteChannel;
|
import java.nio.channels.ReadableByteChannel;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
@ -45,6 +48,7 @@
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
import java.util.concurrent.ArrayBlockingQueue;
|
import java.util.concurrent.ArrayBlockingQueue;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.ConcurrentMap;
|
import java.util.concurrent.ConcurrentMap;
|
||||||
@ -103,6 +107,7 @@ public class ContainerCacheFlusher implements Runnable {
|
|||||||
private AtomicBoolean shutdown;
|
private AtomicBoolean shutdown;
|
||||||
private final long levelDBCacheSize;
|
private final long levelDBCacheSize;
|
||||||
private final int maxRetryCount;
|
private final int maxRetryCount;
|
||||||
|
private final String tracePrefix;
|
||||||
|
|
||||||
private final ConcurrentMap<String, FinishCounter> finishCountMap;
|
private final ConcurrentMap<String, FinishCounter> finishCountMap;
|
||||||
|
|
||||||
@ -158,6 +163,7 @@ public ContainerCacheFlusher(Configuration config,
|
|||||||
this.maxRetryCount =
|
this.maxRetryCount =
|
||||||
config.getInt(CBlockConfigKeys.DFS_CBLOCK_CACHE_MAX_RETRY_KEY,
|
config.getInt(CBlockConfigKeys.DFS_CBLOCK_CACHE_MAX_RETRY_KEY,
|
||||||
CBlockConfigKeys.DFS_CBLOCK_CACHE_MAX_RETRY_DEFAULT);
|
CBlockConfigKeys.DFS_CBLOCK_CACHE_MAX_RETRY_DEFAULT);
|
||||||
|
this.tracePrefix = getTracePrefix();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkExistingLog(String prefixFileName, File dbPath) {
|
private void checkExistingLog(String prefixFileName, File dbPath) {
|
||||||
@ -436,6 +442,40 @@ public void run() {
|
|||||||
LOG.info("Exiting flusher");
|
LOG.info("Exiting flusher");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tries to get the local host IP Address as trace prefix
|
||||||
|
* for creating trace IDs, otherwise uses a random UUID for it.
|
||||||
|
*/
|
||||||
|
private static String getTracePrefix() {
|
||||||
|
String tmp;
|
||||||
|
try {
|
||||||
|
tmp = InetAddress.getLocalHost().getHostAddress();
|
||||||
|
} catch (UnknownHostException ex) {
|
||||||
|
tmp = UUID.randomUUID().toString();
|
||||||
|
LOG.error("Unable to read the host address. Using a GUID for " +
|
||||||
|
"hostname:{} ", tmp, ex);
|
||||||
|
}
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* We create a trace ID to make it easy to debug issues.
|
||||||
|
* A trace ID is in IPAddress:UserName:VolumeName:blockID:second format.
|
||||||
|
*
|
||||||
|
* This will get written down on the data node if we get any failures, so
|
||||||
|
* with this trace ID we can correlate cBlock failures across machines.
|
||||||
|
*
|
||||||
|
* @param blockID - Block ID
|
||||||
|
* @return trace ID
|
||||||
|
*/
|
||||||
|
public String getTraceID(File dbPath, long blockID) {
|
||||||
|
String volumeName = dbPath.getName();
|
||||||
|
String userName = dbPath.getParentFile().getName();
|
||||||
|
// mapping to seconds to make the string smaller.
|
||||||
|
return tracePrefix + ":" + userName + ":" + volumeName
|
||||||
|
+ ":" + blockID + ":" + Time.monotonicNow() / 1000;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Keeps a Reference counted DB that we close only when the total Reference
|
* Keeps a Reference counted DB that we close only when the total Reference
|
||||||
* has gone to zero.
|
* has gone to zero.
|
||||||
|
@ -174,9 +174,9 @@ public void writeBlock(LogicalBlock block) throws IOException {
|
|||||||
long startTime = Time.monotonicNow();
|
long startTime = Time.monotonicNow();
|
||||||
client = parentCache.getClientManager()
|
client = parentCache.getClientManager()
|
||||||
.acquireClient(parentCache.getPipeline(block.getBlockID()));
|
.acquireClient(parentCache.getPipeline(block.getBlockID()));
|
||||||
// BUG: fix the trace ID.
|
|
||||||
ContainerProtocolCalls.writeSmallFile(client, containerName,
|
ContainerProtocolCalls.writeSmallFile(client, containerName,
|
||||||
Long.toString(block.getBlockID()), block.getData().array(), "");
|
Long.toString(block.getBlockID()), block.getData().array(),
|
||||||
|
parentCache.getTraceID(block.getBlockID()));
|
||||||
long endTime = Time.monotonicNow();
|
long endTime = Time.monotonicNow();
|
||||||
if (parentCache.isTraceEnabled()) {
|
if (parentCache.isTraceEnabled()) {
|
||||||
String datahash = DigestUtils.sha256Hex(block.getData().array());
|
String datahash = DigestUtils.sha256Hex(block.getData().array());
|
||||||
|
@ -27,23 +27,19 @@
|
|||||||
import org.apache.hadoop.scm.XceiverClientManager;
|
import org.apache.hadoop.scm.XceiverClientManager;
|
||||||
import org.apache.hadoop.scm.container.common.helpers.Pipeline;
|
import org.apache.hadoop.scm.container.common.helpers.Pipeline;
|
||||||
import org.apache.hadoop.cblock.jscsiHelper.CBlockTargetMetrics;
|
import org.apache.hadoop.cblock.jscsiHelper.CBlockTargetMetrics;
|
||||||
import org.apache.hadoop.util.Time;
|
|
||||||
import org.apache.hadoop.utils.LevelDBStore;
|
import org.apache.hadoop.utils.LevelDBStore;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.InetAddress;
|
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
import java.net.UnknownHostException;
|
|
||||||
import java.nio.file.FileStore;
|
import java.nio.file.FileStore;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.UUID;
|
|
||||||
|
|
||||||
import static org.apache.hadoop.cblock.CBlockConfigKeys
|
import static org.apache.hadoop.cblock.CBlockConfigKeys
|
||||||
.DFS_CBLOCK_DISK_CACHE_PATH_DEFAULT;
|
.DFS_CBLOCK_DISK_CACHE_PATH_DEFAULT;
|
||||||
@ -74,7 +70,7 @@ public class CBlockLocalCache implements CacheModule {
|
|||||||
private final LevelDBStore cacheDB;
|
private final LevelDBStore cacheDB;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Asyncblock writer updates the cacheDB and writes the blocks async to
|
* AsyncBlock writer updates the cacheDB and writes the blocks async to
|
||||||
* remote containers.
|
* remote containers.
|
||||||
*/
|
*/
|
||||||
private final AsyncBlockWriter blockWriter;
|
private final AsyncBlockWriter blockWriter;
|
||||||
@ -85,17 +81,8 @@ public class CBlockLocalCache implements CacheModule {
|
|||||||
* update the cacheDB.
|
* update the cacheDB.
|
||||||
*/
|
*/
|
||||||
private final SyncBlockReader blockReader;
|
private final SyncBlockReader blockReader;
|
||||||
/**
|
|
||||||
* We create a trace ID to make it easy to debug issues.
|
|
||||||
* A trace ID is in the following format. IPAddress:VolumeName:blockID:second
|
|
||||||
* <p>
|
|
||||||
* This will get written down on the data node if we get any failures, so
|
|
||||||
* with this trace ID we can correlate cBlock failures across machines.
|
|
||||||
*/
|
|
||||||
private final String userName;
|
private final String userName;
|
||||||
private final String volumeName;
|
private final String volumeName;
|
||||||
private final String ipAddressString;
|
|
||||||
private final String tracePrefix;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* From a block ID we are able to get the pipeline by indexing this array.
|
* From a block ID we are able to get the pipeline by indexing this array.
|
||||||
@ -160,8 +147,6 @@ public File getDbPath() {
|
|||||||
cacheDB = flusher.getCacheDB(dbPath.toString());
|
cacheDB = flusher.getCacheDB(dbPath.toString());
|
||||||
this.containerList = containerPipelines.toArray(new
|
this.containerList = containerPipelines.toArray(new
|
||||||
Pipeline[containerPipelines.size()]);
|
Pipeline[containerPipelines.size()]);
|
||||||
this.ipAddressString = getHostIP();
|
|
||||||
this.tracePrefix = ipAddressString + ":" + this.volumeName;
|
|
||||||
this.volumeSize = volumeSize;
|
this.volumeSize = volumeSize;
|
||||||
|
|
||||||
blockWriter = new AsyncBlockWriter(conf, this);
|
blockWriter = new AsyncBlockWriter(conf, this);
|
||||||
@ -325,22 +310,6 @@ public boolean isDirtyCache() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tries to get the local host IP Address for creating trace IDs.
|
|
||||||
*/
|
|
||||||
private String getHostIP() {
|
|
||||||
String tmp;
|
|
||||||
try {
|
|
||||||
tmp = InetAddress.getLocalHost().toString();
|
|
||||||
} catch (UnknownHostException ex) {
|
|
||||||
tmp = UUID.randomUUID().toString();
|
|
||||||
LOG.error("Unable to read the host address. Using a GUID for " +
|
|
||||||
"hostname:{} ", tmp, ex);
|
|
||||||
}
|
|
||||||
return tmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the local cache DB.
|
* Returns the local cache DB.
|
||||||
*
|
*
|
||||||
@ -397,18 +366,8 @@ Pipeline getPipeline(long blockId) {
|
|||||||
return containerList[containerIdx];
|
return containerList[containerIdx];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a traceID based in Block ID.
|
|
||||||
* The format is HostIP:VolumeName:BlockID:timeStamp, in case of error this
|
|
||||||
* will be logged on the container side.
|
|
||||||
*
|
|
||||||
* @param blockID - Block ID
|
|
||||||
* @return trace ID
|
|
||||||
*/
|
|
||||||
String getTraceID(long blockID) {
|
String getTraceID(long blockID) {
|
||||||
// mapping to seconds to make the string smaller.
|
return flusher.getTraceID(dbPath, blockID);
|
||||||
return this.tracePrefix + ":" + blockID + ":"
|
|
||||||
+ Time.monotonicNow() / 1000;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
Reference in New Issue
Block a user