HDDS-870. Avoid creating block sized buffer in ChunkGroupOutputStream. Contributed by Shashikant Banerjee.

Jitendra Pandey 2018-12-08 09:23:10 -08:00
parent 5e773efd78
commit 1afba83f2c
12 changed files with 505 additions and 385 deletions

View File

@ -290,12 +290,16 @@ private void reconnect(DatanodeDetails dn)
}
@Override
public void watchForCommit(long index, long timeout)
public long watchForCommit(long index, long timeout)
throws InterruptedException, ExecutionException, TimeoutException,
IOException {
// there is no notion of watch for commit index in standalone pipeline
return 0;
};
public long getReplicatedMinCommitIndex() {
return 0;
}
/**
* Returns pipeline Type.
*

View File

@ -18,7 +18,9 @@
package org.apache.hadoop.hdds.scm;
import com.google.common.base.Preconditions;
import org.apache.hadoop.hdds.HddsUtils;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.ratis.proto.RaftProtos;
import org.apache.ratis.protocol.RaftRetryFailureException;
import org.apache.ratis.retry.RetryPolicy;
@ -42,15 +44,14 @@
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Objects;
import java.util.Collection;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.ConcurrentHashMap;
/**
* An abstract implementation of {@link XceiverClientSpi} using Ratis.
@ -79,6 +80,12 @@ public static XceiverClientRatis newXceiverClientRatis(
private final int maxOutstandingRequests;
private final RetryPolicy retryPolicy;
// Map to track commit index at every server
private final ConcurrentHashMap<String, Long> commitInfoMap;
// create a separate RaftClient for watchForCommit API
private RaftClient watchClient;
/**
* Constructs a client.
*/
@ -89,6 +96,30 @@ private XceiverClientRatis(Pipeline pipeline, RpcType rpcType,
this.rpcType = rpcType;
this.maxOutstandingRequests = maxOutStandingChunks;
this.retryPolicy = retryPolicy;
commitInfoMap = new ConcurrentHashMap<>();
watchClient = null;
}
private void updateCommitInfosMap(
Collection<RaftProtos.CommitInfoProto> commitInfoProtos) {
// if the commitInfo map is empty, just update the commit indexes for each
// of the servers
if (commitInfoMap.isEmpty()) {
commitInfoProtos.forEach(proto -> commitInfoMap
.put(proto.getServer().getAddress(), proto.getCommitIndex()));
// In case the commit is happening 2 way, just update the commitIndex
// for the servers which have been successfully updating the commit
// indexes. This is important because getReplicatedMinCommitIndex()
// should always return the min commit index out of the nodes which have
// been replicating data successfully.
} else {
commitInfoProtos.forEach(proto -> commitInfoMap
.computeIfPresent(proto.getServer().getAddress(),
(address, index) -> {
index = proto.getCommitIndex();
return index;
}));
}
}
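
For intuition, here is a self-contained sketch of the same update rule; the datanode addresses and commit indexes are invented for illustration and are not part of the patch:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Illustrative sketch of updateCommitInfosMap(): on the first reply every
// server is recorded; afterwards only servers already present are refreshed,
// so a node evicted after a watch timeout never re-enters the map.
public class CommitMapSketch {
  public static void main(String[] args) {
    ConcurrentHashMap<String, Long> commitInfoMap = new ConcurrentHashMap<>();
    update(commitInfoMap, Map.of("dn1", 5L, "dn2", 5L, "dn3", 5L));
    commitInfoMap.remove("dn3"); // dn3 failed a 3-way watch earlier
    update(commitInfoMap, Map.of("dn1", 9L, "dn2", 8L, "dn3", 9L));
    long min = commitInfoMap.values().stream()
        .mapToLong(Long::longValue).min().orElse(0);
    // e.g. {dn1=9, dn2=8} -> replicated min 8; dn3 stays excluded
    System.out.println(commitInfoMap + " -> replicated min " + min);
  }

  private static void update(Map<String, Long> map, Map<String, Long> reply) {
    if (map.isEmpty()) {
      map.putAll(reply);
    } else {
      reply.forEach((addr, idx) -> map.computeIfPresent(addr, (a, old) -> idx));
    }
  }
}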
/**
@ -125,6 +156,9 @@ public void close() {
if (c != null) {
closeRaftClient(c);
}
if (watchClient != null) {
closeRaftClient(watchClient);
}
}
private void closeRaftClient(RaftClient raftClient) {
@ -148,39 +182,73 @@ private CompletableFuture<RaftClientReply> sendRequestAsync(
getClient().sendAsync(() -> byteString);
}
// gets the minimum log index replicated to all servers
@Override
public void watchForCommit(long index, long timeout)
public long getReplicatedMinCommitIndex() {
OptionalLong minIndex =
commitInfoMap.values().parallelStream().mapToLong(v -> v).min();
return minIndex.isPresent() ? minIndex.getAsLong() : 0;
}
// TODO: stub for failed-server detection from commit infos; the loop body
// is empty and the method is currently unused
private void getFailedServer(
Collection<RaftProtos.CommitInfoProto> commitInfos) {
for (RaftProtos.CommitInfoProto proto : commitInfos) {
}
}
@Override
public long watchForCommit(long index, long timeout)
throws InterruptedException, ExecutionException, TimeoutException,
IOException {
long commitIndex = getReplicatedMinCommitIndex();
if (commitIndex >= index) {
// return the min commit index till which the log has been replicated to
// all servers
return commitIndex;
}
LOG.debug("commit index : {} watch timeout : {}", index, timeout);
// create a new RaftClient instance for watch request
RaftClient raftClient =
RatisHelper.newRaftClient(rpcType, getPipeline(), retryPolicy);
CompletableFuture<RaftClientReply> replyFuture = raftClient
if (watchClient == null) {
watchClient =
RatisHelper.newRaftClient(rpcType, getPipeline(), retryPolicy);
}
CompletableFuture<RaftClientReply> replyFuture = watchClient
.sendWatchAsync(index, RaftProtos.ReplicationLevel.ALL_COMMITTED);
RaftClientReply reply;
try {
replyFuture.get(timeout, TimeUnit.MILLISECONDS);
reply = replyFuture.get(timeout, TimeUnit.MILLISECONDS);
} catch (TimeoutException toe) {
LOG.warn("3 way commit failed ", toe);
closeRaftClient(raftClient);
closeRaftClient(watchClient);
// generate a new raft client instance again so that the next watch request
// does not get blocked by the previous one
// TODO : need to remove the code to create the new RaftClient instance
// here once the watch request bypassing sliding window in Raft Client
// gets fixed.
raftClient =
watchClient =
RatisHelper.newRaftClient(rpcType, getPipeline(), retryPolicy);
raftClient
reply = watchClient
.sendWatchAsync(index, RaftProtos.ReplicationLevel.MAJORITY_COMMITTED)
.get(timeout, TimeUnit.MILLISECONDS);
LOG.info("Could not commit " + index + " to all the nodes."
+ "Committed by majority.");
} finally {
closeRaftClient(raftClient);
Optional<RaftProtos.CommitInfoProto>
proto = reply.getCommitInfos().stream().min(Comparator.comparing(
RaftProtos.CommitInfoProto :: getCommitIndex));
Preconditions.checkState(proto.isPresent());
String address = proto.get().getServer().getAddress();
// since 3 way commit has failed, the updated map from now on will
// only store entries for those datanodes which have had successful
// replication.
commitInfoMap.remove(address);
LOG.info(
"Could not commit " + index + " to all the nodes. Server " + address
+ " has failed. Committed by majority.");
}
return index;
}
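
The control flow above, a 3-way watch first, a majority watch on timeout, then eviction of the lagging server from the commit map, condenses to the following sketch; the Watcher interface is a hypothetical stand-in for the Ratis RaftClient watch API:

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

// Hypothetical stand-in for the Ratis watch API, for illustration only.
interface Watcher {
  CompletableFuture<Long> watch(long index, boolean allCommitted);
}

class WatchSketch {
  static long watchForCommit(Watcher watcher, long index, long timeoutMs)
      throws InterruptedException, ExecutionException, TimeoutException {
    try {
      // first insist on ALL_COMMITTED semantics within the timeout
      return watcher.watch(index, true).get(timeoutMs, TimeUnit.MILLISECONDS);
    } catch (TimeoutException toe) {
      // a replica is lagging: degrade to MAJORITY_COMMITTED semantics; the
      // caller then drops the slow server from its commit info map
      return watcher.watch(index, false).get(timeoutMs, TimeUnit.MILLISECONDS);
    }
  }
}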
/**
* Sends a given command to server gets a waitable future back.
*
@ -193,8 +261,6 @@ public XceiverClientAsyncReply sendCommandAsync(
XceiverClientAsyncReply asyncReply = new XceiverClientAsyncReply(null);
CompletableFuture<RaftClientReply> raftClientReply =
sendRequestAsync(request);
Collection<XceiverClientAsyncReply.CommitInfo> commitInfos =
new ArrayList<>();
CompletableFuture<ContainerCommandResponseProto> containerCommandResponse =
raftClientReply.whenComplete((reply, e) -> LOG.debug(
"received reply {} for request: cmdType={} containerID={}"
@ -212,14 +278,10 @@ public XceiverClientAsyncReply sendCommandAsync(
ContainerCommandResponseProto response =
ContainerCommandResponseProto
.parseFrom(reply.getMessage().getContent());
reply.getCommitInfos().forEach(e -> {
XceiverClientAsyncReply.CommitInfo commitInfo =
new XceiverClientAsyncReply.CommitInfo(
e.getServer().getAddress(), e.getCommitIndex());
commitInfos.add(commitInfo);
asyncReply.setCommitInfos(commitInfos);
if (response.getResult() == ContainerProtos.Result.SUCCESS) {
updateCommitInfosMap(reply.getCommitInfos());
asyncReply.setLogIndex(reply.getLogIndex());
});
}
return response;
} catch (InvalidProtocolBufferException e) {
throw new CompletionException(e);

View File

@ -37,15 +37,13 @@
import java.io.IOException;
import java.io.OutputStream;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.util.UUID;
import java.util.List;
import java.util.ArrayList;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.*;
import static org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls
.putBlockAsync;
import static org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls
@ -84,25 +82,30 @@ public class ChunkOutputStream extends OutputStream {
private final long streamBufferFlushSize;
private final long streamBufferMaxSize;
private final long watchTimeout;
private ByteBuffer buffer;
private List<ByteBuffer> bufferList;
// The IOException will be set by the response handling thread in case an
// exception is received in the response. If the exception is set, the next
// request will fail upfront.
private IOException ioException;
private ExecutorService responseExecutor;
// position of the buffer where the last flush was attempted
private int lastFlushPos;
// the effective length of data flushed so far
private long totalDataFlushedLength;
// position of the buffer till which the flush was successfully
// acknowledged by all nodes in pipeline
private int lastSuccessfulFlushIndex;
// effective data write attempted so far for the block
private long writtenDataLength;
// total data which has been successfully flushed and acknowledged
// by all servers
private long totalAckDataLength;
// list to hold all putBlock futures
private List<CompletableFuture<ContainerProtos.ContainerCommandResponseProto>>
futureList;
// list maintaining commit indexes for putBlocks
private List<Long> commitIndexList;
// map maintaining the mapping from putBlock logIndex to flushedDataLength
private ConcurrentHashMap<Long, Long> commitIndex2flushedDataMap;
private int currentBufferIndex;
/**
* Creates a new ChunkOutputStream.
@ -113,12 +116,17 @@ public class ChunkOutputStream extends OutputStream {
* @param xceiverClient client to perform container calls
* @param traceID container protocol call args
* @param chunkSize chunk size
* @param bufferList list of byte buffers
* @param streamBufferFlushSize flush size
* @param streamBufferMaxSize max size of the currentBuffer
* @param watchTimeout watch timeout
* @param checksum checksum
*/
public ChunkOutputStream(BlockID blockID, String key,
XceiverClientManager xceiverClientManager, XceiverClientSpi xceiverClient,
String traceID, int chunkSize, long streamBufferFlushSize,
long streamBufferMaxSize, long watchTimeout, ByteBuffer buffer,
Checksum checksum) {
long streamBufferMaxSize, long watchTimeout,
List<ByteBuffer> bufferList, Checksum checksum) {
this.blockID = blockID;
this.key = key;
this.traceID = traceID;
@ -135,24 +143,36 @@ public ChunkOutputStream(BlockID blockID, String key,
this.streamBufferFlushSize = streamBufferFlushSize;
this.streamBufferMaxSize = streamBufferMaxSize;
this.watchTimeout = watchTimeout;
this.buffer = buffer;
this.ioException = null;
this.bufferList = bufferList;
this.checksum = checksum;
// A single thread executor handles the responses of async requests
responseExecutor = Executors.newSingleThreadExecutor();
commitIndexList = new ArrayList<>();
lastSuccessfulFlushIndex = 0;
commitIndex2flushedDataMap = new ConcurrentHashMap<>();
totalAckDataLength = 0;
futureList = new ArrayList<>();
lastFlushPos = 0;
totalDataFlushedLength = 0;
currentBufferIndex = 0;
writtenDataLength = 0;
}
public BlockID getBlockID() {
return blockID;
}
public int getLastSuccessfulFlushIndex() {
return lastSuccessfulFlushIndex;
public long getTotalSuccessfulFlushedData() {
return totalAckDataLength;
}
public long getWrittenDataLength() {
return writtenDataLength;
}
private long computeBufferData() {
int dataLength =
bufferList.stream().mapToInt(Buffer::position).sum();
Preconditions.checkState(dataLength <= streamBufferMaxSize);
return dataLength;
}
@ -176,139 +196,172 @@ public void write(byte[] b, int off, int len) throws IOException {
if (len == 0) {
return;
}
checkOpen();
while (len > 0) {
checkOpen();
int writeLen;
writeLen = Math.min(chunkSize - buffer.position() % chunkSize, len);
buffer.put(b, off, writeLen);
if (buffer.position() % chunkSize == 0) {
int pos = buffer.position() - chunkSize;
int limit = buffer.position();
allocateBuffer();
ByteBuffer currentBuffer = getCurrentBuffer();
writeLen =
Math.min(chunkSize - currentBuffer.position() % chunkSize, len);
currentBuffer.put(b, off, writeLen);
if (currentBuffer.position() % chunkSize == 0) {
int pos = currentBuffer.position() - chunkSize;
int limit = currentBuffer.position();
writeChunk(pos, limit);
}
off += writeLen;
len -= writeLen;
if (buffer.position() >= streamBufferFlushSize
&& buffer.position() % streamBufferFlushSize == 0) {
lastFlushPos = buffer.position();
futureList.add(handlePartialFlush());
writtenDataLength += writeLen;
if (currentBuffer.position() == streamBufferFlushSize) {
totalDataFlushedLength += streamBufferFlushSize;
handlePartialFlush();
}
if (buffer.position() >= streamBufferMaxSize
&& buffer.position() % streamBufferMaxSize == 0) {
long bufferedData = computeBufferData();
// Data in the bufferList can not exceed streamBufferMaxSize
if (bufferedData == streamBufferMaxSize) {
handleFullBuffer();
}
}
}
private ByteBuffer getCurrentBuffer() {
ByteBuffer buffer = bufferList.get(currentBufferIndex);
if (!buffer.hasRemaining()) {
currentBufferIndex =
currentBufferIndex < getMaxNumBuffers() - 1 ? ++currentBufferIndex :
0;
}
return bufferList.get(currentBufferIndex);
}
private int getMaxNumBuffers() {
return (int)(streamBufferMaxSize/streamBufferFlushSize);
}
private void allocateBuffer() {
for (int i = bufferList.size(); i < getMaxNumBuffers(); i++) {
bufferList.add(ByteBuffer.allocate((int)streamBufferFlushSize));
}
}
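
With the default sizes from ozone-default.xml further below (64MB flush size, 128MB max size), getMaxNumBuffers() is 2 and writes rotate between two flush-sized buffers. A toy model of that rotation, using invented 4-byte buffers:

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

// Toy model of getCurrentBuffer()/allocateBuffer(): max size 8, flush size 4,
// hence 8 / 4 = 2 buffers; when one fills, the index wraps to the next.
// All sizes are invented for illustration.
class BufferRingSketch {
  public static void main(String[] args) {
    final int flushSize = 4;
    final int maxBuffers = 8 / flushSize;
    List<ByteBuffer> bufferList = new ArrayList<>();
    for (int i = 0; i < maxBuffers; i++) {
      bufferList.add(ByteBuffer.allocate(flushSize));
    }
    int currentBufferIndex = 0;
    for (byte b = 0; b < 6; b++) {
      ByteBuffer current = bufferList.get(currentBufferIndex);
      if (!current.hasRemaining()) {
        // full buffer: advance the index, wrapping like getCurrentBuffer()
        currentBufferIndex = (currentBufferIndex + 1) % maxBuffers;
        current = bufferList.get(currentBufferIndex);
      }
      current.put(b);
    }
    // buffer 0 is full (awaiting ack), buffer 1 holds the remaining 2 bytes
    System.out.println(bufferList.get(0).position() + ", "
        + bufferList.get(1).position()); // 4, 2
  }
}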
/**
* Will be called on the retry path in case of a
* ClosedContainerException/TimeoutException.
* @param len length of data to write
* @throws IOException if error occured
* @throws IOException if error occurred
*/
// In this case, the data is already cached in the buffer.
public void writeOnRetry(int len) throws IOException {
// In this case, the data is already cached in the currentBuffer.
public void writeOnRetry(long len) throws IOException {
if (len == 0) {
return;
}
int off = 0;
checkOpen();
int pos = off;
while (len > 0) {
int writeLen;
long writeLen;
writeLen = Math.min(chunkSize, len);
if (writeLen == chunkSize) {
int pos = off;
int limit = pos + chunkSize;
writeChunk(pos, limit);
}
off += writeLen;
len -= writeLen;
writtenDataLength += writeLen;
if (off % streamBufferFlushSize == 0) {
lastFlushPos = off;
futureList.add(handlePartialFlush());
// reset the position to zero as now we will be reading the next buffer in
// the list
pos = 0;
totalDataFlushedLength += streamBufferFlushSize;
handlePartialFlush();
}
if (off % streamBufferMaxSize == 0) {
if (computeBufferData() % streamBufferMaxSize == 0) {
handleFullBuffer();
}
}
}
private void handleResponse(
ContainerProtos.ContainerCommandResponseProto response,
XceiverClientAsyncReply asyncReply) {
validateResponse(response);
discardBuffer(asyncReply);
}
private void discardBuffer(XceiverClientAsyncReply asyncReply) {
if (!commitIndexList.isEmpty()) {
long index = commitIndexList.get(0);
if (checkIfBufferDiscardRequired(asyncReply, index)) {
updateFlushIndex();
}
}
}
/**
* just update the lastSuccessfulFlushIndex. Since we have allocated
* the buffer more than the streamBufferMaxSize, we can keep on writing
* to the buffer. In case of failure, we will read the data starting from
* lastSuccessfulFlushIndex.
* just update the totalAckDataLength. Since we have allocated
* the currentBuffer more than the streamBufferMaxSize, we can keep on writing
* to the currentBuffer. In case of failure, we will read the data starting
* from totalAckDataLength.
*/
private void updateFlushIndex() {
lastSuccessfulFlushIndex += streamBufferFlushSize;
LOG.debug("Discarding buffer till pos " + lastSuccessfulFlushIndex);
if (!commitIndexList.isEmpty()) {
commitIndexList.remove(0);
private void updateFlushIndex(long index) {
if (!commitIndex2flushedDataMap.isEmpty()) {
Preconditions.checkState(commitIndex2flushedDataMap.containsKey(index));
totalAckDataLength = commitIndex2flushedDataMap.remove(index);
LOG.debug("Total data successfully replicated: " + totalAckDataLength);
futureList.remove(0);
}
}
/**
* Check if the last commitIndex stored at the beginning of the
* commitIndexList is less than equal to current commitInfo indexes.
* If its true, the buffer has been successfully flushed till the
* last position where flush happened.
*/
private boolean checkIfBufferDiscardRequired(
XceiverClientAsyncReply asyncReply, long commitIndex) {
if (asyncReply.getCommitInfos() != null) {
for (XceiverClientAsyncReply.CommitInfo info : asyncReply
.getCommitInfos()) {
if (info.getCommitIndex() < commitIndex) {
return false;
}
// Flush has been committed to the required servers successfully.
// Just swap the bufferList head and tail after clearing.
ByteBuffer currentBuffer = bufferList.remove(0);
currentBuffer.clear();
if (currentBufferIndex != 0) {
currentBufferIndex--;
}
bufferList.add(currentBuffer);
}
return true;
}
/**
* This is a blocking call.It will wait for the flush till the commit index
* at the head of the commitIndexList gets replicated to all or majority.
* This is a blocking call. It will wait for the flush till the commit index
* at the head of the commitIndex2flushedDataMap gets replicated to all or
* majority.
* @throws IOException
*/
private void handleFullBuffer() throws IOException {
if (!commitIndexList.isEmpty()) {
watchForCommit(commitIndexList.get(0));
try {
checkOpen();
if (!futureList.isEmpty()) {
waitOnFlushFutures();
}
} catch (InterruptedException | ExecutionException e) {
adjustBuffersOnException();
throw new IOException(
"Unexpected Storage Container Exception: " + e.toString(), e);
}
if (!commitIndex2flushedDataMap.isEmpty()) {
watchForCommit(
commitIndex2flushedDataMap.keySet().stream().mapToLong(v -> v)
.min().getAsLong());
}
}
private void adjustBuffers(long commitIndex) {
commitIndex2flushedDataMap.keySet().stream().forEach(index -> {
if (index <= commitIndex) {
updateFlushIndex(index);
} else {
return;
}
});
}
// It may happen that once the exception is encountered, we still might
// have successfully flushed up to a certain index. Make sure the buffers
// only contain data which has not been sufficiently replicated
private void adjustBuffersOnException() {
adjustBuffers(xceiverClient.getReplicatedMinCommitIndex());
}
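
To make adjustBuffers() concrete with invented numbers: if putBlock calls at log indexes 10, 20 and 30 covered 64, 128 and 192 units of flushed data, a replicated min index of 20 acknowledges the first two entries:

import java.util.concurrent.ConcurrentHashMap;

// Sketch of adjustBuffers(): every entry at or below the replicated commit
// index is acknowledged and removed, mirroring updateFlushIndex(index).
// Indexes and lengths are invented for the example.
class AdjustBuffersSketch {
  public static void main(String[] args) {
    ConcurrentHashMap<Long, Long> commitIndex2flushedDataMap =
        new ConcurrentHashMap<>();
    commitIndex2flushedDataMap.put(10L, 64L);
    commitIndex2flushedDataMap.put(20L, 128L);
    commitIndex2flushedDataMap.put(30L, 192L);
    long replicatedMin = 20; // e.g. from getReplicatedMinCommitIndex()
    long totalAckDataLength = 0;
    for (long index : commitIndex2flushedDataMap.keySet().stream()
        .sorted().mapToLong(Long::longValue).toArray()) {
      if (index <= replicatedMin) {
        totalAckDataLength = commitIndex2flushedDataMap.remove(index);
      }
    }
    // prints "128 {30=192}": 128 units are acked, index 30 is still pending
    System.out.println(totalAckDataLength + " " + commitIndex2flushedDataMap);
  }
}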
/**
* calls watchForCommit API of the Ratis Client. For Standalone client,
* it is a no op.
* @param commitIndex log index to watch for
* @return minimum commit index replicated to all nodes
* @throws IOException IOException in case watch gets timed out
*/
private void watchForCommit(long commitIndex) throws IOException {
checkOpen();
Preconditions.checkState(!commitIndexList.isEmpty());
Preconditions.checkState(!commitIndex2flushedDataMap.isEmpty());
try {
xceiverClient.watchForCommit(commitIndex, watchTimeout);
long index =
xceiverClient.watchForCommit(commitIndex, watchTimeout);
adjustBuffers(index);
} catch (TimeoutException | InterruptedException | ExecutionException e) {
LOG.warn("watchForCommit failed for index " + commitIndex, e);
adjustBuffersOnException();
throw new IOException(
"Unexpected Storage Container Exception: " + e.toString(), e);
}
@ -317,68 +370,79 @@ private void watchForCommit(long commitIndex) throws IOException {
private CompletableFuture<ContainerProtos.
ContainerCommandResponseProto> handlePartialFlush()
throws IOException {
checkOpen();
long flushPos = totalDataFlushedLength;
String requestId =
traceID + ContainerProtos.Type.PutBlock + chunkIndex + blockID;
CompletableFuture<ContainerProtos.
ContainerCommandResponseProto> flushFuture;
try {
XceiverClientAsyncReply asyncReply =
putBlockAsync(xceiverClient, containerBlockData.build(), requestId);
CompletableFuture<ContainerProtos.ContainerCommandResponseProto> future =
asyncReply.getResponse();
return future.thenApplyAsync(e -> {
handleResponse(e, asyncReply);
flushFuture = future.thenApplyAsync(e -> {
try {
validateResponse(e);
} catch (IOException sce) {
future.completeExceptionally(sce);
return e;
}
// if the ioException is not set, putBlock is successful
if (ioException == null) {
LOG.debug(
"Adding index " + asyncReply.getLogIndex() + " commitList size "
+ commitIndexList.size());
"Adding index " + asyncReply.getLogIndex() + " commitMap size "
+ commitIndex2flushedDataMap.size());
BlockID responseBlockID = BlockID.getFromProtobuf(
e.getPutBlock().getCommittedBlockLength().getBlockID());
Preconditions.checkState(blockID.getContainerBlockID()
.equals(responseBlockID.getContainerBlockID()));
// updates the bcsId of the block
blockID = responseBlockID;
long index = asyncReply.getLogIndex();
// for standalone protocol, logIndex will always be 0.
if (index != 0) {
commitIndexList.add(index);
} else {
updateFlushIndex();
}
commitIndex2flushedDataMap.put(asyncReply.getLogIndex(), flushPos);
}
return e;
}, responseExecutor);
}, responseExecutor).exceptionally(e -> {
LOG.debug(
"putBlock failed for blockID " + blockID + " with exception " + e
.getLocalizedMessage());
CompletionException ce = new CompletionException(e);
setIoException(ce);
throw ce;
});
} catch (IOException | InterruptedException | ExecutionException e) {
throw new IOException(
"Unexpected Storage Container Exception: " + e.toString(), e);
}
futureList.add(flushFuture);
return flushFuture;
}
@Override
public void flush() throws IOException {
if (xceiverClientManager != null && xceiverClient != null
&& buffer != null) {
&& bufferList != null) {
checkOpen();
if (buffer.position() > 0 && lastSuccessfulFlushIndex != buffer
.position()) {
int bufferSize = bufferList.size();
if (bufferSize > 0) {
try {
// flush the last chunk data residing on the buffer
if (buffer.position() % chunkSize > 0) {
int pos = buffer.position() - (buffer.position() % chunkSize);
writeChunk(pos, buffer.position());
}
if (lastFlushPos != buffer.position()) {
lastFlushPos = buffer.position();
// flush the last chunk data residing on the currentBuffer
if (totalDataFlushedLength < writtenDataLength) {
ByteBuffer currentBuffer = getCurrentBuffer();
int pos = currentBuffer.position() - (currentBuffer.position()
% chunkSize);
int limit = currentBuffer.position() - pos;
writeChunk(pos, currentBuffer.position());
totalDataFlushedLength += limit;
handlePartialFlush();
}
CompletableFuture<Void> combinedFuture = CompletableFuture.allOf(
futureList.toArray(new CompletableFuture[futureList.size()]));
combinedFuture.get();
waitOnFlushFutures();
// just check again if the exception is hit while waiting for the
// futures to ensure flush has indeed succeeded
checkOpen();
} catch (InterruptedException | ExecutionException e) {
adjustBuffersOnException();
throw new IOException(
"Unexpected Storage Container Exception: " + e.toString(), e);
}
@ -388,11 +452,11 @@ public void flush() throws IOException {
private void writeChunk(int pos, int limit) throws IOException {
// Please note : We are not flipping the slice when we write since
// the slices are pointing the buffer start and end as needed for
// the slices are pointing to the currentBuffer start and end as needed for
// the chunk write. Also please note, Duplicate does not create a
// copy of data, it only creates metadata that points to the data
// stream.
ByteBuffer chunk = buffer.duplicate();
ByteBuffer chunk = bufferList.get(currentBufferIndex).duplicate();
chunk.position(pos);
chunk.limit(limit);
writeChunkToContainer(chunk);
@ -401,49 +465,78 @@ private void writeChunk(int pos, int limit) throws IOException {
@Override
public void close() throws IOException {
if (xceiverClientManager != null && xceiverClient != null
&& buffer != null) {
try {
if (buffer.position() > lastFlushPos) {
int pos = buffer.position() - (buffer.position() % chunkSize);
writeChunk(pos, buffer.position());
futureList.add(handlePartialFlush());
&& bufferList != null) {
int bufferSize = bufferList.size();
if (bufferSize > 0) {
try {
// flush the last chunk data residing on the currentBuffer
if (totalDataFlushedLength < writtenDataLength) {
ByteBuffer currentBuffer = getCurrentBuffer();
int pos = currentBuffer.position() - (currentBuffer.position()
% chunkSize);
int limit = currentBuffer.position() - pos;
writeChunk(pos, currentBuffer.position());
totalDataFlushedLength += limit;
handlePartialFlush();
}
waitOnFlushFutures();
// irrespective of whether the commitIndex2flushedDataMap is empty
// or not, ensure there is no exception set
checkOpen();
if (!commitIndex2flushedDataMap.isEmpty()) {
// wait for the last commit index in the commitIndex2flushedDataMap
// to get committed to all or majority of nodes in case timeout
// happens.
long lastIndex =
commitIndex2flushedDataMap.keySet().stream()
.mapToLong(v -> v).max().getAsLong();
LOG.debug(
"waiting for last flush Index " + lastIndex + " to catch up");
watchForCommit(lastIndex);
}
} catch (InterruptedException | ExecutionException e) {
adjustBuffersOnException();
throw new IOException(
"Unexpected Storage Container Exception: " + e.toString(), e);
} finally {
cleanup();
}
CompletableFuture<Void> combinedFuture = CompletableFuture.allOf(
futureList.toArray(new CompletableFuture[futureList.size()]));
// wait for all the transactions to complete
combinedFuture.get();
// irrespective of whether the commitIndexList is empty or not,
// ensure there is no exception set(For Standalone Protocol)
checkOpen();
if (!commitIndexList.isEmpty()) {
// wait for the last commit index in the commitIndexList to get
// committed to all or majority of nodes in case timeout happens.
long lastIndex = commitIndexList.get(commitIndexList.size() - 1);
LOG.debug(
"waiting for last flush Index " + lastIndex + " to catch up");
watchForCommit(lastIndex);
updateFlushIndex();
}
} catch (InterruptedException | ExecutionException e) {
throw new IOException(
"Unexpected Storage Container Exception: " + e.toString(), e);
} finally {
cleanup();
}
// clear all the buffers in the list
bufferList.stream().forEach(ByteBuffer::clear);
}
// clear the buffer
buffer.clear();
}
private void waitOnFlushFutures()
throws InterruptedException, ExecutionException {
CompletableFuture<Void> combinedFuture = CompletableFuture
.allOf(futureList.toArray(new CompletableFuture[futureList.size()]));
// wait for all the transactions to complete
combinedFuture.get();
}
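
waitOnFlushFutures() leans on CompletableFuture.allOf: the combined future completes only once every pending putBlock future does, and get() rethrows the first failure as an ExecutionException. A self-contained sketch with dummy futures:

import java.util.List;
import java.util.concurrent.CompletableFuture;

// Standalone sketch of the allOf pattern used by waitOnFlushFutures():
// get() blocks until every future in the list is done.
class AllOfSketch {
  public static void main(String[] args) throws Exception {
    List<CompletableFuture<Long>> futureList = List.of(
        CompletableFuture.completedFuture(10L),
        CompletableFuture.supplyAsync(() -> 20L));
    CompletableFuture<Void> combinedFuture = CompletableFuture
        .allOf(futureList.toArray(new CompletableFuture[0]));
    combinedFuture.get(); // would throw ExecutionException on any failure
    System.out.println("all flush futures completed");
  }
}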
private void validateResponse(
ContainerProtos.ContainerCommandResponseProto responseProto) {
ContainerProtos.ContainerCommandResponseProto responseProto)
throws IOException {
try {
// if the ioException is already set, it means a previous request has
// failed: just throw the exception. The current operation will fail with
// the original error.
if (ioException != null) {
throw ioException;
}
ContainerProtocolCalls.validateContainerResponse(responseProto);
} catch (StorageContainerException sce) {
ioException = new IOException(
"Unexpected Storage Container Exception: " + sce.toString(), sce);
LOG.error("Unexpected Storage Container Exception: ", sce);
setIoException(sce);
throw sce;
}
}
private void setIoException(Exception e) {
// record the exception only if none has been set by an earlier failure
if (ioException == null) {
ioException = new IOException(
"Unexpected Storage Container Exception: " + e.toString(), e);
}
}
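
validateResponse() and setIoException() together implement a sticky first failure: once the response thread records an IOException, every subsequent write or flush fails upfront in checkOpen(). A condensed sketch of that pattern:

import java.io.IOException;

// Condensed sketch of the sticky-failure pattern: the async response thread
// records the first exception, and every later call fails fast.
class StickyFailureSketch {
  private volatile IOException ioException;

  // mirrors checkOpen(): fail upfront if a previous request already failed
  void checkOpen() throws IOException {
    if (ioException != null) {
      throw ioException;
    }
  }

  // mirrors setIoException(): keep only the first recorded failure
  void setIoException(Exception e) {
    if (ioException == null) {
      ioException = new IOException(
          "Unexpected Storage Container Exception: " + e, e);
    }
  }
}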
@ -457,7 +550,10 @@ public void cleanup() {
futureList.clear();
}
futureList = null;
commitIndexList = null;
if (commitIndex2flushedDataMap != null) {
commitIndex2flushedDataMap.clear();
}
commitIndex2flushedDataMap = null;
responseExecutor.shutdown();
}
@ -471,6 +567,7 @@ private void checkOpen() throws IOException {
if (xceiverClient == null) {
throw new IOException("ChunkOutputStream has been closed.");
} else if (ioException != null) {
adjustBuffersOnException();
throw ioException;
}
}
@ -504,16 +601,27 @@ private void writeChunkToContainer(ByteBuffer chunk) throws IOException {
CompletableFuture<ContainerProtos.ContainerCommandResponseProto> future =
asyncReply.getResponse();
future.thenApplyAsync(e -> {
handleResponse(e, asyncReply);
try {
validateResponse(e);
} catch (IOException sce) {
future.completeExceptionally(sce);
}
return e;
}, responseExecutor);
}, responseExecutor).exceptionally(e -> {
LOG.debug(
"writing chunk failed " + chunkInfo.getChunkName() + " blockID "
+ blockID + " with exception " + e.getLocalizedMessage());
CompletionException ce = new CompletionException(e);
setIoException(ce);
throw ce;
});
} catch (IOException | InterruptedException | ExecutionException e) {
throw new IOException(
"Unexpected Storage Container Exception: " + e.toString(), e);
}
LOG.debug(
"writing chunk " + chunkInfo.getChunkName() + " blockID " + blockID
+ " length " + chunk.remaining());
+ " length " + effectiveChunkSize);
containerBlockData.addChunks(chunkInfo);
}
}

View File

@ -21,8 +21,6 @@
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
.ContainerCommandResponseProto;
import java.util.Collection;
import java.util.concurrent.CompletableFuture;
/**
@ -32,49 +30,13 @@ public class XceiverClientAsyncReply {
private CompletableFuture<ContainerCommandResponseProto> response;
private Long logIndex;
private Collection<CommitInfo> commitInfos;
public XceiverClientAsyncReply(
CompletableFuture<ContainerCommandResponseProto> response) {
this(response, 0, null);
}
public XceiverClientAsyncReply(
CompletableFuture<ContainerCommandResponseProto> response, long index,
Collection<CommitInfo> commitInfos) {
this.commitInfos = commitInfos;
this.logIndex = index;
this.logIndex = (long)0;
this.response = response;
}
/**
* A class having details about latest commitIndex for each server in the
* Ratis pipeline. For Standalone pipeline, commitInfo will be null.
*/
public static class CommitInfo {
private final String server;
private final Long commitIndex;
public CommitInfo(String server, long commitIndex) {
this.server = server;
this.commitIndex = commitIndex;
}
public String getServer() {
return server;
}
public long getCommitIndex() {
return commitIndex;
}
}
public Collection<CommitInfo> getCommitInfos() {
return commitInfos;
}
public CompletableFuture<ContainerCommandResponseProto> getResponse() {
return response;
}
@ -83,10 +45,6 @@ public long getLogIndex() {
return logIndex;
}
public void setCommitInfos(Collection<CommitInfo> commitInfos) {
this.commitInfos = commitInfos;
}
public void setLogIndex(Long logIndex) {
this.logIndex = logIndex;
}

View File

@ -125,7 +125,24 @@ public ContainerCommandResponseProto sendCommand(
*/
public abstract HddsProtos.ReplicationType getPipelineType();
public abstract void watchForCommit(long index, long timeout)
/**
* Check if a specific commitIndex is replicated to majority/all servers.
* @param index index to watch for
* @param timeout timeout provided for the watch operation to complete
* @return the min commit index replicated to all servers, or to a
* majority in case of a failure
* @throws InterruptedException
* @throws ExecutionException
* @throws TimeoutException
* @throws IOException
*/
public abstract long watchForCommit(long index, long timeout)
throws InterruptedException, ExecutionException, TimeoutException,
IOException;
/**
* returns the min commit index replicated to all servers.
* @return min commit index replicated to all servers.
*/
public abstract long getReplicatedMinCommitIndex();
}

View File

@ -189,7 +189,6 @@ public static XceiverClientAsyncReply putBlockAsync(
.setContainerID(containerBlockData.getBlockID().getContainerID())
.setTraceID(traceID).setDatanodeUuid(id)
.setPutBlock(createBlockRequest).build();
xceiverClient.sendCommand(request);
return xceiverClient.sendCommandAsync(request);
}

View File

@ -365,8 +365,8 @@
<name>ozone.client.stream.buffer.flush.size</name>
<value>64MB</value>
<tag>OZONE, CLIENT</tag>
<description>Size in mb which determines at what buffer position , a partial
flush will be initiated during write. It should be ideally a mutiple
<description>Size which determines at what buffer position a partial
flush will be initiated during write. It should ideally be a multiple
of chunkSize.
</description>
</property>
@ -374,8 +374,8 @@
<name>ozone.client.stream.buffer.max.size</name>
<value>128MB</value>
<tag>OZONE, CLIENT</tag>
<description>Size in mb which determines at what buffer position ,
write call be blocked till acknowledgement of the fisrt partial flush
<description>Size which determines at what buffer position the
write call will be blocked till acknowledgement of the first partial
flush by all servers.
</description>
</property>
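
These two properties, together with the Preconditions in ChunkGroupOutputStream, chain the sizes: the flush size should be a multiple of the chunk size, and the max size a multiple of the flush size. A quick consistency check of the documented defaults; the 16MB chunk size is an assumed example, not a value from this patch:

// Illustrative consistency check for the sizing rules described above.
// Flush and max sizes are the documented defaults; the chunk size is assumed.
class StreamBufferSizing {
  public static void main(String[] args) {
    long chunkSize = 16L << 20;              // assumed example
    long streamBufferFlushSize = 64L << 20;  // ozone.client.stream.buffer.flush.size
    long streamBufferMaxSize = 128L << 20;   // ozone.client.stream.buffer.max.size
    if (streamBufferFlushSize % chunkSize != 0
        || streamBufferMaxSize % streamBufferFlushSize != 0) {
      throw new IllegalStateException("invalid stream buffer sizing");
    }
    // number of flush-sized buffers the client keeps per block stream
    System.out.println(streamBufferMaxSize / streamBufferFlushSize); // 2
  }
}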

View File

@ -21,7 +21,7 @@
import org.apache.hadoop.classification.InterfaceStability;
/**
* DispatcherContext class holds transport protocol specfic context info
* DispatcherContext class holds transport protocol specific context info
* required for execution of container commands over the container dispatcher.
*/
@InterfaceAudience.Private
@ -121,7 +121,7 @@ public Builder setLogIndex(long logIndex) {
}
/**
* Builds and returns DatanodeDetails instance.
* Builds and returns DispatcherContext instance.
*
* @return DispatcherContext
*/

View File

@ -83,8 +83,8 @@ public class ChunkGroupOutputStream extends OutputStream {
private final long streamBufferMaxSize;
private final long watchTimeout;
private final long blockSize;
private ByteBuffer buffer;
private final Checksum checksum;
private List<ByteBuffer> bufferList;
/**
* A constructor for testing purpose only.
*/
@ -101,7 +101,9 @@ public ChunkGroupOutputStream() {
closed = false;
streamBufferFlushSize = 0;
streamBufferMaxSize = 0;
buffer = ByteBuffer.allocate(1);
bufferList = new ArrayList<>(1);
ByteBuffer buffer = ByteBuffer.allocate(1);
bufferList.add(buffer);
watchTimeout = 0;
blockSize = 0;
this.checksum = new Checksum();
@ -177,15 +179,7 @@ public ChunkGroupOutputStream(OpenKeySession handler,
Preconditions.checkState(streamBufferFlushSize % chunkSize == 0);
Preconditions.checkState(streamBufferMaxSize % streamBufferFlushSize == 0);
Preconditions.checkState(blockSize % streamBufferMaxSize == 0);
// This byteBuffer will be used to cache data until all the blockCommits
// (putBlock) gets replicated to all/majority servers. The idea here is to
// allocate the buffer of size blockSize so that as and when a chunk is
// is replicated to all servers, as a part of discarding the buffer, we
// don't necessarily need to run compaction(buffer.compact() on the buffer
// to actually discard the acknowledged data. Compaction is inefficient so
// it would be a better choice to avoid compaction on the happy I/O path.
this.buffer = ByteBuffer.allocate((int) blockSize);
this.bufferList = new ArrayList<>();
}
/**
@ -222,12 +216,7 @@ private void addKeyLocationInfo(OmKeyLocationInfo subKeyInfo)
streamEntries.add(new ChunkOutputStreamEntry(subKeyInfo.getBlockID(),
keyArgs.getKeyName(), xceiverClientManager, xceiverClient, requestID,
chunkSize, subKeyInfo.getLength(), streamBufferFlushSize,
streamBufferMaxSize, watchTimeout, buffer, checksum));
}
@VisibleForTesting
public long getByteOffset() {
return getKeyLength();
streamBufferMaxSize, watchTimeout, bufferList, checksum));
}
@ -254,11 +243,6 @@ public void write(int b) throws IOException {
public void write(byte[] b, int off, int len)
throws IOException {
checkNotClosed();
handleWrite(b, off, len, false, buffer.position());
}
private void handleWrite(byte[] b, int off, int len, boolean retry,
int pos) throws IOException {
if (b == null) {
throw new NullPointerException();
}
@ -269,8 +253,17 @@ private void handleWrite(byte[] b, int off, int len, boolean retry,
if (len == 0) {
return;
}
handleWrite(b, off, len, false);
}
private long computeBufferData() {
return bufferList.stream().mapToInt(value -> value.position())
.sum();
}
private void handleWrite(byte[] b, int off, long len, boolean retry)
throws IOException {
int succeededAllocates = 0;
int initialPos;
while (len > 0) {
if (streamEntries.size() <= currentStreamIndex) {
Preconditions.checkNotNull(omClient);
@ -289,8 +282,12 @@ private void handleWrite(byte[] b, int off, int len, boolean retry,
// still do a sanity check.
Preconditions.checkArgument(currentStreamIndex < streamEntries.size());
ChunkOutputStreamEntry current = streamEntries.get(currentStreamIndex);
int writeLen = Math.min(len, (int) current.getRemaining());
initialPos = pos < buffer.position() ? pos : buffer.position();
// length(len) will be in int range if the call is happening through the
// write API of ChunkOutputStream. Length can be in long range if it comes
// via the exception path.
int writeLen = Math.min((int)len, (int) current.getRemaining());
long currentPos = current.getWrittenDataLength();
try {
if (retry) {
current.writeOnRetry(len);
@ -299,9 +296,10 @@ private void handleWrite(byte[] b, int off, int len, boolean retry,
}
} catch (IOException ioe) {
if (checkIfContainerIsClosed(ioe) || checkIfTimeoutException(ioe)) {
// for the current iteration, current pos - initialPos gives the
// for the current iteration, totalDataWritten - currentPos gives the
// amount of data already written to the buffer
writeLen = buffer.position() - initialPos;
writeLen = (int) (current.getWrittenDataLength() - currentPos);
LOG.debug("writeLen {}, total len {}", writeLen, len);
handleException(current, currentStreamIndex);
} else {
throw ioe;
@ -366,30 +364,20 @@ private void removeEmptyBlocks() {
*/
private void handleException(ChunkOutputStreamEntry streamEntry,
int streamIndex) throws IOException {
int lastSuccessfulFlushIndex = streamEntry.getLastSuccessfulFlushIndex();
int currentPos = buffer.position();
// In case of a failure, read the data from the position till the last
// acknowledgement happened.
if (lastSuccessfulFlushIndex > 0) {
buffer.position(lastSuccessfulFlushIndex);
buffer.limit(currentPos);
buffer.compact();
}
if (buffer.position() > 0) {
//set the correct length for the current stream
streamEntry.currentPosition = lastSuccessfulFlushIndex;
long totalSuccessfulFlushedData =
streamEntry.getTotalSuccessfulFlushedData();
//set the correct length for the current stream
streamEntry.currentPosition = totalSuccessfulFlushedData;
long bufferedDataLen = computeBufferData();
// just clean up the current stream.
streamEntry.cleanup();
if (bufferedDataLen > 0) {
// If the data is still cached in the underlying stream, we need to
// allocate new block and write this data in the datanode.
currentStreamIndex += 1;
handleWrite(buffer.array(), 0, buffer.position(), true,
lastSuccessfulFlushIndex);
handleWrite(null, 0, bufferedDataLen, true);
}
// just clean up the current stream.
streamEntry.cleanup();
if (lastSuccessfulFlushIndex == 0) {
if (totalSuccessfulFlushedData == 0) {
streamEntries.remove(streamIndex);
currentStreamIndex -= 1;
}
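
The retry path above reduces to three steps: pin the failed entry's length to the data that was acknowledged, clean up the failed stream, and replay whatever is still buffered into a freshly allocated block. A schematic sketch; the Entry type and its fields are invented stand-ins for ChunkOutputStreamEntry:

import java.util.List;

// Schematic sketch of handleException(); Entry is an invented stand-in.
class RetrySketch {
  static class Entry {
    long currentPosition;
    long ackedLength; // totalSuccessfulFlushedData in the patch
  }

  static void handleException(List<Entry> streams, int index,
      long bufferedDataLen) {
    Entry failed = streams.get(index);
    // 1. the key keeps only the data every required replica acknowledged
    failed.currentPosition = failed.ackedLength;
    // 2. stream cleanup elided; 3. replay buffered data into a new block
    if (bufferedDataLen > 0) {
      Entry fresh = new Entry();
      fresh.currentPosition = bufferedDataLen; // written via writeOnRetry()
      streams.add(fresh);
    }
    if (failed.ackedLength == 0) {
      streams.remove(failed); // nothing committed: drop the empty entry
    }
  }
}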
@ -433,7 +421,7 @@ private boolean checkIfTimeoutException(IOException ioe) {
}
private long getKeyLength() {
return streamEntries.parallelStream().mapToLong(e -> e.currentPosition)
return streamEntries.stream().mapToLong(e -> e.currentPosition)
.sum();
}
@ -517,10 +505,10 @@ public void close() throws IOException {
} catch (IOException ioe) {
throw ioe;
} finally {
if (buffer != null) {
buffer.clear();
if (bufferList != null) {
bufferList.stream().forEach(e -> e.clear());
}
buffer = null;
bufferList = null;
}
}
@ -633,13 +621,13 @@ private static class ChunkOutputStreamEntry extends OutputStream {
private final long streamBufferFlushSize;
private final long streamBufferMaxSize;
private final long watchTimeout;
private ByteBuffer buffer;
private List<ByteBuffer> bufferList;
ChunkOutputStreamEntry(BlockID blockID, String key,
XceiverClientManager xceiverClientManager,
XceiverClientSpi xceiverClient, String requestId, int chunkSize,
long length, long streamBufferFlushSize, long streamBufferMaxSize,
long watchTimeout, ByteBuffer buffer, Checksum checksum) {
long watchTimeout, List<ByteBuffer> bufferList, Checksum checksum) {
this.outputStream = null;
this.blockID = blockID;
this.key = key;
@ -653,8 +641,8 @@ private static class ChunkOutputStreamEntry extends OutputStream {
this.streamBufferFlushSize = streamBufferFlushSize;
this.streamBufferMaxSize = streamBufferMaxSize;
this.watchTimeout = watchTimeout;
this.buffer = buffer;
this.checksum = checksum;
this.bufferList = bufferList;
}
/**
@ -676,7 +664,7 @@ private static class ChunkOutputStreamEntry extends OutputStream {
this.currentPosition = 0;
streamBufferFlushSize = 0;
streamBufferMaxSize = 0;
buffer = null;
bufferList = null;
watchTimeout = 0;
this.checksum = checksum;
}
@ -694,7 +682,7 @@ private void checkStream() {
this.outputStream =
new ChunkOutputStream(blockID, key, xceiverClientManager,
xceiverClient, requestId, chunkSize, streamBufferFlushSize,
streamBufferMaxSize, watchTimeout, buffer, checksum);
streamBufferMaxSize, watchTimeout, bufferList, checksum);
}
}
@ -731,11 +719,24 @@ public void close() throws IOException {
}
}
int getLastSuccessfulFlushIndex() throws IOException {
long getTotalSuccessfulFlushedData() throws IOException {
if (this.outputStream instanceof ChunkOutputStream) {
ChunkOutputStream out = (ChunkOutputStream) this.outputStream;
blockID = out.getBlockID();
return out.getLastSuccessfulFlushIndex();
return out.getTotalSuccessfulFlushedData();
} else if (outputStream == null) {
// For a pre allocated block for which no write has been initiated,
// the OutputStream will be null here.
// In such cases, the default blockCommitSequenceId will be 0
return 0;
}
throw new IOException("Invalid Output Stream for Key: " + key);
}
long getWrittenDataLength() throws IOException {
if (this.outputStream instanceof ChunkOutputStream) {
ChunkOutputStream out = (ChunkOutputStream) this.outputStream;
return out.getWrittenDataLength();
} else if (outputStream == null) {
// For a pre allocated block for which no write has been initiated,
// the OutputStream will be null here.
@ -753,7 +754,7 @@ void cleanup() {
}
}
void writeOnRetry(int len) throws IOException {
void writeOnRetry(long len) throws IOException {
checkStream();
if (this.outputStream instanceof ChunkOutputStream) {
ChunkOutputStream out = (ChunkOutputStream) this.outputStream;

View File

@ -109,6 +109,10 @@ public static void init() throws Exception {
objectStore.getVolume(volumeName).createBucket(bucketName);
}
private String getKeyName() {
return UUID.randomUUID().toString();
}
/**
* Shutdown MiniDFSCluster.
*/
@ -121,7 +125,7 @@ public static void shutdown() {
@Test
public void testBlockWritesWithFlushAndClose() throws Exception {
String keyName = "standalone";
String keyName = getKeyName();
OzoneOutputStream key = createKey(keyName, ReplicationType.RATIS, 0);
// write data more than 1 chunk
byte[] data = ContainerTestHelper
@ -153,7 +157,7 @@ public void testBlockWritesWithFlushAndClose() throws Exception {
@Test
public void testBlockWritesCloseConsistency() throws Exception {
String keyName = "standalone2";
String keyName = getKeyName();
OzoneOutputStream key = createKey(keyName, ReplicationType.RATIS, 0);
// write data more than 1 chunk
byte[] data = ContainerTestHelper
@ -181,7 +185,7 @@ public void testBlockWritesCloseConsistency() throws Exception {
@Test
public void testMultiBlockWrites() throws Exception {
String keyName = "standalone3";
String keyName = getKeyName();
OzoneOutputStream key =
createKey(keyName, ReplicationType.RATIS, (4 * blockSize));
ChunkGroupOutputStream groupOutputStream =
@ -227,8 +231,7 @@ public void testMultiBlockWrites() throws Exception {
@Test
public void testMultiBlockWrites2() throws Exception {
String keyName = "ratis2";
long dataLength;
String keyName = getKeyName();
OzoneOutputStream key =
createKey(keyName, ReplicationType.RATIS, 4 * blockSize);
ChunkGroupOutputStream groupOutputStream =
@ -272,7 +275,7 @@ public void testMultiBlockWrites2() throws Exception {
@Test
public void testMultiBlockWrites3() throws Exception {
String keyName = "standalone5";
String keyName = getKeyName();
int keyLen = 4 * blockSize;
OzoneOutputStream key = createKey(keyName, ReplicationType.RATIS, keyLen);
ChunkGroupOutputStream groupOutputStream =
@ -391,7 +394,7 @@ private void waitForContainerClose(Long... containerIdList)
// on the datanode.
@Test
public void testDiscardPreallocatedBlocks() throws Exception {
String keyName = "discardpreallocatedblocks";
String keyName = getKeyName();
OzoneOutputStream key =
createKey(keyName, ReplicationType.RATIS, 2 * blockSize);
ChunkGroupOutputStream groupOutputStream =
@ -447,7 +450,7 @@ private void validateData(String keyName, byte[] data) throws Exception {
@Test
public void testBlockWriteViaRatis() throws Exception {
String keyName = "ratis";
String keyName = getKeyName();
OzoneOutputStream key = createKey(keyName, ReplicationType.RATIS, 0);
byte[] data = ContainerTestHelper
.getFixedLengthString(keyString, chunkSize + chunkSize / 2)

View File

@ -65,10 +65,6 @@ public class TestFailureHandlingByClient {
private static String keyString;
private static int maxRetries;
/**
* TODO: we will spawn new MiniOzoneCluster every time for each unit test
* invocation. Need to use the same instance for all tests.
*/
/**
* Create a MiniDFSCluster for testing.
* <p>
@ -86,6 +82,11 @@ public void init() throws Exception {
TimeUnit.SECONDS);
conf.setTimeDuration(HDDS_SCM_WATCHER_TIMEOUT, 1000, TimeUnit.MILLISECONDS);
conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, TimeUnit.SECONDS);
conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 5);
conf.setTimeDuration(
OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY,
1, TimeUnit.SECONDS);
conf.setQuietMode(false);
cluster = MiniOzoneCluster.newBuilder(conf)
.setNumDatanodes(6).build();
@ -112,7 +113,7 @@ public void shutdown() {
@Test
public void testBlockWritesWithDnFailures() throws Exception {
String keyName = "ratis3";
String keyName = UUID.randomUUID().toString();
OzoneOutputStream key = createKey(keyName, ReplicationType.RATIS, 0);
byte[] data =
ContainerTestHelper
@ -189,6 +190,51 @@ public void testMultiBlockWritesWithDnFailures() throws Exception {
validateData(keyName, data.concat(data).getBytes());
}
@Test
public void testMultiBlockWritesWithIntermittentDnFailures()
throws Exception {
String keyName = UUID.randomUUID().toString();
OzoneOutputStream key =
createKey(keyName, ReplicationType.RATIS, 6 * blockSize);
String data = ContainerTestHelper
.getFixedLengthString(keyString, blockSize + chunkSize);
key.write(data.getBytes());
// get the name of a valid container
Assert.assertTrue(key.getOutputStream() instanceof ChunkGroupOutputStream);
ChunkGroupOutputStream groupOutputStream =
(ChunkGroupOutputStream) key.getOutputStream();
List<OmKeyLocationInfo> locationInfoList =
groupOutputStream.getLocationInfoList();
Assert.assertTrue(locationInfoList.size() == 6);
long containerId = locationInfoList.get(1).getContainerID();
ContainerInfo container =
cluster.getStorageContainerManager().getContainerManager()
.getContainer(ContainerID.valueof(containerId));
Pipeline pipeline =
cluster.getStorageContainerManager().getPipelineManager()
.getPipeline(container.getPipelineID());
List<DatanodeDetails> datanodes = pipeline.getNodes();
cluster.shutdownHddsDatanode(datanodes.get(0));
// The write will fail but the exception will be handled and the length
// will be updated correctly in OzoneManager once the stream is closed
key.write(data.getBytes());
// shutdown the second datanode
cluster.shutdownHddsDatanode(datanodes.get(1));
key.write(data.getBytes());
key.close();
OmKeyArgs keyArgs = new OmKeyArgs.Builder().setVolumeName(volumeName)
.setBucketName(bucketName).setType(HddsProtos.ReplicationType.RATIS)
.setFactor(HddsProtos.ReplicationFactor.THREE).setKeyName(keyName)
.build();
OmKeyInfo keyInfo = cluster.getOzoneManager().lookupKey(keyArgs);
Assert.assertEquals(3 * data.getBytes().length, keyInfo.getDataSize());
validateData(keyName, data.concat(data).concat(data).getBytes());
}
private OzoneOutputStream createKey(String keyName, ReplicationType type,
long size) throws Exception {
return ContainerTestHelper

View File

@ -18,15 +18,12 @@
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.hadoop.ozone.client.io.ChunkGroupInputStream;
import org.apache.hadoop.ozone.client.io.ChunkGroupOutputStream;
import org.apache.hadoop.hdds.scm.storage.ChunkInputStream;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.OutputStream;
import java.io.IOException;
import java.util.ArrayList;
@ -41,81 +38,9 @@ public class TestChunkStreams {
@Rule
public ExpectedException exception = ExpectedException.none();
/**
* This test uses ByteArrayOutputStream as the underlying stream to test
* the correctness of ChunkGroupOutputStream.
*
* @throws Exception
*/
@Test
public void testWriteGroupOutputStream() throws Exception {
try (ChunkGroupOutputStream groupOutputStream =
new ChunkGroupOutputStream()) {
ArrayList<OutputStream> outputStreams = new ArrayList<>();
// 5 byte streams, each 100 bytes. write 500 bytes means writing to each
// of them with 100 bytes.
for (int i = 0; i < 5; i++) {
ByteArrayOutputStream out = new ByteArrayOutputStream(100);
outputStreams.add(out);
groupOutputStream.addStream(out, 100);
}
assertEquals(0, groupOutputStream.getByteOffset());
String dataString = RandomStringUtils.randomAscii(500);
byte[] data = dataString.getBytes(UTF_8);
groupOutputStream.write(data, 0, data.length);
assertEquals(500, groupOutputStream.getByteOffset());
String res = "";
int offset = 0;
for (OutputStream stream : outputStreams) {
String subString = stream.toString();
res += subString;
assertEquals(dataString.substring(offset, offset + 100), subString);
offset += 100;
}
assertEquals(dataString, res);
}
}
@Test
public void testErrorWriteGroupOutputStream() throws Exception {
try (ChunkGroupOutputStream groupOutputStream =
new ChunkGroupOutputStream()) {
ArrayList<OutputStream> outputStreams = new ArrayList<>();
// 5 byte streams, each 100 bytes. write 500 bytes means writing to each
// of them with 100 bytes. all 5 streams makes up a ChunkGroupOutputStream
// with a total of 500 bytes in size
for (int i = 0; i < 5; i++) {
ByteArrayOutputStream out = new ByteArrayOutputStream(100);
outputStreams.add(out);
groupOutputStream.addStream(out, 100);
}
assertEquals(0, groupOutputStream.getByteOffset());
// first writes of 100 bytes should succeed
groupOutputStream.write(RandomStringUtils.randomAscii(100)
.getBytes(UTF_8));
assertEquals(100, groupOutputStream.getByteOffset());
// second writes of 500 bytes should fail, as there should be only 400
// bytes space left
// TODO : if we decide to take the 400 bytes instead in the future,
// other add more informative error code rather than exception, need to
// change this part.
exception.expect(Exception.class);
groupOutputStream.write(RandomStringUtils.randomAscii(500)
.getBytes(UTF_8));
assertEquals(100, groupOutputStream.getByteOffset());
}
}
@Test
public void testReadGroupInputStream() throws Exception {
try (ChunkGroupInputStream groupInputStream = new ChunkGroupInputStream()) {
ArrayList<ChunkInputStream> inputStreams = new ArrayList<>();
String dataString = RandomStringUtils.randomAscii(500);
byte[] buf = dataString.getBytes(UTF_8);
@ -157,7 +82,6 @@ public synchronized int read(byte[] b, int off, int len)
return readLen;
}
};
inputStreams.add(in);
offset += 100;
groupInputStream.addStream(in, 100);
}
@ -173,7 +97,6 @@ public synchronized int read(byte[] b, int off, int len)
@Test
public void testErrorReadGroupInputStream() throws Exception {
try (ChunkGroupInputStream groupInputStream = new ChunkGroupInputStream()) {
ArrayList<ChunkInputStream> inputStreams = new ArrayList<>();
String dataString = RandomStringUtils.randomAscii(500);
byte[] buf = dataString.getBytes(UTF_8);
@ -215,7 +138,6 @@ public synchronized int read(byte[] b, int off, int len)
return readLen;
}
};
inputStreams.add(in);
offset += 100;
groupInputStream.addStream(in, 100);
}