HADOOP-18391. Improvements in VectoredReadUtils#readVectored() for direct buffers (#4787)
part of HADOOP-18103. Contributed By: Mukund Thakur
This commit is contained in:
parent
84081a8cae
commit
19830c98bc
@ -30,6 +30,7 @@
|
|||||||
|
|
||||||
import org.apache.hadoop.fs.impl.CombinedFileRange;
|
import org.apache.hadoop.fs.impl.CombinedFileRange;
|
||||||
import org.apache.hadoop.util.Preconditions;
|
import org.apache.hadoop.util.Preconditions;
|
||||||
|
import org.apache.hadoop.util.functional.Function4RaisingIOE;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility class which implements helper methods used
|
* Utility class which implements helper methods used
|
||||||
@ -37,6 +38,8 @@
|
|||||||
*/
|
*/
|
||||||
public final class VectoredReadUtils {
|
public final class VectoredReadUtils {
|
||||||
|
|
||||||
|
private static final int TMP_BUFFER_MAX_SIZE = 64 * 1024;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validate a single range.
|
* Validate a single range.
|
||||||
* @param range file range.
|
* @param range file range.
|
||||||
@ -114,7 +117,12 @@ private static void readNonByteBufferPositionedReadable(PositionedReadable strea
|
|||||||
FileRange range,
|
FileRange range,
|
||||||
ByteBuffer buffer) throws IOException {
|
ByteBuffer buffer) throws IOException {
|
||||||
if (buffer.isDirect()) {
|
if (buffer.isDirect()) {
|
||||||
buffer.put(readInDirectBuffer(stream, range));
|
readInDirectBuffer(range.getLength(),
|
||||||
|
buffer,
|
||||||
|
(position, buffer1, offset, length) -> {
|
||||||
|
stream.readFully(position, buffer1, offset, length);
|
||||||
|
return null;
|
||||||
|
});
|
||||||
buffer.flip();
|
buffer.flip();
|
||||||
} else {
|
} else {
|
||||||
stream.readFully(range.getOffset(), buffer.array(),
|
stream.readFully(range.getOffset(), buffer.array(),
|
||||||
@ -122,13 +130,34 @@ private static void readNonByteBufferPositionedReadable(PositionedReadable strea
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static byte[] readInDirectBuffer(PositionedReadable stream,
|
/**
|
||||||
FileRange range) throws IOException {
|
* Read bytes from stream into a byte buffer using an
|
||||||
// if we need to read data from a direct buffer and the stream doesn't
|
* intermediate byte array.
|
||||||
// support it, we allocate a byte array to use.
|
* @param length number of bytes to read.
|
||||||
byte[] tmp = new byte[range.getLength()];
|
* @param buffer buffer to fill.
|
||||||
stream.readFully(range.getOffset(), tmp, 0, tmp.length);
|
* @param operation operation to use for reading data.
|
||||||
return tmp;
|
* @throws IOException any IOE.
|
||||||
|
*/
|
||||||
|
public static void readInDirectBuffer(int length,
|
||||||
|
ByteBuffer buffer,
|
||||||
|
Function4RaisingIOE<Integer, byte[], Integer,
|
||||||
|
Integer, Void> operation) throws IOException {
|
||||||
|
if (length == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int readBytes = 0;
|
||||||
|
int position = 0;
|
||||||
|
int tmpBufferMaxSize = Math.min(TMP_BUFFER_MAX_SIZE, length);
|
||||||
|
byte[] tmp = new byte[tmpBufferMaxSize];
|
||||||
|
while (readBytes < length) {
|
||||||
|
int currentLength = (readBytes + tmpBufferMaxSize) < length ?
|
||||||
|
tmpBufferMaxSize
|
||||||
|
: (length - readBytes);
|
||||||
|
operation.apply(position, tmp, 0, currentLength);
|
||||||
|
buffer.put(tmp, 0, currentLength);
|
||||||
|
position = position + currentLength;
|
||||||
|
readBytes = readBytes + currentLength;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -0,0 +1,43 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.util.functional;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function of arity 4 which may raise an IOException.
|
||||||
|
* @param <I1> type of arg1.
|
||||||
|
* @param <I2> type of arg2.
|
||||||
|
* @param <I3> type of arg3.
|
||||||
|
* @param <I4> type of arg4.
|
||||||
|
* @param <R> return type.
|
||||||
|
*/
|
||||||
|
public interface Function4RaisingIOE<I1, I2, I3, I4, R> {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Apply the function.
|
||||||
|
* @param i1 argument 1.
|
||||||
|
* @param i2 argument 2.
|
||||||
|
* @param i3 argument 3.
|
||||||
|
* @param i4 argument 4.
|
||||||
|
* @return return value.
|
||||||
|
* @throws IOException any IOE.
|
||||||
|
*/
|
||||||
|
R apply(I1 i1, I2 i2, I3 i3, I4 i4) throws IOException;
|
||||||
|
}
|
@ -386,17 +386,31 @@ public void testReadVectored() throws Exception {
|
|||||||
List<FileRange> input = Arrays.asList(FileRange.createFileRange(0, 100),
|
List<FileRange> input = Arrays.asList(FileRange.createFileRange(0, 100),
|
||||||
FileRange.createFileRange(100_000, 100),
|
FileRange.createFileRange(100_000, 100),
|
||||||
FileRange.createFileRange(200_000, 100));
|
FileRange.createFileRange(200_000, 100));
|
||||||
|
runAndValidateVectoredRead(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReadVectoredZeroBytes() throws Exception {
|
||||||
|
List<FileRange> input = Arrays.asList(FileRange.createFileRange(0, 0),
|
||||||
|
FileRange.createFileRange(100_000, 100),
|
||||||
|
FileRange.createFileRange(200_000, 0));
|
||||||
|
runAndValidateVectoredRead(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void runAndValidateVectoredRead(List<FileRange> input)
|
||||||
|
throws Exception {
|
||||||
Stream stream = Mockito.mock(Stream.class);
|
Stream stream = Mockito.mock(Stream.class);
|
||||||
Mockito.doAnswer(invocation -> {
|
Mockito.doAnswer(invocation -> {
|
||||||
fillBuffer(invocation.getArgument(1));
|
fillBuffer(invocation.getArgument(1));
|
||||||
return null;
|
return null;
|
||||||
}).when(stream).readFully(ArgumentMatchers.anyLong(),
|
}).when(stream).readFully(ArgumentMatchers.anyLong(),
|
||||||
ArgumentMatchers.any(ByteBuffer.class));
|
ArgumentMatchers.any(ByteBuffer.class));
|
||||||
// should not merge the ranges
|
// should not merge the ranges
|
||||||
VectoredReadUtils.readVectored(stream, input, ByteBuffer::allocate);
|
VectoredReadUtils.readVectored(stream, input, ByteBuffer::allocate);
|
||||||
Mockito.verify(stream, Mockito.times(3))
|
Mockito.verify(stream, Mockito.times(3))
|
||||||
.readFully(ArgumentMatchers.anyLong(), ArgumentMatchers.any(ByteBuffer.class));
|
.readFully(ArgumentMatchers.anyLong(), ArgumentMatchers.any(ByteBuffer.class));
|
||||||
for(int b=0; b < input.size(); ++b) {
|
for (int b = 0; b < input.size(); ++b) {
|
||||||
validateBuffer("buffer " + b, input.get(b).getData().get(), 0);
|
validateBuffer("buffer " + b, input.get(b).getData().get(), 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1054,20 +1054,13 @@ private void readSingleRange(FileRange range, ByteBuffer buffer) {
|
|||||||
private void populateBuffer(int length,
|
private void populateBuffer(int length,
|
||||||
ByteBuffer buffer,
|
ByteBuffer buffer,
|
||||||
S3ObjectInputStream objectContent) throws IOException {
|
S3ObjectInputStream objectContent) throws IOException {
|
||||||
|
|
||||||
if (buffer.isDirect()) {
|
if (buffer.isDirect()) {
|
||||||
int readBytes = 0;
|
VectoredReadUtils.readInDirectBuffer(length, buffer,
|
||||||
int offset = 0;
|
(position, tmp, offset, currentLength) -> {
|
||||||
byte[] tmp = new byte[TMP_BUFFER_MAX_SIZE];
|
readByteArray(objectContent, tmp, offset, currentLength);
|
||||||
while (readBytes < length) {
|
return null;
|
||||||
checkIfVectoredIOStopped();
|
});
|
||||||
int currentLength = readBytes + TMP_BUFFER_MAX_SIZE < length ?
|
|
||||||
TMP_BUFFER_MAX_SIZE
|
|
||||||
: length - readBytes;
|
|
||||||
readByteArray(objectContent, tmp, 0, currentLength);
|
|
||||||
buffer.put(tmp, 0, currentLength);
|
|
||||||
offset = offset + currentLength;
|
|
||||||
readBytes = readBytes + currentLength;
|
|
||||||
}
|
|
||||||
buffer.flip();
|
buffer.flip();
|
||||||
} else {
|
} else {
|
||||||
readByteArray(objectContent, buffer.array(), 0, length);
|
readByteArray(objectContent, buffer.array(), 0, length);
|
||||||
@ -1076,6 +1069,7 @@ private void populateBuffer(int length,
|
|||||||
incrementBytesRead(length);
|
incrementBytesRead(length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read data into destination buffer from s3 object content.
|
* Read data into destination buffer from s3 object content.
|
||||||
* @param objectContent result from S3.
|
* @param objectContent result from S3.
|
||||||
|
Loading…
Reference in New Issue
Block a user