HADOOP-17347. ABFS: Read optimizations
- Contributed by Bilahari T H
This commit is contained in:
parent
5ca1ea89b3
commit
1448add08f
@ -100,6 +100,16 @@ public class AbfsConfiguration{
|
|||||||
DefaultValue = DEFAULT_WRITE_BUFFER_SIZE)
|
DefaultValue = DEFAULT_WRITE_BUFFER_SIZE)
|
||||||
private int writeBufferSize;
|
private int writeBufferSize;
|
||||||
|
|
||||||
|
@BooleanConfigurationValidatorAnnotation(
|
||||||
|
ConfigurationKey = AZURE_READ_SMALL_FILES_COMPLETELY,
|
||||||
|
DefaultValue = DEFAULT_READ_SMALL_FILES_COMPLETELY)
|
||||||
|
private boolean readSmallFilesCompletely;
|
||||||
|
|
||||||
|
@BooleanConfigurationValidatorAnnotation(
|
||||||
|
ConfigurationKey = AZURE_READ_OPTIMIZE_FOOTER_READ,
|
||||||
|
DefaultValue = DEFAULT_OPTIMIZE_FOOTER_READ)
|
||||||
|
private boolean optimizeFooterRead;
|
||||||
|
|
||||||
@IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_READ_BUFFER_SIZE,
|
@IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_READ_BUFFER_SIZE,
|
||||||
MinValue = MIN_BUFFER_SIZE,
|
MinValue = MIN_BUFFER_SIZE,
|
||||||
MaxValue = MAX_BUFFER_SIZE,
|
MaxValue = MAX_BUFFER_SIZE,
|
||||||
@ -527,6 +537,14 @@ public int getWriteBufferSize() {
|
|||||||
return this.writeBufferSize;
|
return this.writeBufferSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean readSmallFilesCompletely() {
|
||||||
|
return this.readSmallFilesCompletely;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean optimizeFooterRead() {
|
||||||
|
return this.optimizeFooterRead;
|
||||||
|
}
|
||||||
|
|
||||||
public int getReadBufferSize() {
|
public int getReadBufferSize() {
|
||||||
return this.readBufferSize;
|
return this.readBufferSize;
|
||||||
}
|
}
|
||||||
@ -925,4 +943,14 @@ private String appendSlashIfNeeded(String authority) {
|
|||||||
return authority;
|
return authority;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public void setReadSmallFilesCompletely(boolean readSmallFilesCompletely) {
|
||||||
|
this.readSmallFilesCompletely = readSmallFilesCompletely;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public void setOptimizeFooterRead(boolean optimizeFooterRead) {
|
||||||
|
this.optimizeFooterRead = optimizeFooterRead;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -643,6 +643,8 @@ private AbfsInputStreamContext populateAbfsInputStreamContext() {
|
|||||||
.withReadBufferSize(abfsConfiguration.getReadBufferSize())
|
.withReadBufferSize(abfsConfiguration.getReadBufferSize())
|
||||||
.withReadAheadQueueDepth(abfsConfiguration.getReadAheadQueueDepth())
|
.withReadAheadQueueDepth(abfsConfiguration.getReadAheadQueueDepth())
|
||||||
.withTolerateOobAppends(abfsConfiguration.getTolerateOobAppends())
|
.withTolerateOobAppends(abfsConfiguration.getTolerateOobAppends())
|
||||||
|
.withReadSmallFilesCompletely(abfsConfiguration.readSmallFilesCompletely())
|
||||||
|
.withOptimizeFooterRead(abfsConfiguration.optimizeFooterRead())
|
||||||
.withStreamStatistics(new AbfsInputStreamStatisticsImpl())
|
.withStreamStatistics(new AbfsInputStreamStatisticsImpl())
|
||||||
.withShouldReadBufferSizeAlways(
|
.withShouldReadBufferSizeAlways(
|
||||||
abfsConfiguration.shouldReadBufferSizeAlways())
|
abfsConfiguration.shouldReadBufferSizeAlways())
|
||||||
|
@ -56,6 +56,8 @@ public final class ConfigurationKeys {
|
|||||||
public static final String AZURE_WRITE_MAX_REQUESTS_TO_QUEUE = "fs.azure.write.max.requests.to.queue";
|
public static final String AZURE_WRITE_MAX_REQUESTS_TO_QUEUE = "fs.azure.write.max.requests.to.queue";
|
||||||
public static final String AZURE_WRITE_BUFFER_SIZE = "fs.azure.write.request.size";
|
public static final String AZURE_WRITE_BUFFER_SIZE = "fs.azure.write.request.size";
|
||||||
public static final String AZURE_READ_BUFFER_SIZE = "fs.azure.read.request.size";
|
public static final String AZURE_READ_BUFFER_SIZE = "fs.azure.read.request.size";
|
||||||
|
public static final String AZURE_READ_SMALL_FILES_COMPLETELY = "fs.azure.read.smallfilescompletely";
|
||||||
|
public static final String AZURE_READ_OPTIMIZE_FOOTER_READ = "fs.azure.read.optimizefooterread";
|
||||||
public static final String AZURE_BLOCK_SIZE_PROPERTY_NAME = "fs.azure.block.size";
|
public static final String AZURE_BLOCK_SIZE_PROPERTY_NAME = "fs.azure.block.size";
|
||||||
public static final String AZURE_BLOCK_LOCATION_HOST_PROPERTY_NAME = "fs.azure.block.location.impersonatedhost";
|
public static final String AZURE_BLOCK_LOCATION_HOST_PROPERTY_NAME = "fs.azure.block.location.impersonatedhost";
|
||||||
public static final String AZURE_CONCURRENT_CONNECTION_VALUE_OUT = "fs.azure.concurrentRequestCount.out";
|
public static final String AZURE_CONCURRENT_CONNECTION_VALUE_OUT = "fs.azure.concurrentRequestCount.out";
|
||||||
|
@ -50,13 +50,15 @@ public final class FileSystemConfigurations {
|
|||||||
public static final int DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_BACKOFF_INTERVAL = SIXTY_SECONDS;
|
public static final int DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_MAX_BACKOFF_INTERVAL = SIXTY_SECONDS;
|
||||||
public static final int DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF = 2;
|
public static final int DEFAULT_AZURE_OAUTH_TOKEN_FETCH_RETRY_DELTA_BACKOFF = 2;
|
||||||
|
|
||||||
private static final int ONE_KB = 1024;
|
public static final int ONE_KB = 1024;
|
||||||
private static final int ONE_MB = ONE_KB * ONE_KB;
|
public static final int ONE_MB = ONE_KB * ONE_KB;
|
||||||
|
|
||||||
// Default upload and download buffer size
|
// Default upload and download buffer size
|
||||||
public static final int DEFAULT_WRITE_BUFFER_SIZE = 8 * ONE_MB; // 8 MB
|
public static final int DEFAULT_WRITE_BUFFER_SIZE = 8 * ONE_MB; // 8 MB
|
||||||
public static final int APPENDBLOB_MAX_WRITE_BUFFER_SIZE = 4 * ONE_MB; // 4 MB
|
public static final int APPENDBLOB_MAX_WRITE_BUFFER_SIZE = 4 * ONE_MB; // 4 MB
|
||||||
public static final int DEFAULT_READ_BUFFER_SIZE = 4 * ONE_MB; // 4 MB
|
public static final int DEFAULT_READ_BUFFER_SIZE = 4 * ONE_MB; // 4 MB
|
||||||
|
public static final boolean DEFAULT_READ_SMALL_FILES_COMPLETELY = false;
|
||||||
|
public static final boolean DEFAULT_OPTIMIZE_FOOTER_READ = false;
|
||||||
public static final boolean DEFAULT_ALWAYS_READ_BUFFER_SIZE = false;
|
public static final boolean DEFAULT_ALWAYS_READ_BUFFER_SIZE = false;
|
||||||
public static final int DEFAULT_READ_AHEAD_BLOCK_SIZE = 4 * ONE_MB;
|
public static final int DEFAULT_READ_AHEAD_BLOCK_SIZE = 4 * ONE_MB;
|
||||||
public static final int MIN_BUFFER_SIZE = 16 * ONE_KB; // 16 KB
|
public static final int MIN_BUFFER_SIZE = 16 * ONE_KB; // 16 KB
|
||||||
|
@ -38,6 +38,10 @@
|
|||||||
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
|
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
|
||||||
import org.apache.hadoop.fs.azurebfs.utils.CachedSASToken;
|
import org.apache.hadoop.fs.azurebfs.utils.CachedSASToken;
|
||||||
|
|
||||||
|
import static java.lang.Math.max;
|
||||||
|
import static java.lang.Math.min;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_KB;
|
||||||
import static org.apache.hadoop.util.StringUtils.toLowerCase;
|
import static org.apache.hadoop.util.StringUtils.toLowerCase;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -46,6 +50,9 @@
|
|||||||
public class AbfsInputStream extends FSInputStream implements CanUnbuffer,
|
public class AbfsInputStream extends FSInputStream implements CanUnbuffer,
|
||||||
StreamCapabilities {
|
StreamCapabilities {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(AbfsInputStream.class);
|
private static final Logger LOG = LoggerFactory.getLogger(AbfsInputStream.class);
|
||||||
|
// Footer size is set to qualify for both ORC and parquet files
|
||||||
|
public static final int FOOTER_SIZE = 16 * ONE_KB;
|
||||||
|
public static final int MAX_OPTIMIZED_READ_ATTEMPTS = 2;
|
||||||
|
|
||||||
private int readAheadBlockSize;
|
private int readAheadBlockSize;
|
||||||
private final AbfsClient client;
|
private final AbfsClient client;
|
||||||
@ -59,6 +66,7 @@ public class AbfsInputStream extends FSInputStream implements CanUnbuffer,
|
|||||||
private final boolean readAheadEnabled; // whether enable readAhead;
|
private final boolean readAheadEnabled; // whether enable readAhead;
|
||||||
private final boolean alwaysReadBufferSize;
|
private final boolean alwaysReadBufferSize;
|
||||||
|
|
||||||
|
private boolean firstRead = true;
|
||||||
// SAS tokens can be re-used until they expire
|
// SAS tokens can be re-used until they expire
|
||||||
private CachedSASToken cachedSasToken;
|
private CachedSASToken cachedSasToken;
|
||||||
private byte[] buffer = null; // will be initialized on first use
|
private byte[] buffer = null; // will be initialized on first use
|
||||||
@ -70,11 +78,21 @@ public class AbfsInputStream extends FSInputStream implements CanUnbuffer,
|
|||||||
// of valid bytes in buffer)
|
// of valid bytes in buffer)
|
||||||
private boolean closed = false;
|
private boolean closed = false;
|
||||||
|
|
||||||
|
// Optimisations modify the pointer fields.
|
||||||
|
// For better resilience the following fields are used to save the
|
||||||
|
// existing state before optimization flows.
|
||||||
|
private int limitBkp;
|
||||||
|
private int bCursorBkp;
|
||||||
|
private long fCursorBkp;
|
||||||
|
private long fCursorAfterLastReadBkp;
|
||||||
|
|
||||||
/** Stream statistics. */
|
/** Stream statistics. */
|
||||||
private final AbfsInputStreamStatistics streamStatistics;
|
private final AbfsInputStreamStatistics streamStatistics;
|
||||||
private long bytesFromReadAhead; // bytes read from readAhead; for testing
|
private long bytesFromReadAhead; // bytes read from readAhead; for testing
|
||||||
private long bytesFromRemoteRead; // bytes read remotely; for testing
|
private long bytesFromRemoteRead; // bytes read remotely; for testing
|
||||||
|
|
||||||
|
private final AbfsInputStreamContext context;
|
||||||
|
|
||||||
public AbfsInputStream(
|
public AbfsInputStream(
|
||||||
final AbfsClient client,
|
final AbfsClient client,
|
||||||
final Statistics statistics,
|
final Statistics statistics,
|
||||||
@ -96,6 +114,7 @@ public AbfsInputStream(
|
|||||||
this.cachedSasToken = new CachedSASToken(
|
this.cachedSasToken = new CachedSASToken(
|
||||||
abfsInputStreamContext.getSasTokenRenewPeriodForStreamsInSeconds());
|
abfsInputStreamContext.getSasTokenRenewPeriodForStreamsInSeconds());
|
||||||
this.streamStatistics = abfsInputStreamContext.getStreamStatistics();
|
this.streamStatistics = abfsInputStreamContext.getStreamStatistics();
|
||||||
|
this.context = abfsInputStreamContext;
|
||||||
readAheadBlockSize = abfsInputStreamContext.getReadAheadBlockSize();
|
readAheadBlockSize = abfsInputStreamContext.getReadAheadBlockSize();
|
||||||
|
|
||||||
// Propagate the config values to ReadBufferManager so that the first instance
|
// Propagate the config values to ReadBufferManager so that the first instance
|
||||||
@ -137,7 +156,13 @@ public synchronized int read(final byte[] b, final int off, final int len) throw
|
|||||||
}
|
}
|
||||||
incrementReadOps();
|
incrementReadOps();
|
||||||
do {
|
do {
|
||||||
lastReadBytes = readOneBlock(b, currentOff, currentLen);
|
if (shouldReadFully()) {
|
||||||
|
lastReadBytes = readFileCompletely(b, currentOff, currentLen);
|
||||||
|
} else if (shouldReadLastBlock()) {
|
||||||
|
lastReadBytes = readLastBlock(b, currentOff, currentLen);
|
||||||
|
} else {
|
||||||
|
lastReadBytes = readOneBlock(b, currentOff, currentLen);
|
||||||
|
}
|
||||||
if (lastReadBytes > 0) {
|
if (lastReadBytes > 0) {
|
||||||
currentOff += lastReadBytes;
|
currentOff += lastReadBytes;
|
||||||
currentLen -= lastReadBytes;
|
currentLen -= lastReadBytes;
|
||||||
@ -150,27 +175,24 @@ public synchronized int read(final byte[] b, final int off, final int len) throw
|
|||||||
return totalReadBytes > 0 ? totalReadBytes : lastReadBytes;
|
return totalReadBytes > 0 ? totalReadBytes : lastReadBytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean shouldReadFully() {
|
||||||
|
return this.firstRead && this.context.readSmallFilesCompletely()
|
||||||
|
&& this.contentLength <= this.bufferSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean shouldReadLastBlock() {
|
||||||
|
long footerStart = max(0, this.contentLength - FOOTER_SIZE);
|
||||||
|
return this.firstRead && this.context.optimizeFooterRead()
|
||||||
|
&& this.fCursor >= footerStart;
|
||||||
|
}
|
||||||
|
|
||||||
private int readOneBlock(final byte[] b, final int off, final int len) throws IOException {
|
private int readOneBlock(final byte[] b, final int off, final int len) throws IOException {
|
||||||
if (closed) {
|
|
||||||
throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
|
|
||||||
}
|
|
||||||
|
|
||||||
Preconditions.checkNotNull(b);
|
|
||||||
LOG.debug("read one block requested b.length = {} off {} len {}", b.length,
|
|
||||||
off, len);
|
|
||||||
|
|
||||||
if (len == 0) {
|
if (len == 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
if (!validate(b, off, len)) {
|
||||||
if (this.available() == 0) {
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (off < 0 || len < 0 || len > b.length - off) {
|
|
||||||
throw new IndexOutOfBoundsException();
|
|
||||||
}
|
|
||||||
|
|
||||||
//If buffer is empty, then fill the buffer.
|
//If buffer is empty, then fill the buffer.
|
||||||
if (bCursor == limit) {
|
if (bCursor == limit) {
|
||||||
//If EOF, then return -1
|
//If EOF, then return -1
|
||||||
@ -197,6 +219,9 @@ private int readOneBlock(final byte[] b, final int off, final int len) throws IO
|
|||||||
bytesRead = readInternal(fCursor, buffer, 0, b.length, true);
|
bytesRead = readInternal(fCursor, buffer, 0, b.length, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (firstRead) {
|
||||||
|
firstRead = false;
|
||||||
|
}
|
||||||
|
|
||||||
if (bytesRead == -1) {
|
if (bytesRead == -1) {
|
||||||
return -1;
|
return -1;
|
||||||
@ -206,11 +231,123 @@ private int readOneBlock(final byte[] b, final int off, final int len) throws IO
|
|||||||
fCursor += bytesRead;
|
fCursor += bytesRead;
|
||||||
fCursorAfterLastRead = fCursor;
|
fCursorAfterLastRead = fCursor;
|
||||||
}
|
}
|
||||||
|
return copyToUserBuffer(b, off, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int readFileCompletely(final byte[] b, final int off, final int len)
|
||||||
|
throws IOException {
|
||||||
|
if (len == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (!validate(b, off, len)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
savePointerState();
|
||||||
|
// data need to be copied to user buffer from index bCursor, bCursor has
|
||||||
|
// to be the current fCusor
|
||||||
|
bCursor = (int) fCursor;
|
||||||
|
return optimisedRead(b, off, len, 0, contentLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int readLastBlock(final byte[] b, final int off, final int len)
|
||||||
|
throws IOException {
|
||||||
|
if (len == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (!validate(b, off, len)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
savePointerState();
|
||||||
|
// data need to be copied to user buffer from index bCursor,
|
||||||
|
// AbfsInutStream buffer is going to contain data from last block start. In
|
||||||
|
// that case bCursor will be set to fCursor - lastBlockStart
|
||||||
|
long lastBlockStart = max(0, contentLength - bufferSize);
|
||||||
|
bCursor = (int) (fCursor - lastBlockStart);
|
||||||
|
// 0 if contentlength is < buffersize
|
||||||
|
long actualLenToRead = min(bufferSize, contentLength);
|
||||||
|
return optimisedRead(b, off, len, lastBlockStart, actualLenToRead);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int optimisedRead(final byte[] b, final int off, final int len,
|
||||||
|
final long readFrom, final long actualLen) throws IOException {
|
||||||
|
fCursor = readFrom;
|
||||||
|
int totalBytesRead = 0;
|
||||||
|
int lastBytesRead = 0;
|
||||||
|
try {
|
||||||
|
buffer = new byte[bufferSize];
|
||||||
|
for (int i = 0;
|
||||||
|
i < MAX_OPTIMIZED_READ_ATTEMPTS && fCursor < contentLength; i++) {
|
||||||
|
lastBytesRead = readInternal(fCursor, buffer, limit,
|
||||||
|
(int) actualLen - limit, true);
|
||||||
|
if (lastBytesRead > 0) {
|
||||||
|
totalBytesRead += lastBytesRead;
|
||||||
|
limit += lastBytesRead;
|
||||||
|
fCursor += lastBytesRead;
|
||||||
|
fCursorAfterLastRead = fCursor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.debug("Optimized read failed. Defaulting to readOneBlock {}", e);
|
||||||
|
restorePointerState();
|
||||||
|
return readOneBlock(b, off, len);
|
||||||
|
} finally {
|
||||||
|
firstRead = false;
|
||||||
|
}
|
||||||
|
if (totalBytesRead < 1) {
|
||||||
|
restorePointerState();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
// If the read was partial and the user requested part of data has
|
||||||
|
// not read then fallback to readoneblock. When limit is smaller than
|
||||||
|
// bCursor that means the user requested data has not been read.
|
||||||
|
if (fCursor < contentLength && bCursor > limit) {
|
||||||
|
restorePointerState();
|
||||||
|
return readOneBlock(b, off, len);
|
||||||
|
}
|
||||||
|
return copyToUserBuffer(b, off, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void savePointerState() {
|
||||||
|
// Saving the current state for fall back ifn case optimization fails
|
||||||
|
this.limitBkp = this.limit;
|
||||||
|
this.fCursorBkp = this.fCursor;
|
||||||
|
this.fCursorAfterLastReadBkp = this.fCursorAfterLastRead;
|
||||||
|
this.bCursorBkp = this.bCursor;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void restorePointerState() {
|
||||||
|
// Saving the current state for fall back ifn case optimization fails
|
||||||
|
this.limit = this.limitBkp;
|
||||||
|
this.fCursor = this.fCursorBkp;
|
||||||
|
this.fCursorAfterLastRead = this.fCursorAfterLastReadBkp;
|
||||||
|
this.bCursor = this.bCursorBkp;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean validate(final byte[] b, final int off, final int len)
|
||||||
|
throws IOException {
|
||||||
|
if (closed) {
|
||||||
|
throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
|
||||||
|
}
|
||||||
|
|
||||||
|
Preconditions.checkNotNull(b);
|
||||||
|
LOG.debug("read one block requested b.length = {} off {} len {}", b.length,
|
||||||
|
off, len);
|
||||||
|
|
||||||
|
if (this.available() == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (off < 0 || len < 0 || len > b.length - off) {
|
||||||
|
throw new IndexOutOfBoundsException();
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int copyToUserBuffer(byte[] b, int off, int len){
|
||||||
//If there is anything in the buffer, then return lesser of (requested bytes) and (bytes in buffer)
|
//If there is anything in the buffer, then return lesser of (requested bytes) and (bytes in buffer)
|
||||||
//(bytes returned may be less than requested)
|
//(bytes returned may be less than requested)
|
||||||
int bytesRemaining = limit - bCursor;
|
int bytesRemaining = limit - bCursor;
|
||||||
int bytesToRead = Math.min(len, bytesRemaining);
|
int bytesToRead = min(len, bytesRemaining);
|
||||||
System.arraycopy(buffer, bCursor, b, off, bytesToRead);
|
System.arraycopy(buffer, bCursor, b, off, bytesToRead);
|
||||||
bCursor += bytesToRead;
|
bCursor += bytesToRead;
|
||||||
if (statistics != null) {
|
if (statistics != null) {
|
||||||
@ -224,7 +361,6 @@ private int readOneBlock(final byte[] b, final int off, final int len) throws IO
|
|||||||
return bytesToRead;
|
return bytesToRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private int readInternal(final long position, final byte[] b, final int offset, final int length,
|
private int readInternal(final long position, final byte[] b, final int offset, final int length,
|
||||||
final boolean bypassReadAhead) throws IOException {
|
final boolean bypassReadAhead) throws IOException {
|
||||||
if (readAheadEnabled && !bypassReadAhead) {
|
if (readAheadEnabled && !bypassReadAhead) {
|
||||||
@ -239,7 +375,7 @@ private int readInternal(final long position, final byte[] b, final int offset,
|
|||||||
long nextOffset = position;
|
long nextOffset = position;
|
||||||
// First read to queue needs to be of readBufferSize and later
|
// First read to queue needs to be of readBufferSize and later
|
||||||
// of readAhead Block size
|
// of readAhead Block size
|
||||||
long nextSize = Math.min((long) bufferSize, contentLength - nextOffset);
|
long nextSize = min((long) bufferSize, contentLength - nextOffset);
|
||||||
LOG.debug("read ahead enabled issuing readheads num = {}", numReadAheads);
|
LOG.debug("read ahead enabled issuing readheads num = {}", numReadAheads);
|
||||||
while (numReadAheads > 0 && nextOffset < contentLength) {
|
while (numReadAheads > 0 && nextOffset < contentLength) {
|
||||||
LOG.debug("issuing read ahead requestedOffset = {} requested size {}",
|
LOG.debug("issuing read ahead requestedOffset = {} requested size {}",
|
||||||
@ -248,7 +384,7 @@ private int readInternal(final long position, final byte[] b, final int offset,
|
|||||||
nextOffset = nextOffset + nextSize;
|
nextOffset = nextOffset + nextSize;
|
||||||
numReadAheads--;
|
numReadAheads--;
|
||||||
// From next round onwards should be of readahead block size.
|
// From next round onwards should be of readahead block size.
|
||||||
nextSize = Math.min((long) readAheadBlockSize, contentLength - nextOffset);
|
nextSize = min((long) readAheadBlockSize, contentLength - nextOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
// try reading from buffers first
|
// try reading from buffers first
|
||||||
@ -572,4 +708,24 @@ public String toString() {
|
|||||||
}
|
}
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
int getBCursor() {
|
||||||
|
return this.bCursor;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
long getFCursor() {
|
||||||
|
return this.fCursor;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
long getFCursorAfterLastRead() {
|
||||||
|
return this.fCursorAfterLastRead;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
long getLimit() {
|
||||||
|
return this.limit;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -40,6 +40,10 @@ public class AbfsInputStreamContext extends AbfsStreamContext {
|
|||||||
|
|
||||||
private AbfsInputStreamStatistics streamStatistics;
|
private AbfsInputStreamStatistics streamStatistics;
|
||||||
|
|
||||||
|
private boolean readSmallFilesCompletely;
|
||||||
|
|
||||||
|
private boolean optimizeFooterRead;
|
||||||
|
|
||||||
public AbfsInputStreamContext(final long sasTokenRenewPeriodForStreamsInSeconds) {
|
public AbfsInputStreamContext(final long sasTokenRenewPeriodForStreamsInSeconds) {
|
||||||
super(sasTokenRenewPeriodForStreamsInSeconds);
|
super(sasTokenRenewPeriodForStreamsInSeconds);
|
||||||
}
|
}
|
||||||
@ -69,6 +73,18 @@ public AbfsInputStreamContext withStreamStatistics(
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public AbfsInputStreamContext withReadSmallFilesCompletely(
|
||||||
|
final boolean readSmallFilesCompletely) {
|
||||||
|
this.readSmallFilesCompletely = readSmallFilesCompletely;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public AbfsInputStreamContext withOptimizeFooterRead(
|
||||||
|
final boolean optimizeFooterRead) {
|
||||||
|
this.optimizeFooterRead = optimizeFooterRead;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public AbfsInputStreamContext withShouldReadBufferSizeAlways(
|
public AbfsInputStreamContext withShouldReadBufferSizeAlways(
|
||||||
final boolean alwaysReadBufferSize) {
|
final boolean alwaysReadBufferSize) {
|
||||||
this.alwaysReadBufferSize = alwaysReadBufferSize;
|
this.alwaysReadBufferSize = alwaysReadBufferSize;
|
||||||
@ -110,6 +126,14 @@ public AbfsInputStreamStatistics getStreamStatistics() {
|
|||||||
return streamStatistics;
|
return streamStatistics;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean readSmallFilesCompletely() {
|
||||||
|
return this.readSmallFilesCompletely;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean optimizeFooterRead() {
|
||||||
|
return this.optimizeFooterRead;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean shouldReadBufferSizeAlways() {
|
public boolean shouldReadBufferSizeAlways() {
|
||||||
return alwaysReadBufferSize;
|
return alwaysReadBufferSize;
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,256 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.azurebfs.services;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
|
||||||
|
import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest;
|
||||||
|
import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
|
||||||
|
import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB;
|
||||||
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
|
import static org.mockito.ArgumentMatchers.anyInt;
|
||||||
|
import static org.mockito.ArgumentMatchers.anyLong;
|
||||||
|
import static org.mockito.Mockito.doThrow;
|
||||||
|
import static org.mockito.Mockito.spy;
|
||||||
|
|
||||||
|
public class ITestAbfsInputStream extends AbstractAbfsIntegrationTest {
|
||||||
|
|
||||||
|
protected static final int HUNDRED = 100;
|
||||||
|
|
||||||
|
public ITestAbfsInputStream() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithNoOptimization() throws Exception {
|
||||||
|
for (int i = 2; i <= 7; i++) {
|
||||||
|
int fileSize = i * ONE_MB;
|
||||||
|
final AzureBlobFileSystem fs = getFileSystem(false, false, fileSize);
|
||||||
|
String fileName = methodName.getMethodName() + i;
|
||||||
|
byte[] fileContent = getRandomBytesArray(fileSize);
|
||||||
|
Path testFilePath = createFileWithContent(fs, fileName, fileContent);
|
||||||
|
testWithNoOptimization(fs, testFilePath, HUNDRED, fileContent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void testWithNoOptimization(final FileSystem fs,
|
||||||
|
final Path testFilePath, final int seekPos, final byte[] fileContent)
|
||||||
|
throws IOException {
|
||||||
|
FSDataInputStream iStream = fs.open(testFilePath);
|
||||||
|
try {
|
||||||
|
AbfsInputStream abfsInputStream = (AbfsInputStream) iStream
|
||||||
|
.getWrappedStream();
|
||||||
|
|
||||||
|
iStream = new FSDataInputStream(abfsInputStream);
|
||||||
|
seek(iStream, seekPos);
|
||||||
|
long totalBytesRead = 0;
|
||||||
|
int length = HUNDRED * HUNDRED;
|
||||||
|
do {
|
||||||
|
byte[] buffer = new byte[length];
|
||||||
|
int bytesRead = iStream.read(buffer, 0, length);
|
||||||
|
totalBytesRead += bytesRead;
|
||||||
|
if ((totalBytesRead + seekPos) >= fileContent.length) {
|
||||||
|
length = (fileContent.length - seekPos) % length;
|
||||||
|
}
|
||||||
|
assertEquals(length, bytesRead);
|
||||||
|
assertContentReadCorrectly(fileContent,
|
||||||
|
(int) (seekPos + totalBytesRead - length), length, buffer);
|
||||||
|
|
||||||
|
assertTrue(abfsInputStream.getFCursor() >= seekPos + totalBytesRead);
|
||||||
|
assertTrue(abfsInputStream.getFCursorAfterLastRead() >= seekPos + totalBytesRead);
|
||||||
|
assertTrue(abfsInputStream.getBCursor() >= totalBytesRead % abfsInputStream.getBufferSize());
|
||||||
|
assertTrue(abfsInputStream.getLimit() >= totalBytesRead % abfsInputStream.getBufferSize());
|
||||||
|
} while (totalBytesRead + seekPos < fileContent.length);
|
||||||
|
} finally {
|
||||||
|
iStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExceptionInOptimization() throws Exception {
|
||||||
|
for (int i = 2; i <= 7; i++) {
|
||||||
|
int fileSize = i * ONE_MB;
|
||||||
|
final AzureBlobFileSystem fs = getFileSystem(true, true, fileSize);
|
||||||
|
String fileName = methodName.getMethodName() + i;
|
||||||
|
byte[] fileContent = getRandomBytesArray(fileSize);
|
||||||
|
Path testFilePath = createFileWithContent(fs, fileName, fileContent);
|
||||||
|
testExceptionInOptimization(fs, testFilePath, fileSize - HUNDRED,
|
||||||
|
fileSize / 4, fileContent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testExceptionInOptimization(final FileSystem fs,
|
||||||
|
final Path testFilePath,
|
||||||
|
final int seekPos, final int length, final byte[] fileContent)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
FSDataInputStream iStream = fs.open(testFilePath);
|
||||||
|
try {
|
||||||
|
AbfsInputStream abfsInputStream = (AbfsInputStream) iStream
|
||||||
|
.getWrappedStream();
|
||||||
|
abfsInputStream = spy(abfsInputStream);
|
||||||
|
doThrow(new IOException())
|
||||||
|
.doCallRealMethod()
|
||||||
|
.when(abfsInputStream)
|
||||||
|
.readRemote(anyLong(), any(), anyInt(), anyInt());
|
||||||
|
|
||||||
|
iStream = new FSDataInputStream(abfsInputStream);
|
||||||
|
verifyBeforeSeek(abfsInputStream);
|
||||||
|
seek(iStream, seekPos);
|
||||||
|
byte[] buffer = new byte[length];
|
||||||
|
int bytesRead = iStream.read(buffer, 0, length);
|
||||||
|
long actualLength = length;
|
||||||
|
if (seekPos + length > fileContent.length) {
|
||||||
|
long delta = seekPos + length - fileContent.length;
|
||||||
|
actualLength = length - delta;
|
||||||
|
}
|
||||||
|
assertEquals(bytesRead, actualLength);
|
||||||
|
assertContentReadCorrectly(fileContent, seekPos, (int) actualLength, buffer);
|
||||||
|
assertEquals(fileContent.length, abfsInputStream.getFCursor());
|
||||||
|
assertEquals(fileContent.length, abfsInputStream.getFCursorAfterLastRead());
|
||||||
|
assertEquals(actualLength, abfsInputStream.getBCursor());
|
||||||
|
assertTrue(abfsInputStream.getLimit() >= actualLength);
|
||||||
|
} finally {
|
||||||
|
iStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected AzureBlobFileSystem getFileSystem(boolean readSmallFilesCompletely)
|
||||||
|
throws IOException {
|
||||||
|
final AzureBlobFileSystem fs = getFileSystem();
|
||||||
|
getAbfsStore(fs).getAbfsConfiguration()
|
||||||
|
.setReadSmallFilesCompletely(readSmallFilesCompletely);
|
||||||
|
return fs;
|
||||||
|
}
|
||||||
|
|
||||||
|
private AzureBlobFileSystem getFileSystem(boolean optimizeFooterRead,
|
||||||
|
boolean readSmallFileCompletely, int fileSize) throws IOException {
|
||||||
|
final AzureBlobFileSystem fs = getFileSystem();
|
||||||
|
getAbfsStore(fs).getAbfsConfiguration()
|
||||||
|
.setOptimizeFooterRead(optimizeFooterRead);
|
||||||
|
if (fileSize <= getAbfsStore(fs).getAbfsConfiguration()
|
||||||
|
.getReadBufferSize()) {
|
||||||
|
getAbfsStore(fs).getAbfsConfiguration()
|
||||||
|
.setReadSmallFilesCompletely(readSmallFileCompletely);
|
||||||
|
}
|
||||||
|
return fs;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected byte[] getRandomBytesArray(int length) {
|
||||||
|
final byte[] b = new byte[length];
|
||||||
|
new Random().nextBytes(b);
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Path createFileWithContent(FileSystem fs, String fileName,
|
||||||
|
byte[] fileContent) throws IOException {
|
||||||
|
Path testFilePath = path(fileName);
|
||||||
|
try (FSDataOutputStream oStream = fs.create(testFilePath)) {
|
||||||
|
oStream.write(fileContent);
|
||||||
|
oStream.flush();
|
||||||
|
}
|
||||||
|
return testFilePath;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected AzureBlobFileSystemStore getAbfsStore(FileSystem fs)
|
||||||
|
throws NoSuchFieldException, IllegalAccessException {
|
||||||
|
AzureBlobFileSystem abfs = (AzureBlobFileSystem) fs;
|
||||||
|
Field abfsStoreField = AzureBlobFileSystem.class
|
||||||
|
.getDeclaredField("abfsStore");
|
||||||
|
abfsStoreField.setAccessible(true);
|
||||||
|
return (AzureBlobFileSystemStore) abfsStoreField.get(abfs);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Map<String, Long> getInstrumentationMap(FileSystem fs)
|
||||||
|
throws NoSuchFieldException, IllegalAccessException {
|
||||||
|
AzureBlobFileSystem abfs = (AzureBlobFileSystem) fs;
|
||||||
|
Field abfsCountersField = AzureBlobFileSystem.class
|
||||||
|
.getDeclaredField("abfsCounters");
|
||||||
|
abfsCountersField.setAccessible(true);
|
||||||
|
AbfsCounters abfsCounters = (AbfsCounters) abfsCountersField.get(abfs);
|
||||||
|
return abfsCounters.toMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void assertContentReadCorrectly(byte[] actualFileContent, int from,
|
||||||
|
int len, byte[] contentRead) {
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
assertEquals(contentRead[i], actualFileContent[i + from]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void assertBuffersAreNotEqual(byte[] actualContent,
|
||||||
|
byte[] contentRead, AbfsConfiguration conf) {
|
||||||
|
assertBufferEquality(actualContent, contentRead, conf, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void assertBuffersAreEqual(byte[] actualContent, byte[] contentRead,
|
||||||
|
AbfsConfiguration conf) {
|
||||||
|
assertBufferEquality(actualContent, contentRead, conf, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertBufferEquality(byte[] actualContent, byte[] contentRead,
|
||||||
|
AbfsConfiguration conf, boolean assertEqual) {
|
||||||
|
int bufferSize = conf.getReadBufferSize();
|
||||||
|
int actualContentSize = actualContent.length;
|
||||||
|
int n = (actualContentSize < bufferSize) ? actualContentSize : bufferSize;
|
||||||
|
int matches = 0;
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
if (actualContent[i] == contentRead[i]) {
|
||||||
|
matches++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (assertEqual) {
|
||||||
|
assertEquals(n, matches);
|
||||||
|
} else {
|
||||||
|
assertNotEquals(n, matches);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void seek(FSDataInputStream iStream, long seekPos)
|
||||||
|
throws IOException {
|
||||||
|
AbfsInputStream abfsInputStream = (AbfsInputStream) iStream.getWrappedStream();
|
||||||
|
verifyBeforeSeek(abfsInputStream);
|
||||||
|
iStream.seek(seekPos);
|
||||||
|
verifyAfterSeek(abfsInputStream, seekPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyBeforeSeek(AbfsInputStream abfsInputStream){
|
||||||
|
assertEquals(0, abfsInputStream.getFCursor());
|
||||||
|
assertEquals(-1, abfsInputStream.getFCursorAfterLastRead());
|
||||||
|
assertEquals(0, abfsInputStream.getLimit());
|
||||||
|
assertEquals(0, abfsInputStream.getBCursor());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyAfterSeek(AbfsInputStream abfsInputStream, long seekPos){
|
||||||
|
assertEquals(seekPos, abfsInputStream.getFCursor());
|
||||||
|
assertEquals(-1, abfsInputStream.getFCursorAfterLastRead());
|
||||||
|
assertEquals(0, abfsInputStream.getLimit());
|
||||||
|
assertEquals(0, abfsInputStream.getBCursor());
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,358 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.azurebfs.services;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
|
||||||
|
import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
|
||||||
|
|
||||||
|
import static java.lang.Math.max;
|
||||||
|
import static java.lang.Math.min;
|
||||||
|
|
||||||
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
|
import static org.mockito.ArgumentMatchers.anyInt;
|
||||||
|
import static org.mockito.ArgumentMatchers.anyLong;
|
||||||
|
import static org.mockito.Mockito.doReturn;
|
||||||
|
import static org.mockito.Mockito.spy;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE;
|
||||||
|
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_KB;
|
||||||
|
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB;
|
||||||
|
|
||||||
|
public class ITestAbfsInputStreamReadFooter extends ITestAbfsInputStream {
|
||||||
|
|
||||||
|
private static final int TEN = 10;
|
||||||
|
private static final int TWENTY = 20;
|
||||||
|
|
||||||
|
public ITestAbfsInputStreamReadFooter() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testOnlyOneServerCallIsMadeWhenTheConfIsTrue() throws Exception {
|
||||||
|
testNumBackendCalls(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultipleServerCallsAreMadeWhenTheConfIsFalse()
|
||||||
|
throws Exception {
|
||||||
|
testNumBackendCalls(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testNumBackendCalls(boolean optimizeFooterRead)
|
||||||
|
throws Exception {
|
||||||
|
for (int i = 1; i <= 4; i++) {
|
||||||
|
int fileSize = i * ONE_MB;
|
||||||
|
final AzureBlobFileSystem fs = getFileSystem(optimizeFooterRead,
|
||||||
|
fileSize);
|
||||||
|
String fileName = methodName.getMethodName() + i;
|
||||||
|
byte[] fileContent = getRandomBytesArray(fileSize);
|
||||||
|
Path testFilePath = createFileWithContent(fs, fileName, fileContent);
|
||||||
|
int length = AbfsInputStream.FOOTER_SIZE;
|
||||||
|
try (FSDataInputStream iStream = fs.open(testFilePath)) {
|
||||||
|
byte[] buffer = new byte[length];
|
||||||
|
|
||||||
|
Map<String, Long> metricMap = getInstrumentationMap(fs);
|
||||||
|
long requestsMadeBeforeTest = metricMap
|
||||||
|
.get(CONNECTIONS_MADE.getStatName());
|
||||||
|
|
||||||
|
iStream.seek(fileSize - 8);
|
||||||
|
iStream.read(buffer, 0, length);
|
||||||
|
|
||||||
|
iStream.seek(fileSize - (TEN * ONE_KB));
|
||||||
|
iStream.read(buffer, 0, length);
|
||||||
|
|
||||||
|
iStream.seek(fileSize - (TWENTY * ONE_KB));
|
||||||
|
iStream.read(buffer, 0, length);
|
||||||
|
|
||||||
|
metricMap = getInstrumentationMap(fs);
|
||||||
|
long requestsMadeAfterTest = metricMap
|
||||||
|
.get(CONNECTIONS_MADE.getStatName());
|
||||||
|
|
||||||
|
if (optimizeFooterRead) {
|
||||||
|
assertEquals(1, requestsMadeAfterTest - requestsMadeBeforeTest);
|
||||||
|
} else {
|
||||||
|
assertEquals(3, requestsMadeAfterTest - requestsMadeBeforeTest);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToBeginAndReadWithConfTrue() throws Exception {
|
||||||
|
testSeekAndReadWithConf(true, SeekTo.BEGIN);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToBeginAndReadWithConfFalse() throws Exception {
|
||||||
|
testSeekAndReadWithConf(false, SeekTo.BEGIN);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToBeforeFooterAndReadWithConfTrue() throws Exception {
|
||||||
|
testSeekAndReadWithConf(true, SeekTo.BEFORE_FOOTER_START);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToBeforeFooterAndReadWithConfFalse() throws Exception {
|
||||||
|
testSeekAndReadWithConf(false, SeekTo.BEFORE_FOOTER_START);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToFooterAndReadWithConfTrue() throws Exception {
|
||||||
|
testSeekAndReadWithConf(true, SeekTo.AT_FOOTER_START);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToFooterAndReadWithConfFalse() throws Exception {
|
||||||
|
testSeekAndReadWithConf(false, SeekTo.AT_FOOTER_START);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToAfterFooterAndReadWithConfTrue() throws Exception {
|
||||||
|
testSeekAndReadWithConf(true, SeekTo.AFTER_FOOTER_START);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToToAfterFooterAndReadWithConfFalse() throws Exception {
|
||||||
|
testSeekAndReadWithConf(false, SeekTo.AFTER_FOOTER_START);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToEndAndReadWithConfTrue() throws Exception {
|
||||||
|
testSeekAndReadWithConf(true, SeekTo.END);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToEndAndReadWithConfFalse() throws Exception {
|
||||||
|
testSeekAndReadWithConf(false, SeekTo.END);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testSeekAndReadWithConf(boolean optimizeFooterRead,
|
||||||
|
SeekTo seekTo) throws Exception {
|
||||||
|
for (int i = 2; i <= 6; i++) {
|
||||||
|
int fileSize = i * ONE_MB;
|
||||||
|
final AzureBlobFileSystem fs = getFileSystem(optimizeFooterRead,
|
||||||
|
fileSize);
|
||||||
|
String fileName = methodName.getMethodName() + i;
|
||||||
|
byte[] fileContent = getRandomBytesArray(fileSize);
|
||||||
|
Path testFilePath = createFileWithContent(fs, fileName, fileContent);
|
||||||
|
seekReadAndTest(fs, testFilePath, seekPos(seekTo, fileSize), HUNDRED,
|
||||||
|
fileContent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int seekPos(SeekTo seekTo, int fileSize) {
|
||||||
|
if (seekTo == SeekTo.BEGIN) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (seekTo == SeekTo.BEFORE_FOOTER_START) {
|
||||||
|
return fileSize - AbfsInputStream.FOOTER_SIZE - 1;
|
||||||
|
}
|
||||||
|
if (seekTo == SeekTo.AT_FOOTER_START) {
|
||||||
|
return fileSize - AbfsInputStream.FOOTER_SIZE;
|
||||||
|
}
|
||||||
|
if (seekTo == SeekTo.END) {
|
||||||
|
return fileSize - 1;
|
||||||
|
}
|
||||||
|
//seekTo == SeekTo.AFTER_FOOTER_START
|
||||||
|
return fileSize - AbfsInputStream.FOOTER_SIZE + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void seekReadAndTest(final FileSystem fs, final Path testFilePath,
|
||||||
|
final int seekPos, final int length, final byte[] fileContent)
|
||||||
|
throws IOException, NoSuchFieldException, IllegalAccessException {
|
||||||
|
AbfsConfiguration conf = getAbfsStore(fs).getAbfsConfiguration();
|
||||||
|
long actualContentLength = fileContent.length;
|
||||||
|
try (FSDataInputStream iStream = fs.open(testFilePath)) {
|
||||||
|
AbfsInputStream abfsInputStream = (AbfsInputStream) iStream
|
||||||
|
.getWrappedStream();
|
||||||
|
long bufferSize = abfsInputStream.getBufferSize();
|
||||||
|
seek(iStream, seekPos);
|
||||||
|
byte[] buffer = new byte[length];
|
||||||
|
long bytesRead = iStream.read(buffer, 0, length);
|
||||||
|
|
||||||
|
long footerStart = max(0,
|
||||||
|
actualContentLength - AbfsInputStream.FOOTER_SIZE);
|
||||||
|
boolean optimizationOn =
|
||||||
|
conf.optimizeFooterRead() && seekPos >= footerStart;
|
||||||
|
|
||||||
|
long actualLength = length;
|
||||||
|
if (seekPos + length > actualContentLength) {
|
||||||
|
long delta = seekPos + length - actualContentLength;
|
||||||
|
actualLength = length - delta;
|
||||||
|
}
|
||||||
|
long expectedLimit;
|
||||||
|
long expectedBCurson;
|
||||||
|
long expectedFCursor;
|
||||||
|
if (optimizationOn) {
|
||||||
|
if (actualContentLength <= bufferSize) {
|
||||||
|
expectedLimit = actualContentLength;
|
||||||
|
expectedBCurson = seekPos + actualLength;
|
||||||
|
} else {
|
||||||
|
expectedLimit = bufferSize;
|
||||||
|
long lastBlockStart = max(0, actualContentLength - bufferSize);
|
||||||
|
expectedBCurson = seekPos - lastBlockStart + actualLength;
|
||||||
|
}
|
||||||
|
expectedFCursor = actualContentLength;
|
||||||
|
} else {
|
||||||
|
if (seekPos + bufferSize < actualContentLength) {
|
||||||
|
expectedLimit = bufferSize;
|
||||||
|
expectedFCursor = bufferSize;
|
||||||
|
} else {
|
||||||
|
expectedLimit = actualContentLength - seekPos;
|
||||||
|
expectedFCursor = min(seekPos + bufferSize, actualContentLength);
|
||||||
|
}
|
||||||
|
expectedBCurson = actualLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(expectedFCursor, abfsInputStream.getFCursor());
|
||||||
|
assertEquals(expectedFCursor, abfsInputStream.getFCursorAfterLastRead());
|
||||||
|
assertEquals(expectedLimit, abfsInputStream.getLimit());
|
||||||
|
assertEquals(expectedBCurson, abfsInputStream.getBCursor());
|
||||||
|
assertEquals(actualLength, bytesRead);
|
||||||
|
// Verify user-content read
|
||||||
|
assertContentReadCorrectly(fileContent, seekPos, (int) actualLength, buffer);
|
||||||
|
// Verify data read to AbfsInputStream buffer
|
||||||
|
int from = seekPos;
|
||||||
|
if (optimizationOn) {
|
||||||
|
from = (int) max(0, actualContentLength - bufferSize);
|
||||||
|
}
|
||||||
|
assertContentReadCorrectly(fileContent, from, (int) abfsInputStream.getLimit(),
|
||||||
|
abfsInputStream.getBuffer());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPartialReadWithNoData()
|
||||||
|
throws Exception {
|
||||||
|
for (int i = 2; i <= 6; i++) {
|
||||||
|
int fileSize = i * ONE_MB;
|
||||||
|
final AzureBlobFileSystem fs = getFileSystem(true, fileSize);
|
||||||
|
String fileName = methodName.getMethodName() + i;
|
||||||
|
byte[] fileContent = getRandomBytesArray(fileSize);
|
||||||
|
Path testFilePath = createFileWithContent(fs, fileName, fileContent);
|
||||||
|
testPartialReadWithNoData(fs, testFilePath,
|
||||||
|
fileSize - AbfsInputStream.FOOTER_SIZE, AbfsInputStream.FOOTER_SIZE,
|
||||||
|
fileContent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testPartialReadWithNoData(final FileSystem fs,
|
||||||
|
final Path testFilePath, final int seekPos, final int length,
|
||||||
|
final byte[] fileContent)
|
||||||
|
throws IOException, NoSuchFieldException, IllegalAccessException {
|
||||||
|
FSDataInputStream iStream = fs.open(testFilePath);
|
||||||
|
try {
|
||||||
|
AbfsInputStream abfsInputStream = (AbfsInputStream) iStream
|
||||||
|
.getWrappedStream();
|
||||||
|
abfsInputStream = spy(abfsInputStream);
|
||||||
|
doReturn(10).doReturn(10).doCallRealMethod().when(abfsInputStream)
|
||||||
|
.readRemote(anyLong(), any(), anyInt(), anyInt());
|
||||||
|
|
||||||
|
iStream = new FSDataInputStream(abfsInputStream);
|
||||||
|
seek(iStream, seekPos);
|
||||||
|
|
||||||
|
byte[] buffer = new byte[length];
|
||||||
|
int bytesRead = iStream.read(buffer, 0, length);
|
||||||
|
assertEquals(length, bytesRead);
|
||||||
|
assertContentReadCorrectly(fileContent, seekPos, length, buffer);
|
||||||
|
assertEquals(fileContent.length, abfsInputStream.getFCursor());
|
||||||
|
assertEquals(length, abfsInputStream.getBCursor());
|
||||||
|
assertTrue(abfsInputStream.getLimit() >= length);
|
||||||
|
} finally {
|
||||||
|
iStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPartialReadWithSomeDat()
|
||||||
|
throws Exception {
|
||||||
|
for (int i = 3; i <= 6; i++) {
|
||||||
|
int fileSize = i * ONE_MB;
|
||||||
|
final AzureBlobFileSystem fs = getFileSystem(true, fileSize);
|
||||||
|
String fileName = methodName.getMethodName() + i;
|
||||||
|
byte[] fileContent = getRandomBytesArray(fileSize);
|
||||||
|
Path testFilePath = createFileWithContent(fs, fileName, fileContent);
|
||||||
|
testPartialReadWithSomeDat(fs, testFilePath,
|
||||||
|
fileSize - AbfsInputStream.FOOTER_SIZE, AbfsInputStream.FOOTER_SIZE,
|
||||||
|
fileContent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testPartialReadWithSomeDat(final FileSystem fs,
|
||||||
|
final Path testFilePath, final int seekPos, final int length,
|
||||||
|
final byte[] fileContent)
|
||||||
|
throws IOException, NoSuchFieldException, IllegalAccessException {
|
||||||
|
FSDataInputStream iStream = fs.open(testFilePath);
|
||||||
|
try {
|
||||||
|
AbfsInputStream abfsInputStream = (AbfsInputStream) iStream
|
||||||
|
.getWrappedStream();
|
||||||
|
abfsInputStream = spy(abfsInputStream);
|
||||||
|
// first readRemote, will return first 10 bytes
|
||||||
|
// second readRemote returns data till the last 2 bytes
|
||||||
|
int someDataLength = 2;
|
||||||
|
int secondReturnSize =
|
||||||
|
min(fileContent.length, abfsInputStream.getBufferSize()) - 10
|
||||||
|
- someDataLength;
|
||||||
|
doReturn(10).doReturn(secondReturnSize).doCallRealMethod()
|
||||||
|
.when(abfsInputStream)
|
||||||
|
.readRemote(anyLong(), any(), anyInt(), anyInt());
|
||||||
|
|
||||||
|
iStream = new FSDataInputStream(abfsInputStream);
|
||||||
|
seek(iStream, seekPos);
|
||||||
|
|
||||||
|
byte[] buffer = new byte[length];
|
||||||
|
int bytesRead = iStream.read(buffer, 0, length);
|
||||||
|
assertEquals(length, bytesRead);
|
||||||
|
assertEquals(fileContent.length, abfsInputStream.getFCursor());
|
||||||
|
// someDataLength(2), because in the do-while loop in read, the 2nd loop
|
||||||
|
// will go to readoneblock and that resets the bCursor to 0 as
|
||||||
|
// bCursor == limit finally when the 2 bytes are read bCursor and limit
|
||||||
|
// will be at someDataLength(2)
|
||||||
|
assertEquals(someDataLength, abfsInputStream.getBCursor());
|
||||||
|
assertEquals(someDataLength, abfsInputStream.getLimit());
|
||||||
|
} finally {
|
||||||
|
iStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private AzureBlobFileSystem getFileSystem(boolean optimizeFooterRead,
|
||||||
|
int fileSize) throws IOException {
|
||||||
|
final AzureBlobFileSystem fs = getFileSystem();
|
||||||
|
getAbfsStore(fs).getAbfsConfiguration()
|
||||||
|
.setOptimizeFooterRead(optimizeFooterRead);
|
||||||
|
if (fileSize <= getAbfsStore(fs).getAbfsConfiguration()
|
||||||
|
.getReadBufferSize()) {
|
||||||
|
getAbfsStore(fs).getAbfsConfiguration()
|
||||||
|
.setReadSmallFilesCompletely(false);
|
||||||
|
}
|
||||||
|
return fs;
|
||||||
|
}
|
||||||
|
|
||||||
|
private enum SeekTo {
|
||||||
|
BEGIN, AT_FOOTER_START, BEFORE_FOOTER_START, AFTER_FOOTER_START, END
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,326 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.azurebfs.services;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
|
||||||
|
|
||||||
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
|
import static org.mockito.ArgumentMatchers.anyInt;
|
||||||
|
import static org.mockito.ArgumentMatchers.anyLong;
|
||||||
|
import static org.mockito.Mockito.doReturn;
|
||||||
|
import static org.mockito.Mockito.spy;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE;
|
||||||
|
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_KB;
|
||||||
|
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB;
|
||||||
|
|
||||||
|
public class ITestAbfsInputStreamSmallFileReads extends ITestAbfsInputStream {
|
||||||
|
|
||||||
|
public ITestAbfsInputStreamSmallFileReads() throws Exception {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testOnlyOneServerCallIsMadeWhenTheConfIsTrue() throws Exception {
|
||||||
|
testNumBackendCalls(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultipleServerCallsAreMadeWhenTheConfIsFalse()
|
||||||
|
throws Exception {
|
||||||
|
testNumBackendCalls(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testNumBackendCalls(boolean readSmallFilesCompletely)
|
||||||
|
throws Exception {
|
||||||
|
final AzureBlobFileSystem fs = getFileSystem(readSmallFilesCompletely);
|
||||||
|
for (int i = 1; i <= 4; i++) {
|
||||||
|
String fileName = methodName.getMethodName() + i;
|
||||||
|
int fileSize = i * ONE_MB;
|
||||||
|
byte[] fileContent = getRandomBytesArray(fileSize);
|
||||||
|
Path testFilePath = createFileWithContent(fs, fileName, fileContent);
|
||||||
|
int length = ONE_KB;
|
||||||
|
try (FSDataInputStream iStream = fs.open(testFilePath)) {
|
||||||
|
byte[] buffer = new byte[length];
|
||||||
|
|
||||||
|
Map<String, Long> metricMap = getInstrumentationMap(fs);
|
||||||
|
long requestsMadeBeforeTest = metricMap
|
||||||
|
.get(CONNECTIONS_MADE.getStatName());
|
||||||
|
|
||||||
|
iStream.seek(seekPos(SeekTo.END, fileSize, length));
|
||||||
|
iStream.read(buffer, 0, length);
|
||||||
|
|
||||||
|
iStream.seek(seekPos(SeekTo.MIDDLE, fileSize, length));
|
||||||
|
iStream.read(buffer, 0, length);
|
||||||
|
|
||||||
|
iStream.seek(seekPos(SeekTo.BEGIN, fileSize, length));
|
||||||
|
iStream.read(buffer, 0, length);
|
||||||
|
|
||||||
|
metricMap = getInstrumentationMap(fs);
|
||||||
|
long requestsMadeAfterTest = metricMap
|
||||||
|
.get(CONNECTIONS_MADE.getStatName());
|
||||||
|
|
||||||
|
if (readSmallFilesCompletely) {
|
||||||
|
assertEquals(1, requestsMadeAfterTest - requestsMadeBeforeTest);
|
||||||
|
} else {
|
||||||
|
assertEquals(3, requestsMadeAfterTest - requestsMadeBeforeTest);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToBeginingAndReadSmallFileWithConfTrue()
|
||||||
|
throws Exception {
|
||||||
|
testSeekAndReadWithConf(SeekTo.BEGIN, 2, 4, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToBeginingAndReadSmallFileWithConfFalse()
|
||||||
|
throws Exception {
|
||||||
|
testSeekAndReadWithConf(SeekTo.BEGIN, 2, 4, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToBeginingAndReadBigFileWithConfTrue() throws Exception {
|
||||||
|
testSeekAndReadWithConf(SeekTo.BEGIN, 5, 6, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToBeginingAndReadBigFileWithConfFalse() throws Exception {
|
||||||
|
testSeekAndReadWithConf(SeekTo.BEGIN, 5, 6, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToEndAndReadSmallFileWithConfTrue() throws Exception {
|
||||||
|
testSeekAndReadWithConf(SeekTo.END, 2, 4, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToEndAndReadSmallFileWithConfFalse() throws Exception {
|
||||||
|
testSeekAndReadWithConf(SeekTo.END, 2, 4, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToEndAndReadBigFileWithConfTrue() throws Exception {
|
||||||
|
testSeekAndReadWithConf(SeekTo.END, 5, 6, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToEndAndReaBigFiledWithConfFalse() throws Exception {
|
||||||
|
testSeekAndReadWithConf(SeekTo.END, 5, 6, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToMiddleAndReadSmallFileWithConfTrue() throws Exception {
|
||||||
|
testSeekAndReadWithConf(SeekTo.MIDDLE, 2, 4, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToMiddleAndReadSmallFileWithConfFalse() throws Exception {
|
||||||
|
testSeekAndReadWithConf(SeekTo.MIDDLE, 2, 4, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToMiddleAndReaBigFileWithConfTrue() throws Exception {
|
||||||
|
testSeekAndReadWithConf(SeekTo.MIDDLE, 5, 6, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSeekToMiddleAndReadBigFileWithConfFalse() throws Exception {
|
||||||
|
testSeekAndReadWithConf(SeekTo.MIDDLE, 5, 6, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testSeekAndReadWithConf(SeekTo seekTo, int startFileSizeInMB,
|
||||||
|
int endFileSizeInMB, boolean readSmallFilesCompletely) throws Exception {
|
||||||
|
final AzureBlobFileSystem fs = getFileSystem(readSmallFilesCompletely);
|
||||||
|
for (int i = startFileSizeInMB; i <= endFileSizeInMB; i++) {
|
||||||
|
String fileName = methodName.getMethodName() + i;
|
||||||
|
int fileSize = i * ONE_MB;
|
||||||
|
byte[] fileContent = getRandomBytesArray(fileSize);
|
||||||
|
Path testFilePath = createFileWithContent(fs, fileName, fileContent);
|
||||||
|
int length = ONE_KB;
|
||||||
|
int seekPos = seekPos(seekTo, fileSize, length);
|
||||||
|
seekReadAndTest(fs, testFilePath, seekPos, length, fileContent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int seekPos(SeekTo seekTo, int fileSize, int length) {
|
||||||
|
if (seekTo == SeekTo.BEGIN) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (seekTo == SeekTo.END) {
|
||||||
|
return fileSize - length;
|
||||||
|
}
|
||||||
|
return fileSize / 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void seekReadAndTest(FileSystem fs, Path testFilePath, int seekPos,
|
||||||
|
int length, byte[] fileContent)
|
||||||
|
throws IOException, NoSuchFieldException, IllegalAccessException {
|
||||||
|
AbfsConfiguration conf = getAbfsStore(fs).getAbfsConfiguration();
|
||||||
|
try (FSDataInputStream iStream = fs.open(testFilePath)) {
|
||||||
|
seek(iStream, seekPos);
|
||||||
|
byte[] buffer = new byte[length];
|
||||||
|
int bytesRead = iStream.read(buffer, 0, length);
|
||||||
|
assertEquals(bytesRead, length);
|
||||||
|
assertContentReadCorrectly(fileContent, seekPos, length, buffer);
|
||||||
|
AbfsInputStream abfsInputStream = (AbfsInputStream) iStream
|
||||||
|
.getWrappedStream();
|
||||||
|
|
||||||
|
final int readBufferSize = conf.getReadBufferSize();
|
||||||
|
final int fileContentLength = fileContent.length;
|
||||||
|
final boolean smallFile = fileContentLength <= readBufferSize;
|
||||||
|
int expectedLimit, expectedFCursor;
|
||||||
|
int expectedBCursor;
|
||||||
|
if (conf.readSmallFilesCompletely() && smallFile) {
|
||||||
|
assertBuffersAreEqual(fileContent, abfsInputStream.getBuffer(), conf);
|
||||||
|
expectedFCursor = fileContentLength;
|
||||||
|
expectedLimit = fileContentLength;
|
||||||
|
expectedBCursor = seekPos + length;
|
||||||
|
} else {
|
||||||
|
if ((seekPos == 0)) {
|
||||||
|
assertBuffersAreEqual(fileContent, abfsInputStream.getBuffer(), conf);
|
||||||
|
} else {
|
||||||
|
assertBuffersAreNotEqual(fileContent, abfsInputStream.getBuffer(),
|
||||||
|
conf);
|
||||||
|
}
|
||||||
|
expectedBCursor = length;
|
||||||
|
expectedFCursor = (fileContentLength < (seekPos + readBufferSize))
|
||||||
|
? fileContentLength
|
||||||
|
: (seekPos + readBufferSize);
|
||||||
|
expectedLimit = (fileContentLength < (seekPos + readBufferSize))
|
||||||
|
? (fileContentLength - seekPos)
|
||||||
|
: readBufferSize;
|
||||||
|
}
|
||||||
|
assertEquals(expectedFCursor, abfsInputStream.getFCursor());
|
||||||
|
assertEquals(expectedFCursor, abfsInputStream.getFCursorAfterLastRead());
|
||||||
|
assertEquals(expectedBCursor, abfsInputStream.getBCursor());
|
||||||
|
assertEquals(expectedLimit, abfsInputStream.getLimit());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPartialReadWithNoData() throws Exception {
|
||||||
|
for (int i = 2; i <= 4; i++) {
|
||||||
|
int fileSize = i * ONE_MB;
|
||||||
|
final AzureBlobFileSystem fs = getFileSystem(true);
|
||||||
|
String fileName = methodName.getMethodName() + i;
|
||||||
|
byte[] fileContent = getRandomBytesArray(fileSize);
|
||||||
|
Path testFilePath = createFileWithContent(fs, fileName, fileContent);
|
||||||
|
partialReadWithNoData(fs, testFilePath, fileSize / 2, fileSize / 4,
|
||||||
|
fileContent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void partialReadWithNoData(final FileSystem fs,
|
||||||
|
final Path testFilePath,
|
||||||
|
final int seekPos, final int length, final byte[] fileContent)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
FSDataInputStream iStream = fs.open(testFilePath);
|
||||||
|
try {
|
||||||
|
AbfsInputStream abfsInputStream = (AbfsInputStream) iStream
|
||||||
|
.getWrappedStream();
|
||||||
|
abfsInputStream = spy(abfsInputStream);
|
||||||
|
doReturn(10)
|
||||||
|
.doReturn(10)
|
||||||
|
.doCallRealMethod()
|
||||||
|
.when(abfsInputStream)
|
||||||
|
.readRemote(anyLong(), any(), anyInt(), anyInt());
|
||||||
|
|
||||||
|
iStream = new FSDataInputStream(abfsInputStream);
|
||||||
|
seek(iStream, seekPos);
|
||||||
|
byte[] buffer = new byte[length];
|
||||||
|
int bytesRead = iStream.read(buffer, 0, length);
|
||||||
|
assertEquals(bytesRead, length);
|
||||||
|
assertContentReadCorrectly(fileContent, seekPos, length, buffer);
|
||||||
|
assertEquals(fileContent.length, abfsInputStream.getFCursor());
|
||||||
|
assertEquals(fileContent.length,
|
||||||
|
abfsInputStream.getFCursorAfterLastRead());
|
||||||
|
assertEquals(length, abfsInputStream.getBCursor());
|
||||||
|
assertTrue(abfsInputStream.getLimit() >= length);
|
||||||
|
} finally {
|
||||||
|
iStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPartialReadWithSomeData() throws Exception {
|
||||||
|
for (int i = 2; i <= 4; i++) {
|
||||||
|
int fileSize = i * ONE_MB;
|
||||||
|
final AzureBlobFileSystem fs = getFileSystem(true);
|
||||||
|
String fileName = methodName.getMethodName() + i;
|
||||||
|
byte[] fileContent = getRandomBytesArray(fileSize);
|
||||||
|
Path testFilePath = createFileWithContent(fs, fileName, fileContent);
|
||||||
|
partialReadWithSomeData(fs, testFilePath, fileSize / 2,
|
||||||
|
fileSize / 4, fileContent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void partialReadWithSomeData(final FileSystem fs,
|
||||||
|
final Path testFilePath,
|
||||||
|
final int seekPos, final int length, final byte[] fileContent)
|
||||||
|
throws IOException, NoSuchFieldException, IllegalAccessException {
|
||||||
|
FSDataInputStream iStream = fs.open(testFilePath);
|
||||||
|
try {
|
||||||
|
AbfsInputStream abfsInputStream = (AbfsInputStream) iStream
|
||||||
|
.getWrappedStream();
|
||||||
|
abfsInputStream = spy(abfsInputStream);
|
||||||
|
// first readRemote, will return first 10 bytes
|
||||||
|
// second readRemote, seekPos - someDataLength(10) will reach the
|
||||||
|
// seekPos as 10 bytes are already read in the first call. Plus
|
||||||
|
// someDataLength(10)
|
||||||
|
int someDataLength = 10;
|
||||||
|
int secondReturnSize = seekPos - 10 + someDataLength;
|
||||||
|
doReturn(10)
|
||||||
|
.doReturn(secondReturnSize)
|
||||||
|
.doCallRealMethod()
|
||||||
|
.when(abfsInputStream)
|
||||||
|
.readRemote(anyLong(), any(), anyInt(), anyInt());
|
||||||
|
|
||||||
|
iStream = new FSDataInputStream(abfsInputStream);
|
||||||
|
seek(iStream, seekPos);
|
||||||
|
|
||||||
|
byte[] buffer = new byte[length];
|
||||||
|
int bytesRead = iStream.read(buffer, 0, length);
|
||||||
|
assertEquals(length, bytesRead);
|
||||||
|
assertTrue(abfsInputStream.getFCursor() > seekPos + length);
|
||||||
|
assertTrue(abfsInputStream.getFCursorAfterLastRead() > seekPos + length);
|
||||||
|
// Optimized read was no complete but it got some user requested data
|
||||||
|
// from server. So obviously the buffer will contain data more than
|
||||||
|
// seekPos + len
|
||||||
|
assertEquals(length - someDataLength, abfsInputStream.getBCursor());
|
||||||
|
assertTrue(abfsInputStream.getLimit() > length - someDataLength);
|
||||||
|
} finally {
|
||||||
|
iStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private enum SeekTo {BEGIN, MIDDLE, END}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user