HDFS-3885. QJM: optimize log sync when JN is lagging behind. Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-3077@1383039 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
aa65777ef0
commit
ca4582222e
@ -54,3 +54,5 @@ HDFS-3893. QJM: Make QJM work with security enabled. (atm)
|
|||||||
HDFS-3897. QJM: TestBlockToken fails after HDFS-3893. (atm)
|
HDFS-3897. QJM: TestBlockToken fails after HDFS-3893. (atm)
|
||||||
|
|
||||||
HDFS-3898. QJM: enable TCP_NODELAY for IPC (todd)
|
HDFS-3898. QJM: enable TCP_NODELAY for IPC (todd)
|
||||||
|
|
||||||
|
HDFS-3885. QJM: optimize log sync when JN is lagging behind (todd)
|
||||||
|
@ -84,7 +84,7 @@ public void create() throws IOException {
|
|||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
setReadyToFlush();
|
setReadyToFlush();
|
||||||
flushAndSync();
|
flushAndSync(true);
|
||||||
try {
|
try {
|
||||||
lh.close();
|
lh.close();
|
||||||
} catch (InterruptedException ie) {
|
} catch (InterruptedException ie) {
|
||||||
@ -130,7 +130,7 @@ public void setReadyToFlush() throws IOException {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void flushAndSync() throws IOException {
|
public void flushAndSync(boolean durable) throws IOException {
|
||||||
assert(syncLatch != null);
|
assert(syncLatch != null);
|
||||||
try {
|
try {
|
||||||
syncLatch.await();
|
syncLatch.await();
|
||||||
|
@ -77,7 +77,7 @@ public void setReadyToFlush() throws IOException {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void flushAndSync() throws IOException {
|
protected void flushAndSync(boolean durable) throws IOException {
|
||||||
int numReadyBytes = buf.countReadyBytes();
|
int numReadyBytes = buf.countReadyBytes();
|
||||||
if (numReadyBytes > 0) {
|
if (numReadyBytes > 0) {
|
||||||
int numReadyTxns = buf.countReadyTxns();
|
int numReadyTxns = buf.countReadyTxns();
|
||||||
|
@ -301,17 +301,23 @@ synchronized void journal(RequestInfo reqInfo,
|
|||||||
if (LOG.isTraceEnabled()) {
|
if (LOG.isTraceEnabled()) {
|
||||||
LOG.trace("Writing txid " + firstTxnId + "-" + lastTxnId);
|
LOG.trace("Writing txid " + firstTxnId + "-" + lastTxnId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the edit has already been marked as committed, we know
|
||||||
|
// it has been fsynced on a quorum of other nodes, and we are
|
||||||
|
// "catching up" with the rest. Hence we do not need to fsync.
|
||||||
|
boolean isLagging = lastTxnId <= committedTxnId.get();
|
||||||
|
boolean shouldFsync = !isLagging;
|
||||||
|
|
||||||
curSegment.writeRaw(records, 0, records.length);
|
curSegment.writeRaw(records, 0, records.length);
|
||||||
curSegment.setReadyToFlush();
|
curSegment.setReadyToFlush();
|
||||||
Stopwatch sw = new Stopwatch();
|
Stopwatch sw = new Stopwatch();
|
||||||
sw.start();
|
sw.start();
|
||||||
curSegment.flush();
|
curSegment.flush(shouldFsync);
|
||||||
sw.stop();
|
sw.stop();
|
||||||
|
|
||||||
metrics.addSync(sw.elapsedTime(TimeUnit.MICROSECONDS));
|
metrics.addSync(sw.elapsedTime(TimeUnit.MICROSECONDS));
|
||||||
|
|
||||||
if (committedTxnId.get() > lastTxnId) {
|
if (isLagging) {
|
||||||
// This batch of edits has already been committed on a quorum of other
|
// This batch of edits has already been committed on a quorum of other
|
||||||
// nodes. So, we are in "catch up" mode. This gets its own metric.
|
// nodes. So, we are in "catch up" mode. This gets its own metric.
|
||||||
metrics.batchesWrittenWhileLagging.incr(1);
|
metrics.batchesWrittenWhileLagging.incr(1);
|
||||||
|
@ -114,7 +114,7 @@ public void setReadyToFlush() throws IOException {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override // EditLogOutputStream
|
@Override // EditLogOutputStream
|
||||||
protected void flushAndSync() throws IOException {
|
protected void flushAndSync(boolean durable) throws IOException {
|
||||||
assert out.getLength() == 0 : "Output buffer is not empty";
|
assert out.getLength() == 0 : "Output buffer is not empty";
|
||||||
|
|
||||||
int numReadyTxns = doubleBuf.countReadyTxns();
|
int numReadyTxns = doubleBuf.countReadyTxns();
|
||||||
|
@ -176,7 +176,7 @@ public void setReadyToFlush() throws IOException {
|
|||||||
* accumulates new log records while readyBuffer will be flushed and synced.
|
* accumulates new log records while readyBuffer will be flushed and synced.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void flushAndSync() throws IOException {
|
public void flushAndSync(boolean durable) throws IOException {
|
||||||
if (fp == null) {
|
if (fp == null) {
|
||||||
throw new IOException("Trying to use aborted output stream");
|
throw new IOException("Trying to use aborted output stream");
|
||||||
}
|
}
|
||||||
@ -186,7 +186,7 @@ public void flushAndSync() throws IOException {
|
|||||||
}
|
}
|
||||||
preallocate(); // preallocate file if necessay
|
preallocate(); // preallocate file if necessay
|
||||||
doubleBuf.flushTo(fp);
|
doubleBuf.flushTo(fp);
|
||||||
if (!shouldSkipFsyncForTests) {
|
if (durable && !shouldSkipFsyncForTests) {
|
||||||
fc.force(false); // metadata updates not needed
|
fc.force(false); // metadata updates not needed
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -93,18 +93,24 @@ abstract public void writeRaw(byte[] bytes, int offset, int length)
|
|||||||
/**
|
/**
|
||||||
* Flush and sync all data that is ready to be flush
|
* Flush and sync all data that is ready to be flush
|
||||||
* {@link #setReadyToFlush()} into underlying persistent store.
|
* {@link #setReadyToFlush()} into underlying persistent store.
|
||||||
|
* @param durable if true, the edits should be made truly durable before
|
||||||
|
* returning
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
abstract protected void flushAndSync() throws IOException;
|
abstract protected void flushAndSync(boolean durable) throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Flush data to persistent store.
|
* Flush data to persistent store.
|
||||||
* Collect sync metrics.
|
* Collect sync metrics.
|
||||||
*/
|
*/
|
||||||
public void flush() throws IOException {
|
public void flush() throws IOException {
|
||||||
|
flush(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void flush(boolean durable) throws IOException {
|
||||||
numSync++;
|
numSync++;
|
||||||
long start = now();
|
long start = now();
|
||||||
flushAndSync();
|
flushAndSync(durable);
|
||||||
long end = now();
|
long end = now();
|
||||||
totalTimeSync += (end - start);
|
totalTimeSync += (end - start);
|
||||||
}
|
}
|
||||||
|
@ -471,12 +471,12 @@ public void apply(JournalAndStream jas) throws IOException {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void flushAndSync() throws IOException {
|
protected void flushAndSync(final boolean durable) throws IOException {
|
||||||
mapJournalsAndReportErrors(new JournalClosure() {
|
mapJournalsAndReportErrors(new JournalClosure() {
|
||||||
@Override
|
@Override
|
||||||
public void apply(JournalAndStream jas) throws IOException {
|
public void apply(JournalAndStream jas) throws IOException {
|
||||||
if (jas.isActive()) {
|
if (jas.isActive()) {
|
||||||
jas.getCurrentStream().flushAndSync();
|
jas.getCurrentStream().flushAndSync(durable);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}, "flushAndSync");
|
}, "flushAndSync");
|
||||||
|
@ -56,7 +56,7 @@ public void start(int version) throws IOException {
|
|||||||
@Override
|
@Override
|
||||||
public void close(Throwable error) throws IOException {
|
public void close(Throwable error) throws IOException {
|
||||||
elfos.setReadyToFlush();
|
elfos.setReadyToFlush();
|
||||||
elfos.flushAndSync();
|
elfos.flushAndSync(true);
|
||||||
elfos.close();
|
elfos.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1222,7 +1222,7 @@ static void validateNoCrash(byte garbage[]) throws IOException {
|
|||||||
elfos.create();
|
elfos.create();
|
||||||
elfos.writeRaw(garbage, 0, garbage.length);
|
elfos.writeRaw(garbage, 0, garbage.length);
|
||||||
elfos.setReadyToFlush();
|
elfos.setReadyToFlush();
|
||||||
elfos.flushAndSync();
|
elfos.flushAndSync(true);
|
||||||
elfos.close();
|
elfos.close();
|
||||||
elfos = null;
|
elfos = null;
|
||||||
file = new File(TEST_LOG_NAME);
|
file = new File(TEST_LOG_NAME);
|
||||||
|
@ -55,7 +55,7 @@ public void deleteEditsFile() {
|
|||||||
static void flushAndCheckLength(EditLogFileOutputStream elos,
|
static void flushAndCheckLength(EditLogFileOutputStream elos,
|
||||||
long expectedLength) throws IOException {
|
long expectedLength) throws IOException {
|
||||||
elos.setReadyToFlush();
|
elos.setReadyToFlush();
|
||||||
elos.flushAndSync();
|
elos.flushAndSync(true);
|
||||||
assertEquals(expectedLength, elos.getFile().length());
|
assertEquals(expectedLength, elos.getFile().length());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,7 +74,7 @@ static void runEditLogTest(EditLogTestSetup elts) throws IOException {
|
|||||||
|
|
||||||
elts.addTransactionsToLog(elfos, cache);
|
elts.addTransactionsToLog(elfos, cache);
|
||||||
elfos.setReadyToFlush();
|
elfos.setReadyToFlush();
|
||||||
elfos.flushAndSync();
|
elfos.flushAndSync(true);
|
||||||
elfos.close();
|
elfos.close();
|
||||||
elfos = null;
|
elfos = null;
|
||||||
file = new File(TEST_LOG_NAME);
|
file = new File(TEST_LOG_NAME);
|
||||||
|
Loading…
Reference in New Issue
Block a user