HDFS-9533. seen_txid in the shared edits directory is modified during bootstrapping. Contributed by Kihwal Lee.
This commit is contained in:
parent
03bab8dea1
commit
5cb1e0118b
@ -2540,6 +2540,9 @@ Release 2.7.3 - UNRELEASED
|
||||
HDFS-9516. Truncate file fails with data dirs on multiple disks.
|
||||
(Plamen Jeliazkov via shv)
|
||||
|
||||
HDFS-9533. seen_txid in the shared edits directory is modified during
|
||||
bootstrapping (kihwal)
|
||||
|
||||
Release 2.7.2 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -482,8 +482,24 @@ long getMostRecentCheckpointTime() {
|
||||
* @param txid the txid that has been reached
|
||||
*/
|
||||
public void writeTransactionIdFileToStorage(long txid) {
|
||||
// Delegate to the two-argument overload, passing null as the directory type.
// NOTE(review): null presumably means "no type filter", i.e. every storage
// directory is written — confirm against dirIterator().
writeTransactionIdFileToStorage(txid, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a small file in all available storage directories that
|
||||
* indicates that the namespace has reached some given transaction ID.
|
||||
*
|
||||
* This is used when the image is loaded to avoid accidental rollbacks
|
||||
* in the case where an edit log is fully deleted but there is no
|
||||
* checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
|
||||
* @param txid the txid that has been reached
|
||||
* @param type the type of storage directory to write the file to (the one-argument overload passes null)
|
||||
*/
|
||||
public void writeTransactionIdFileToStorage(long txid,
|
||||
NameNodeDirType type) {
|
||||
// Write txid marker in all storage directories
|
||||
for (StorageDirectory sd : storageDirs) {
|
||||
for (Iterator<StorageDirectory> it = dirIterator(type); it.hasNext();) {
|
||||
StorageDirectory sd = it.next();
|
||||
try {
|
||||
writeTransactionIdFile(sd, txid);
|
||||
} catch(IOException e) {
|
||||
|
@ -51,6 +51,7 @@
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSImage;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NNUpgradeUtil;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.TransferFsImage;
|
||||
@ -329,13 +330,14 @@ private int downloadImage(NNStorage storage, NamenodeProtocol proxy, RemoteNameN
|
||||
return ERR_CODE_LOGS_UNAVAILABLE;
|
||||
}
|
||||
|
||||
image.getStorage().writeTransactionIdFileToStorage(curTxId);
|
||||
|
||||
// Download that checkpoint into our storage directories.
|
||||
MD5Hash hash = TransferFsImage.downloadImageToStorage(
|
||||
proxyInfo.getHttpAddress(), imageTxId, storage, true, true);
|
||||
image.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE, imageTxId,
|
||||
hash);
|
||||
|
||||
// Write seen_txid to the formatted image directories.
|
||||
storage.writeTransactionIdFileToStorage(imageTxId, NameNodeDirType.IMAGE);
|
||||
} catch (IOException ioe) {
|
||||
throw ioe;
|
||||
} finally {
|
||||
|
@ -568,4 +568,11 @@ public static void assertNNFilesMatch(MiniDFSCluster cluster) throws Exception {
|
||||
FSImageTestUtil.assertParallelFilesAreIdentical(curDirs,
|
||||
ignoredFiles);
|
||||
}
|
||||
|
||||
/**
 * Reads the transaction id stored in one storage directory of a NameNode.
 *
 * Looks up the storage directory matching {@code storageUri} in the storage
 * of the node's FSImage and returns the value that
 * {@code NNStorage.readTransactionIdFile} reads from it.
 *
 * @param node the NameNode whose storage is inspected
 * @param storageUri URI identifying the storage directory to read
 * @return the transaction id read from that storage directory
 * @throws IOException declared by this method; presumably raised when the
 *         transaction id file cannot be read — confirm against NNStorage
 */
public static long getStorageTxId(NameNode node, URI storageUri)
|
||||
throws IOException {
|
||||
// NOTE(review): the lookup result is used without a null check; verify that
// getStorageDirectory() cannot return null for a URI that is not configured.
StorageDirectory sDir = getFSImage(node).getStorage().
|
||||
getStorageDirectory(storageUri);
|
||||
return NNStorage.readTransactionIdFile(sDir);
|
||||
}
|
||||
}
|
||||
|
@ -146,6 +146,13 @@ public void testDownloadingLaterCheckpoint() throws Exception {
|
||||
.getFSImage().getMostRecentCheckpointTxId();
|
||||
assertEquals(6, expectedCheckpointTxId);
|
||||
|
||||
// advance the current txid
|
||||
cluster.getFileSystem(0).create(new Path("/test_txid"), (short)1).close();
|
||||
|
||||
// obtain the content of seen_txid
|
||||
URI editsUri = cluster.getSharedEditsDir(0, maxNNCount - 1);
|
||||
long seen_txid_shared = FSImageTestUtil.getStorageTxId(nn0, editsUri);
|
||||
|
||||
for (int i = 1; i < maxNNCount; i++) {
|
||||
assertEquals(0, forceBootstrap(i));
|
||||
|
||||
@ -156,6 +163,10 @@ public void testDownloadingLaterCheckpoint() throws Exception {
|
||||
}
|
||||
FSImageTestUtil.assertNNFilesMatch(cluster);
|
||||
|
||||
// Make sure the seen_txid was not modified by the standby
|
||||
assertEquals(seen_txid_shared,
|
||||
FSImageTestUtil.getStorageTxId(nn0, editsUri));
|
||||
|
||||
// We should now be able to start the standby successfully.
|
||||
restartNameNodesFromIndex(1);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user