HDFS-7185. The active NameNode will not accept an fsimage sent from the standby during rolling upgrade. Contributed by Jing Zhao.
parent: b9edad6403
commit: 18620649f9
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -956,6 +956,9 @@ Release 2.6.0 - UNRELEASED
     HDFS-7237. The command "hdfs namenode -rollingUpgrade" throws
     ArrayIndexOutOfBoundsException. (szetszwo)
 
+    HDFS-7185. The active NameNode will not accept an fsimage sent from the
+    standby during rolling upgrade. (jing9)
+
   BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS
 
     HDFS-6387. HDFS CLI admin tool for creating & deleting an
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
@@ -322,7 +322,7 @@ private boolean recoverStorageDirs(StartupOption startOpt,
       if (curState != StorageState.NOT_FORMATTED
           && startOpt != StartupOption.ROLLBACK) {
         // read and verify consistency with other directories
-        storage.readProperties(sd);
+        storage.readProperties(sd, startOpt);
         isFormatted = true;
       }
       if (startOpt == StartupOption.IMPORT && isFormatted)
@@ -563,7 +563,7 @@ void openEditLogForWrite() throws IOException {
     assert editLog != null : "editLog must be initialized";
     editLog.openForWrite();
     storage.writeTransactionIdFileToStorage(editLog.getCurSegmentTxId());
-  };
+  }
 
   /**
    * Toss the current image and namesystem, reloading from the specified
@@ -572,7 +572,7 @@ void openEditLogForWrite() throws IOException {
   void reloadFromImageFile(File file, FSNamesystem target) throws IOException {
     target.clear();
     LOG.debug("Reloading namespace from " + file);
-    loadFSImage(file, target, null);
+    loadFSImage(file, target, null, false);
   }
 
   /**
@@ -603,7 +603,8 @@ private boolean loadFSImage(FSNamesystem target, StartupOption startOpt,
       // otherwise we can load from both IMAGE and IMAGE_ROLLBACK
       nnfs = EnumSet.of(NameNodeFile.IMAGE, NameNodeFile.IMAGE_ROLLBACK);
     }
-    final FSImageStorageInspector inspector = storage.readAndInspectDirs(nnfs);
+    final FSImageStorageInspector inspector = storage
+        .readAndInspectDirs(nnfs, startOpt);
 
     isUpgradeFinalized = inspector.isUpgradeFinalized();
     List<FSImageFile> imageFiles = inspector.getLatestImages();
@@ -659,7 +660,7 @@ LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
     for (int i = 0; i < imageFiles.size(); i++) {
       try {
         imageFile = imageFiles.get(i);
-        loadFSImageFile(target, recovery, imageFile);
+        loadFSImageFile(target, recovery, imageFile, startOpt);
         break;
       } catch (IOException ioe) {
         LOG.error("Failed to load image from " + imageFile, ioe);
@@ -712,16 +713,18 @@ private void rollingRollback(long discardSegmentTxId, long ckptId)
   }
 
   void loadFSImageFile(FSNamesystem target, MetaRecoveryContext recovery,
-      FSImageFile imageFile) throws IOException {
+      FSImageFile imageFile, StartupOption startupOption) throws IOException {
     LOG.debug("Planning to load image :\n" + imageFile);
     StorageDirectory sdForProperties = imageFile.sd;
-    storage.readProperties(sdForProperties);
+    storage.readProperties(sdForProperties, startupOption);
 
     if (NameNodeLayoutVersion.supports(
         LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
       // For txid-based layout, we should have a .md5 file
       // next to the image file
-      loadFSImage(imageFile.getFile(), target, recovery);
+      boolean isRollingRollback = RollingUpgradeStartupOption.ROLLBACK
+          .matches(startupOption);
+      loadFSImage(imageFile.getFile(), target, recovery, isRollingRollback);
     } else if (NameNodeLayoutVersion.supports(
         LayoutVersion.Feature.FSIMAGE_CHECKSUM, getLayoutVersion())) {
       // In 0.22, we have the checksum stored in the VERSION file.
@@ -733,10 +736,11 @@ LayoutVersion.Feature.FSIMAGE_CHECKSUM, getLayoutVersion())) {
             NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY +
             " not set for storage directory " + sdForProperties.getRoot());
       }
-      loadFSImage(imageFile.getFile(), new MD5Hash(md5), target, recovery);
+      loadFSImage(imageFile.getFile(), new MD5Hash(md5), target, recovery,
+          false);
     } else {
       // We don't have any record of the md5sum
-      loadFSImage(imageFile.getFile(), null, target, recovery);
+      loadFSImage(imageFile.getFile(), null, target, recovery, false);
     }
   }
 
@@ -894,13 +898,15 @@ private static void updateCountForQuotaRecursively(INodeDirectory dir,
    * it against the MD5 sum stored in its associated .md5 file.
    */
   private void loadFSImage(File imageFile, FSNamesystem target,
-      MetaRecoveryContext recovery) throws IOException {
+      MetaRecoveryContext recovery, boolean requireSameLayoutVersion)
+      throws IOException {
     MD5Hash expectedMD5 = MD5FileUtils.readStoredMd5ForFile(imageFile);
     if (expectedMD5 == null) {
       throw new IOException("No MD5 file found corresponding to image file "
           + imageFile);
     }
-    loadFSImage(imageFile, expectedMD5, target, recovery);
+    loadFSImage(imageFile, expectedMD5, target, recovery,
+        requireSameLayoutVersion);
   }
 
   /**
@@ -908,13 +914,14 @@ private void loadFSImage(File imageFile, FSNamesystem target,
    * filenames and blocks.
    */
   private void loadFSImage(File curFile, MD5Hash expectedMd5,
-      FSNamesystem target, MetaRecoveryContext recovery) throws IOException {
+      FSNamesystem target, MetaRecoveryContext recovery,
+      boolean requireSameLayoutVersion) throws IOException {
     // BlockPoolId is required when the FsImageLoader loads the rolling upgrade
     // information. Make sure the ID is properly set.
     target.setBlockPoolId(this.getBlockPoolID());
 
     FSImageFormat.LoaderDelegator loader = FSImageFormat.newLoader(conf, target);
-    loader.load(curFile);
+    loader.load(curFile, requireSameLayoutVersion);
 
     // Check that the image digest we loaded matches up with what
     // we expected
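Taken together, the FSImage.java hunks above thread a single new boolean from
the startup option down to the image loader: loadFSImageFile() derives it from
the startup option, and every caller not on the rolling-rollback path passes
false. A minimal standalone sketch of the derivation (illustrative names, not
Hadoop code):

    // Only a rolling-upgrade ROLLBACK startup makes the loader insist that
    // the image's layout version equal the software's.
    enum RollingUpgradeOption { STARTED, DOWNGRADE, ROLLBACK, NONE }

    public class FlagPlumbingSketch {
      static boolean requireSameLayoutVersion(RollingUpgradeOption opt) {
        return opt == RollingUpgradeOption.ROLLBACK;
      }

      public static void main(String[] args) {
        for (RollingUpgradeOption opt : RollingUpgradeOption.values()) {
          System.out.println(opt + " -> requireSameLayoutVersion="
              + requireSameLayoutVersion(opt));
        }
      }
    }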
@@ -1033,7 +1040,7 @@ public synchronized void updateStorageVersion() throws IOException {
   }
 
   /**
-   * @see #saveNamespace(FSNamesystem, Canceler)
+   * @see #saveNamespace(FSNamesystem, NameNodeFile, Canceler)
    */
   public synchronized void saveNamespace(FSNamesystem source)
       throws IOException {
@@ -1072,7 +1079,7 @@ public synchronized void saveNamespace(FSNamesystem source, NameNodeFile nnf,
   }
 
   /**
-   * @see #saveFSImageInAllDirs(FSNamesystem, long, Canceler)
+   * @see #saveFSImageInAllDirs(FSNamesystem, NameNodeFile, long, Canceler)
    */
   protected synchronized void saveFSImageInAllDirs(FSNamesystem source, long txid)
       throws IOException {
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
@@ -209,7 +209,8 @@ public long getLoadedImageTxId() {
       return impl.getLoadedImageTxId();
     }
 
-    public void load(File file) throws IOException {
+    public void load(File file, boolean requireSameLayoutVersion)
+        throws IOException {
       Preconditions.checkState(impl == null, "Image already loaded!");
 
       FileInputStream is = null;
@@ -219,7 +220,7 @@ public void load(File file) throws IOException {
         IOUtils.readFully(is, magic, 0, magic.length);
         if (Arrays.equals(magic, FSImageUtil.MAGIC_HEADER)) {
           FSImageFormatProtobuf.Loader loader = new FSImageFormatProtobuf.Loader(
-              conf, fsn);
+              conf, fsn, requireSameLayoutVersion);
           impl = loader;
           loader.load(file);
         } else {
@@ -227,7 +228,6 @@ public void load(File file) throws IOException {
           impl = loader;
           loader.load(file);
         }
-
       } finally {
         IOUtils.cleanup(LOG, is);
       }
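For context, LoaderDelegator.load() picks a loader by sniffing the image
file's magic bytes, and only the protobuf loader receives the new flag; the
legacy-format branch is untouched. A self-contained sketch of that dispatch,
assuming FSImageUtil.MAGIC_HEADER is the 8-byte ASCII header "HDFSIMG1":

    import java.io.DataInputStream;
    import java.io.EOFException;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.util.Arrays;

    public class MagicSniffSketch {
      // Assumed value of FSImageUtil.MAGIC_HEADER.
      static final byte[] MAGIC_HEADER = "HDFSIMG1".getBytes();

      /** True if the file begins with the protobuf fsimage magic header. */
      static boolean isProtobufImage(File file) throws IOException {
        byte[] magic = new byte[MAGIC_HEADER.length];
        try (DataInputStream in =
            new DataInputStream(new FileInputStream(file))) {
          in.readFully(magic);
        } catch (EOFException e) {
          return false; // shorter than the header: cannot be protobuf
        }
        return Arrays.equals(magic, MAGIC_HEADER);
      }

      public static void main(String[] args) throws IOException {
        System.out.println(isProtobufImage(new File(args[0]))
            ? "protobuf image: layout-version gate can apply"
            : "legacy image: old-format loader, no gate needed");
      }
    }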
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java
@@ -42,9 +42,12 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
 import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
+import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
+import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
@@ -139,11 +142,19 @@ public static final class Loader implements FSImageFormat.AbstractLoader {
     private MD5Hash imgDigest;
     /** The transaction ID of the last edit represented by the loaded file */
     private long imgTxId;
+    /**
+     * Whether the image's layout version must be the same with
+     * {@link HdfsConstants#NAMENODE_LAYOUT_VERSION}. This is only set to true
+     * when we're doing (rollingUpgrade rollback).
+     */
+    private final boolean requireSameLayoutVersion;
 
-    Loader(Configuration conf, FSNamesystem fsn) {
+    Loader(Configuration conf, FSNamesystem fsn,
+        boolean requireSameLayoutVersion) {
       this.conf = conf;
       this.fsn = fsn;
       this.ctx = new LoaderContext();
+      this.requireSameLayoutVersion = requireSameLayoutVersion;
     }
 
     @Override
@@ -181,6 +192,12 @@ private void loadInternal(RandomAccessFile raFile, FileInputStream fin)
         throw new IOException("Unrecognized file format");
       }
       FileSummary summary = FSImageUtil.loadSummary(raFile);
+      if (requireSameLayoutVersion && summary.getLayoutVersion() !=
+          HdfsConstants.NAMENODE_LAYOUT_VERSION) {
+        throw new IOException("Image version " + summary.getLayoutVersion() +
+            " is not equal to the software version " +
+            HdfsConstants.NAMENODE_LAYOUT_VERSION);
+      }
 
       FileChannel channel = fin.getChannel();
 
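The added gate is the heart of the fix: when an image is loaded for a
rolling-upgrade rollback, its layout version must match the running software's
exactly. HDFS layout versions are negative and decrease as the format evolves,
so a mismatch means the image was written by a different release. A runnable
standalone sketch of the check (the version constant is an assumed example,
not the real value):

    import java.io.IOException;

    public class LayoutVersionGateSketch {
      // Assumed example software layout version; newer releases are more
      // negative (-57 is newer than -56).
      static final int SOFTWARE_LAYOUT_VERSION = -57;

      static void checkLayoutVersion(int imageLayoutVersion,
          boolean requireSameLayoutVersion) throws IOException {
        if (requireSameLayoutVersion
            && imageLayoutVersion != SOFTWARE_LAYOUT_VERSION) {
          throw new IOException("Image version " + imageLayoutVersion
              + " is not equal to the software version "
              + SOFTWARE_LAYOUT_VERSION);
        }
      }

      public static void main(String[] args) throws IOException {
        checkLayoutVersion(-57, true);  // ok: versions match
        checkLayoutVersion(-60, false); // ok: gate disabled outside rollback
        try {
          checkLayoutVersion(-60, true); // rollback with a mismatched image
        } catch (IOException expected) {
          System.out.println(expected.getMessage());
        }
      }
    }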
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -1021,7 +1021,8 @@ private void loadFSImage(StartupOption startOpt) throws IOException {
       MetaRecoveryContext recovery = startOpt.createRecoveryContext();
       final boolean staleImage
           = fsImage.recoverTransitionRead(startOpt, this, recovery);
-      if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt)) {
+      if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt) ||
+          RollingUpgradeStartupOption.DOWNGRADE.matches(startOpt)) {
         rollingUpgradeInfo = null;
       }
       final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade();
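This hunk also makes a DOWNGRADE startup clear the in-memory rolling-upgrade
state, as ROLLBACK already did, so isRollingUpgrade() stops reporting an
upgrade after either exit path. A condensed sketch (illustrative names):

    enum StartupKind { REGULAR, RU_STARTED, RU_ROLLBACK, RU_DOWNGRADE }

    public class ClearUpgradeInfoSketch {
      // Before this patch only RU_ROLLBACK cleared the upgrade info; the
      // patch extends the reset to RU_DOWNGRADE.
      static boolean clearsRollingUpgradeInfo(StartupKind kind) {
        return kind == StartupKind.RU_ROLLBACK
            || kind == StartupKind.RU_DOWNGRADE;
      }

      public static void main(String[] args) {
        for (StartupKind k : StartupKind.values()) {
          System.out.println(k + " -> clears=" + clearsRollingUpgradeInfo(k));
        }
      }
    }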
@@ -1031,6 +1032,8 @@ private void loadFSImage(StartupOption startOpt) throws IOException {
       if (needToSave) {
         fsImage.saveNamespace(this);
       } else {
+        updateStorageVersionForRollingUpgrade(fsImage.getLayoutVersion(),
+            startOpt);
         // No need to save, so mark the phase done.
         StartupProgress prog = NameNode.getStartupProgress();
         prog.beginPhase(Phase.SAVING_CHECKPOINT);
@@ -1052,6 +1055,18 @@ private void loadFSImage(StartupOption startOpt) throws IOException {
     imageLoadComplete();
   }
 
+  private void updateStorageVersionForRollingUpgrade(final long layoutVersion,
+      StartupOption startOpt) throws IOException {
+    boolean rollingStarted = RollingUpgradeStartupOption.STARTED
+        .matches(startOpt) && layoutVersion > HdfsConstants
+        .NAMENODE_LAYOUT_VERSION;
+    boolean rollingRollback = RollingUpgradeStartupOption.ROLLBACK
+        .matches(startOpt);
+    if (rollingRollback || rollingStarted) {
+      fsImage.updateStorageVersion();
+    }
+  }
+
   private void startSecretManager() {
     if (dtSecretManager != null) {
       try {
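The new helper rewrites the storage VERSION file in exactly two cases: a
rolling upgrade starting from an image older than the software, or a rolling
rollback. Because layout versions are negative, layoutVersion >
NAMENODE_LAYOUT_VERSION means the image predates the software. A standalone
sketch with assumed example version numbers:

    public class VersionUpdateSketch {
      // Assumed example: -57 is one format step newer than -56.
      static final int SOFTWARE_LV = -57;

      enum Opt { STARTED, ROLLBACK, DOWNGRADE, NONE }

      // Mirrors updateStorageVersionForRollingUpgrade(): rewrite VERSION
      // when a rolling upgrade starts from an older image, or on rollback.
      static boolean shouldUpdateVersion(Opt opt, int imageLayoutVersion) {
        boolean rollingStarted =
            opt == Opt.STARTED && imageLayoutVersion > SOFTWARE_LV;
        boolean rollingRollback = opt == Opt.ROLLBACK;
        return rollingStarted || rollingRollback;
      }

      public static void main(String[] args) {
        System.out.println(shouldUpdateVersion(Opt.STARTED, -56));   // true
        System.out.println(shouldUpdateVersion(Opt.STARTED, -57));   // false
        System.out.println(shouldUpdateVersion(Opt.ROLLBACK, -57));  // true
        System.out.println(shouldUpdateVersion(Opt.DOWNGRADE, -56)); // false
      }
    }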
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
@@ -39,9 +39,11 @@
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.LayoutVersion;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
+import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
 import org.apache.hadoop.hdfs.server.common.Util;
@@ -620,6 +622,23 @@ LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
     setDeprecatedPropertiesForUpgrade(props);
   }
 
+  void readProperties(StorageDirectory sd, StartupOption startupOption)
+      throws IOException {
+    Properties props = readPropertiesFile(sd.getVersionFile());
+    if (HdfsServerConstants.RollingUpgradeStartupOption.ROLLBACK.matches
+        (startupOption)) {
+      int lv = Integer.parseInt(getProperty(props, sd, "layoutVersion"));
+      if (lv > getServiceLayoutVersion()) {
+        // we should not use a newer version for rollingUpgrade rollback
+        throw new IncorrectVersionException(getServiceLayoutVersion(), lv,
+            "storage directory " + sd.getRoot().getAbsolutePath());
+      }
+      props.setProperty("layoutVersion",
+          Integer.toString(HdfsConstants.NAMENODE_LAYOUT_VERSION));
+    }
+    setFieldsFromProperties(props, sd);
+  }
+
   /**
    * Pull any properties out of the VERSION file that are from older
    * versions of HDFS and only necessary during upgrade.
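During a rolling-upgrade rollback the on-disk VERSION file may already carry
the upgraded software's newer layout version (written by the helper above),
so this overload substitutes the running software's version before the normal
field parsing; the guard rejects the opposite case, where the software doing
the rollback is newer than whatever wrote the storage. A standalone sketch
with assumed example versions:

    import java.io.IOException;
    import java.util.Properties;

    public class RollbackPropertiesSketch {
      // Assumed example: layout version of the rolled-back-to software.
      static final int SERVICE_LV = -56;

      /** Mirrors the ROLLBACK branch of readProperties(sd, startupOption). */
      static void applyRollbackOverride(Properties props) throws IOException {
        int lv = Integer.parseInt(props.getProperty("layoutVersion"));
        if (lv > SERVICE_LV) {
          // Storage is older than this software: the rollback must be run
          // with the pre-upgrade release, not a newer one.
          throw new IOException("expected layout " + SERVICE_LV
              + " but found " + lv);
        }
        // Storage carries the newer (more negative) version; replace it with
        // this software's version so normal field parsing accepts it.
        props.setProperty("layoutVersion", Integer.toString(SERVICE_LV));
      }

      public static void main(String[] args) throws IOException {
        Properties p = new Properties();
        p.setProperty("layoutVersion", "-57"); // written during the upgrade
        applyRollbackOverride(p);
        System.out.println(p.getProperty("layoutVersion")); // prints -56
      }
    }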
@@ -1002,8 +1021,8 @@ void inspectStorageDirs(FSImageStorageInspector inspector)
    * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
    * @throws IOException if no valid storage dirs are found or no valid layout version
    */
-  FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes)
-      throws IOException {
+  FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes,
+      StartupOption startupOption) throws IOException {
     Integer layoutVersion = null;
     boolean multipleLV = false;
     StringBuilder layoutVersions = new StringBuilder();
@@ -1016,7 +1035,7 @@ FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes)
         FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
         continue;
       }
-      readProperties(sd); // sets layoutVersion
+      readProperties(sd, startupOption); // sets layoutVersion
       int lv = getLayoutVersion();
       if (layoutVersion == null) {
         layoutVersion = Integer.valueOf(lv);
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestFSImageWithSnapshot.java
@@ -157,7 +157,7 @@ private void loadFSImageFromTempFile(File imageFile) throws IOException {
     fsn.writeLock();
     fsn.getFSDirectory().writeLock();
     try {
-      loader.load(imageFile);
+      loader.load(imageFile, false);
       FSImage.updateCountForQuota(
           INodeDirectory.valueOf(fsn.getFSDirectory().getINode("/"), "/"));
     } finally {