HDFS-6029. Secondary NN fails to checkpoint after -rollingUpgrade prepare. Contributed by Jing Zhao.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-5535@1572800 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9b81883952
commit
e9a17c8ce0
@ -117,3 +117,6 @@ HDFS-5535 subtasks:
|
|||||||
|
|
||||||
HDFS-6026. Fix TestDFSUpgrade and TestDataNodeRollingUpgrade.
|
HDFS-6026. Fix TestDFSUpgrade and TestDataNodeRollingUpgrade.
|
||||||
(jing9 via szetszwo)
|
(jing9 via szetszwo)
|
||||||
|
|
||||||
|
HDFS-6029. Secondary NN fails to checkpoint after -rollingUpgrade prepare.
|
||||||
|
(jing9)
|
||||||
|
@ -221,8 +221,8 @@ void doCheckpoint() throws IOException {
|
|||||||
LOG.info("Unable to roll forward using only logs. Downloading " +
|
LOG.info("Unable to roll forward using only logs. Downloading " +
|
||||||
"image with txid " + sig.mostRecentCheckpointTxId);
|
"image with txid " + sig.mostRecentCheckpointTxId);
|
||||||
MD5Hash downloadedHash = TransferFsImage.downloadImageToStorage(
|
MD5Hash downloadedHash = TransferFsImage.downloadImageToStorage(
|
||||||
backupNode.nnHttpAddress, NameNodeFile.IMAGE,
|
backupNode.nnHttpAddress, sig.mostRecentCheckpointTxId, bnStorage,
|
||||||
sig.mostRecentCheckpointTxId, bnStorage, true);
|
true);
|
||||||
bnImage.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE,
|
bnImage.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE,
|
||||||
sig.mostRecentCheckpointTxId, downloadedHash);
|
sig.mostRecentCheckpointTxId, downloadedHash);
|
||||||
lastApplied = sig.mostRecentCheckpointTxId;
|
lastApplied = sig.mostRecentCheckpointTxId;
|
||||||
|
@ -122,14 +122,14 @@ public void doGet(final HttpServletRequest request,
|
|||||||
public Void run() throws Exception {
|
public Void run() throws Exception {
|
||||||
if (parsedParams.isGetImage()) {
|
if (parsedParams.isGetImage()) {
|
||||||
long txid = parsedParams.getTxId();
|
long txid = parsedParams.getTxId();
|
||||||
final NameNodeFile nnf = parsedParams.getNameNodeFile();
|
|
||||||
File imageFile = null;
|
File imageFile = null;
|
||||||
String errorMessage = "Could not find image";
|
String errorMessage = "Could not find image";
|
||||||
if (parsedParams.shouldFetchLatest()) {
|
if (parsedParams.shouldFetchLatest()) {
|
||||||
imageFile = nnImage.getStorage().getHighestFsImageName();
|
imageFile = nnImage.getStorage().getHighestFsImageName();
|
||||||
} else {
|
} else {
|
||||||
errorMessage += " with txid " + txid;
|
errorMessage += " with txid " + txid;
|
||||||
imageFile = nnImage.getStorage().getFsImageName(txid, nnf);
|
imageFile = nnImage.getStorage().getFsImage(txid,
|
||||||
|
EnumSet.of(NameNodeFile.IMAGE, NameNodeFile.IMAGE_ROLLBACK));
|
||||||
}
|
}
|
||||||
if (imageFile == null) {
|
if (imageFile == null) {
|
||||||
throw new IOException(errorMessage);
|
throw new IOException(errorMessage);
|
||||||
@ -183,7 +183,7 @@ public Void run() throws Exception {
|
|||||||
// issue a HTTP get request to download the new fsimage
|
// issue a HTTP get request to download the new fsimage
|
||||||
MD5Hash downloadImageDigest = TransferFsImage
|
MD5Hash downloadImageDigest = TransferFsImage
|
||||||
.downloadImageToStorage(parsedParams.getInfoServer(conf),
|
.downloadImageToStorage(parsedParams.getInfoServer(conf),
|
||||||
nnf, txid, nnImage.getStorage(), true);
|
txid, nnImage.getStorage(), true);
|
||||||
nnImage.saveDigestAndRenameCheckpointImage(nnf, txid,
|
nnImage.saveDigestAndRenameCheckpointImage(nnf, txid,
|
||||||
downloadImageDigest);
|
downloadImageDigest);
|
||||||
if (nnf == NameNodeFile.IMAGE_ROLLBACK) {
|
if (nnf == NameNodeFile.IMAGE_ROLLBACK) {
|
||||||
@ -324,8 +324,10 @@ static String getParamStringForMostRecentImage() {
|
|||||||
|
|
||||||
static String getParamStringForImage(NameNodeFile nnf, long txid,
|
static String getParamStringForImage(NameNodeFile nnf, long txid,
|
||||||
StorageInfo remoteStorageInfo) {
|
StorageInfo remoteStorageInfo) {
|
||||||
|
final String imageType = nnf == null ? "" : "&" + IMAGE_FILE_TYPE + "="
|
||||||
|
+ nnf.name();
|
||||||
return "getimage=1&" + TXID_PARAM + "=" + txid
|
return "getimage=1&" + TXID_PARAM + "=" + txid
|
||||||
+ "&" + IMAGE_FILE_TYPE + "=" + nnf.name()
|
+ imageType
|
||||||
+ "&" + STORAGEINFO_PARAM + "=" +
|
+ "&" + STORAGEINFO_PARAM + "=" +
|
||||||
remoteStorageInfo.toColonSeparatedString();
|
remoteStorageInfo.toColonSeparatedString();
|
||||||
}
|
}
|
||||||
|
@ -514,6 +514,24 @@ public File getFsImageName(long txid, NameNodeFile nnf) {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The first image file whose txid is the same as the given txid and
|
||||||
|
* image type is one of the given types.
|
||||||
|
*/
|
||||||
|
public File getFsImage(long txid, EnumSet<NameNodeFile> nnfs) {
|
||||||
|
for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
|
||||||
|
it.hasNext();) {
|
||||||
|
StorageDirectory sd = it.next();
|
||||||
|
for (NameNodeFile nnf : nnfs) {
|
||||||
|
File fsImage = getStorageFile(sd, nnf, txid);
|
||||||
|
if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
|
||||||
|
return fsImage;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
public File getFsImageName(long txid) {
|
public File getFsImageName(long txid) {
|
||||||
return getFsImageName(txid, NameNodeFile.IMAGE);
|
return getFsImageName(txid, NameNodeFile.IMAGE);
|
||||||
}
|
}
|
||||||
|
@ -445,7 +445,7 @@ public Boolean run() throws Exception {
|
|||||||
} else {
|
} else {
|
||||||
LOG.info("Image has changed. Downloading updated image from NN.");
|
LOG.info("Image has changed. Downloading updated image from NN.");
|
||||||
MD5Hash downloadedHash = TransferFsImage.downloadImageToStorage(
|
MD5Hash downloadedHash = TransferFsImage.downloadImageToStorage(
|
||||||
nnHostPort, NameNodeFile.IMAGE, sig.mostRecentCheckpointTxId,
|
nnHostPort, sig.mostRecentCheckpointTxId,
|
||||||
dstImage.getStorage(), true);
|
dstImage.getStorage(), true);
|
||||||
dstImage.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE,
|
dstImage.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE,
|
||||||
sig.mostRecentCheckpointTxId, downloadedHash);
|
sig.mostRecentCheckpointTxId, downloadedHash);
|
||||||
|
@ -87,10 +87,9 @@ public static void downloadMostRecentImageToDirectory(URL infoServer,
|
|||||||
null, false);
|
null, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static MD5Hash downloadImageToStorage(URL fsName, NameNodeFile nnf,
|
public static MD5Hash downloadImageToStorage(URL fsName, long imageTxId,
|
||||||
long imageTxId, Storage dstStorage, boolean needDigest)
|
Storage dstStorage, boolean needDigest) throws IOException {
|
||||||
throws IOException {
|
String fileid = GetImageServlet.getParamStringForImage(null,
|
||||||
String fileid = GetImageServlet.getParamStringForImage(nnf,
|
|
||||||
imageTxId, dstStorage);
|
imageTxId, dstStorage);
|
||||||
String fileName = NNStorage.getCheckpointImageFileName(imageTxId);
|
String fileName = NNStorage.getCheckpointImageFileName(imageTxId);
|
||||||
|
|
||||||
|
@ -208,8 +208,7 @@ private int doRun() throws IOException {
|
|||||||
|
|
||||||
// Download that checkpoint into our storage directories.
|
// Download that checkpoint into our storage directories.
|
||||||
MD5Hash hash = TransferFsImage.downloadImageToStorage(
|
MD5Hash hash = TransferFsImage.downloadImageToStorage(
|
||||||
otherHttpAddr, NameNodeFile.IMAGE, imageTxId,
|
otherHttpAddr, imageTxId, storage, true);
|
||||||
storage, true);
|
|
||||||
image.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE, imageTxId,
|
image.saveDigestAndRenameCheckpointImage(NameNodeFile.IMAGE, imageTxId,
|
||||||
hash);
|
hash);
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
|
@ -35,7 +35,9 @@
|
|||||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSImage;
|
import org.apache.hadoop.hdfs.server.namenode.FSImage;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage;
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode;
|
||||||
import org.apache.hadoop.hdfs.tools.DFSAdmin;
|
import org.apache.hadoop.hdfs.tools.DFSAdmin;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
@ -492,4 +494,47 @@ static void queryForPreparation(DistributedFileSystem dfs) throws IOException,
|
|||||||
Assert.fail("Query return false");
|
Assert.fail("Query return false");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* In a non-HA setup, after rolling upgrade prepare, the Secondary NN should
|
||||||
|
* still be able to do a checkpoint.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testCheckpointWithSNN() throws Exception {
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
DistributedFileSystem dfs = null;
|
||||||
|
SecondaryNameNode snn = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
Configuration conf = new HdfsConfiguration();
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
|
||||||
|
conf.set(DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY,
|
||||||
|
"0.0.0.0:0");
|
||||||
|
snn = new SecondaryNameNode(conf);
|
||||||
|
|
||||||
|
dfs = cluster.getFileSystem();
|
||||||
|
dfs.mkdirs(new Path("/test/foo"));
|
||||||
|
|
||||||
|
snn.doCheckpoint();
|
||||||
|
|
||||||
|
//start rolling upgrade
|
||||||
|
dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
|
||||||
|
dfs.rollingUpgrade(RollingUpgradeAction.PREPARE);
|
||||||
|
dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
|
||||||
|
|
||||||
|
dfs.mkdirs(new Path("/test/bar"));
|
||||||
|
// do checkpoint in SNN again
|
||||||
|
snn.doCheckpoint();
|
||||||
|
} finally {
|
||||||
|
IOUtils.cleanup(null, dfs);
|
||||||
|
if (snn != null) {
|
||||||
|
snn.shutdown();
|
||||||
|
}
|
||||||
|
if (cluster != null) {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1980,8 +1980,7 @@ public void testNamespaceVerifiedOnFileTransfer() throws IOException {
|
|||||||
.when(dstImage).toColonSeparatedString();
|
.when(dstImage).toColonSeparatedString();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
TransferFsImage.downloadImageToStorage(fsName, NameNodeFile.IMAGE, 0,
|
TransferFsImage.downloadImageToStorage(fsName, 0, dstImage, false);
|
||||||
dstImage, false);
|
|
||||||
fail("Storage info was not verified");
|
fail("Storage info was not verified");
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
String msg = StringUtils.stringifyException(ioe);
|
String msg = StringUtils.stringifyException(ioe);
|
||||||
|
Loading…
Reference in New Issue
Block a user