YARN-2052. Embedded an epoch number in container id to ensure the uniqueness of container id after RM restarts. Contributed by Tsuyoshi OZAWA
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1606557 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2f75cd1190
commit
b0c51504c4
@ -45,6 +45,9 @@ Release 2.5.0 - UNRELEASED
|
||||
YARN-1365. Changed ApplicationMasterService to allow an app to re-register
|
||||
after RM restart. (Anubhav Dhoot via jianhe)
|
||||
|
||||
YARN-2052. Embedded an epoch number in container id to ensure the uniqueness
|
||||
of container id after RM restarts. (Tsuyoshi OZAWA via jianhe)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
YARN-1479. Invalid NaN values in Hadoop REST API JSON response (Chen He via
|
||||
|
@ -135,6 +135,10 @@ message RMStateVersionProto {
|
||||
optional int32 minor_version = 2;
|
||||
}
|
||||
|
||||
message EpochProto {
|
||||
optional int64 epoch = 1;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
///////////// RM Failover related records ////////////////////////
|
||||
//////////////////////////////////////////////////////////////////
|
||||
|
@ -101,4 +101,6 @@ void setRMApplicationHistoryWriter(
|
||||
ConfigurationProvider getConfigurationProvider();
|
||||
|
||||
boolean isWorkPreservingRecoveryEnabled();
|
||||
|
||||
int getEpoch();
|
||||
}
|
@ -82,6 +82,7 @@ public class RMContextImpl implements RMContext {
|
||||
private ApplicationMasterService applicationMasterService;
|
||||
private RMApplicationHistoryWriter rmApplicationHistoryWriter;
|
||||
private ConfigurationProvider configurationProvider;
|
||||
private int epoch;
|
||||
|
||||
/**
|
||||
* Default constructor. To be used in conjunction with setter methods for
|
||||
@ -359,4 +360,13 @@ public void setConfigurationProvider(
|
||||
ConfigurationProvider configurationProvider) {
|
||||
this.configurationProvider = configurationProvider;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getEpoch() {
|
||||
return this.epoch;
|
||||
}
|
||||
|
||||
void setEpoch(int epoch) {
|
||||
this.epoch = epoch;
|
||||
}
|
||||
}
|
@ -482,6 +482,9 @@ protected void serviceStart() throws Exception {
|
||||
if(recoveryEnabled) {
|
||||
try {
|
||||
rmStore.checkVersion();
|
||||
if (rmContext.isWorkPreservingRecoveryEnabled()) {
|
||||
rmContext.setEpoch(rmStore.getAndIncrementEpoch());
|
||||
}
|
||||
RMState state = rmStore.loadState();
|
||||
recover(state);
|
||||
} catch (Exception e) {
|
||||
|
@ -43,15 +43,19 @@
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.EpochProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationAttemptStateDataProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationStateDataProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RMStateVersionProto;
|
||||
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
|
||||
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.Epoch;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.RMStateVersion;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationAttemptStateDataPBImpl;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.EpochPBImpl;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.RMStateVersionPBImpl;
|
||||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||
|
||||
@ -71,7 +75,7 @@ public class FileSystemRMStateStore extends RMStateStore {
|
||||
|
||||
protected static final String ROOT_DIR_NAME = "FSRMStateRoot";
|
||||
protected static final RMStateVersion CURRENT_VERSION_INFO = RMStateVersion
|
||||
.newInstance(1, 0);
|
||||
.newInstance(1, 1);
|
||||
|
||||
protected FileSystem fs;
|
||||
|
||||
@ -145,7 +149,30 @@ protected synchronized void storeVersion() throws Exception {
|
||||
writeFile(versionNodePath, data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public synchronized int getAndIncrementEpoch() throws Exception {
|
||||
Path epochNodePath = getNodePath(rootDirPath, EPOCH_NODE);
|
||||
int currentEpoch = 0;
|
||||
if (fs.exists(epochNodePath)) {
|
||||
// load current epoch
|
||||
FileStatus status = fs.getFileStatus(epochNodePath);
|
||||
byte[] data = readFile(epochNodePath, status.getLen());
|
||||
Epoch epoch = new EpochPBImpl(EpochProto.parseFrom(data));
|
||||
currentEpoch = epoch.getEpoch();
|
||||
// increment epoch and store it
|
||||
byte[] storeData = Epoch.newInstance(currentEpoch + 1).getProto()
|
||||
.toByteArray();
|
||||
updateFile(epochNodePath, storeData);
|
||||
} else {
|
||||
// initialize epoch file with 1 for the next time.
|
||||
byte[] storeData = Epoch.newInstance(currentEpoch + 1).getProto()
|
||||
.toByteArray();
|
||||
writeFile(epochNodePath, storeData);
|
||||
}
|
||||
return currentEpoch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized RMState loadState() throws Exception {
|
||||
RMState rmState = new RMState();
|
||||
|
@ -43,6 +43,8 @@
|
||||
public class MemoryRMStateStore extends RMStateStore {
|
||||
|
||||
RMState state = new RMState();
|
||||
private int epoch = 0;
|
||||
|
||||
@VisibleForTesting
|
||||
public RMState getState() {
|
||||
return state;
|
||||
@ -52,6 +54,13 @@ public RMState getState() {
|
||||
public void checkVersion() throws Exception {
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int getAndIncrementEpoch() throws Exception {
|
||||
int currentEpoch = epoch;
|
||||
epoch = epoch + 1;
|
||||
return currentEpoch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized RMState loadState() throws Exception {
|
||||
// return a copy of the state to allow for modification of the real state
|
||||
|
@ -47,6 +47,11 @@ protected void closeInternal() throws Exception {
|
||||
// Do nothing
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int getAndIncrementEpoch() throws Exception {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public RMState loadState() throws Exception {
|
||||
throw new UnsupportedOperationException("Cannot load state from null store");
|
||||
|
@ -45,6 +45,7 @@
|
||||
import org.apache.hadoop.yarn.event.AsyncDispatcher;
|
||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos;
|
||||
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
||||
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMFatalEvent;
|
||||
@ -85,6 +86,7 @@ public abstract class RMStateStore extends AbstractService {
|
||||
protected static final String DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX =
|
||||
"RMDTSequenceNumber_";
|
||||
protected static final String VERSION_NODE = "RMVersionNode";
|
||||
protected static final String EPOCH_NODE = "EpochNode";
|
||||
|
||||
public static final Log LOG = LogFactory.getLog(RMStateStore.class);
|
||||
|
||||
@ -520,6 +522,12 @@ public void checkVersion() throws Exception {
|
||||
*/
|
||||
protected abstract RMStateVersion getCurrentVersion();
|
||||
|
||||
|
||||
/**
|
||||
* Get the current epoch of RM and increment the value.
|
||||
*/
|
||||
public abstract int getAndIncrementEpoch() throws Exception;
|
||||
|
||||
/**
|
||||
* Blocking API
|
||||
* The derived class must recover state from the store and return a new
|
||||
|
@ -44,16 +44,21 @@
|
||||
import org.apache.hadoop.yarn.conf.HAUtil;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos;
|
||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationAttemptStateDataProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ApplicationStateDataProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RMStateVersionProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.EpochProto;
|
||||
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMZKUtils;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
|
||||
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.Epoch;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.RMStateVersion;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationAttemptStateDataPBImpl;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.EpochPBImpl;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.RMStateVersionPBImpl;
|
||||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||
import org.apache.zookeeper.CreateMode;
|
||||
@ -81,7 +86,7 @@ public class ZKRMStateStore extends RMStateStore {
|
||||
|
||||
protected static final String ROOT_ZNODE_NAME = "ZKRMStateRoot";
|
||||
protected static final RMStateVersion CURRENT_VERSION_INFO = RMStateVersion
|
||||
.newInstance(1, 0);
|
||||
.newInstance(1, 1);
|
||||
private static final String RM_DELEGATION_TOKENS_ROOT_ZNODE_NAME =
|
||||
"RMDelegationTokensRoot";
|
||||
private static final String RM_DT_SEQUENTIAL_NUMBER_ZNODE_NAME =
|
||||
@ -102,6 +107,7 @@ public class ZKRMStateStore extends RMStateStore {
|
||||
*
|
||||
* ROOT_DIR_PATH
|
||||
* |--- VERSION_INFO
|
||||
* |--- EPOCH_NODE
|
||||
* |--- RM_ZK_FENCING_LOCK
|
||||
* |--- RM_APP_ROOT
|
||||
* | |----- (#ApplicationId1)
|
||||
@ -391,6 +397,28 @@ protected synchronized RMStateVersion loadVersion() throws Exception {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int getAndIncrementEpoch() throws Exception {
|
||||
String epochNodePath = getNodePath(zkRootNodePath, EPOCH_NODE);
|
||||
int currentEpoch = 0;
|
||||
if (existsWithRetries(epochNodePath, true) != null) {
|
||||
// load current epoch
|
||||
byte[] data = getDataWithRetries(epochNodePath, true);
|
||||
Epoch epoch = new EpochPBImpl(EpochProto.parseFrom(data));
|
||||
currentEpoch = epoch.getEpoch();
|
||||
// increment epoch and store it
|
||||
byte[] storeData = Epoch.newInstance(currentEpoch + 1).getProto()
|
||||
.toByteArray();
|
||||
setDataWithRetries(epochNodePath, storeData, -1);
|
||||
} else {
|
||||
// initialize epoch node with 1 for the next time.
|
||||
byte[] storeData = Epoch.newInstance(currentEpoch + 1).getProto()
|
||||
.toByteArray();
|
||||
createWithRetries(epochNodePath, storeData, zkAcl, CreateMode.PERSISTENT);
|
||||
}
|
||||
return currentEpoch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized RMState loadState() throws Exception {
|
||||
RMState rmState = new RMState();
|
||||
|
@ -0,0 +1,74 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.recovery.records;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.EpochProto;
|
||||
import org.apache.hadoop.yarn.util.Records;
|
||||
|
||||
/**
|
||||
* The epoch information of RM for work-preserving restart.
|
||||
* Epoch is incremented each time RM restart. It's used for assuring
|
||||
* uniqueness of <code>ContainerId</code>.
|
||||
*/
|
||||
@Private
|
||||
@Unstable
|
||||
public abstract class Epoch {
|
||||
|
||||
public static Epoch newInstance(int sequenceNumber) {
|
||||
Epoch epoch = Records.newRecord(Epoch.class);
|
||||
epoch.setEpoch(sequenceNumber);
|
||||
return epoch;
|
||||
}
|
||||
|
||||
public abstract int getEpoch();
|
||||
|
||||
public abstract void setEpoch(int sequenceNumber);
|
||||
|
||||
public abstract EpochProto getProto();
|
||||
|
||||
public String toString() {
|
||||
return String.valueOf(getEpoch());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + getEpoch();
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj)
|
||||
return true;
|
||||
if (obj == null)
|
||||
return false;
|
||||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
Epoch other = (Epoch) obj;
|
||||
if (this.getEpoch() == other.getEpoch()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,67 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb;
|
||||
|
||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.EpochProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.EpochProtoOrBuilder;
|
||||
|
||||
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.Epoch;
|
||||
|
||||
public class EpochPBImpl extends Epoch {
|
||||
|
||||
EpochProto proto = EpochProto.getDefaultInstance();
|
||||
EpochProto.Builder builder = null;
|
||||
boolean viaProto = false;
|
||||
|
||||
public EpochPBImpl() {
|
||||
builder = EpochProto.newBuilder();
|
||||
}
|
||||
|
||||
public EpochPBImpl(EpochProto proto) {
|
||||
this.proto = proto;
|
||||
viaProto = true;
|
||||
}
|
||||
|
||||
public EpochProto getProto() {
|
||||
proto = viaProto ? proto : builder.build();
|
||||
viaProto = true;
|
||||
return proto;
|
||||
}
|
||||
|
||||
private void maybeInitBuilder() {
|
||||
if (viaProto || builder == null) {
|
||||
builder = EpochProto.newBuilder(proto);
|
||||
}
|
||||
viaProto = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getEpoch() {
|
||||
EpochProtoOrBuilder p = viaProto ? proto : builder;
|
||||
return (int) (p.getEpoch() & 0xffffffff);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setEpoch(int sequentialNumber) {
|
||||
maybeInitBuilder();
|
||||
builder.setEpoch(sequentialNumber);
|
||||
}
|
||||
|
||||
}
|
@ -57,7 +57,10 @@ public class AppSchedulingInfo {
|
||||
private final String queueName;
|
||||
Queue queue;
|
||||
final String user;
|
||||
private final AtomicInteger containerIdCounter = new AtomicInteger(0);
|
||||
// TODO making containerIdCounter long
|
||||
private final AtomicInteger containerIdCounter;
|
||||
private final int EPOCH_BIT_MASK = 0x3ff;
|
||||
private final int EPOCH_BIT_SHIFT = 22;
|
||||
|
||||
final Set<Priority> priorities = new TreeSet<Priority>(
|
||||
new org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.Comparator());
|
||||
@ -70,15 +73,19 @@ public class AppSchedulingInfo {
|
||||
|
||||
/* Allocated by scheduler */
|
||||
boolean pending = true; // for app metrics
|
||||
|
||||
|
||||
|
||||
public AppSchedulingInfo(ApplicationAttemptId appAttemptId,
|
||||
String user, Queue queue, ActiveUsersManager activeUsersManager) {
|
||||
String user, Queue queue, ActiveUsersManager activeUsersManager,
|
||||
int epoch) {
|
||||
this.applicationAttemptId = appAttemptId;
|
||||
this.applicationId = appAttemptId.getApplicationId();
|
||||
this.queue = queue;
|
||||
this.queueName = queue.getQueueName();
|
||||
this.user = user;
|
||||
this.activeUsersManager = activeUsersManager;
|
||||
this.containerIdCounter = new AtomicInteger(
|
||||
(epoch & EPOCH_BIT_MASK) << EPOCH_BIT_SHIFT);
|
||||
}
|
||||
|
||||
public ApplicationId getApplicationId() {
|
||||
@ -413,9 +420,6 @@ public synchronized void transferStateFromPreviousAppSchedulingInfo(
|
||||
}
|
||||
|
||||
public synchronized void recoverContainer(RMContainer rmContainer) {
|
||||
// ContainerIdCounter on recovery will be addressed in YARN-2052
|
||||
this.containerIdCounter.incrementAndGet();
|
||||
|
||||
QueueMetrics metrics = queue.getMetrics();
|
||||
if (pending) {
|
||||
// If there was any container to recover, the application was
|
||||
|
@ -17,6 +17,7 @@
|
||||
*/
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
@ -40,6 +41,7 @@
|
||||
import org.apache.hadoop.yarn.api.records.Priority;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||
@ -106,13 +108,14 @@ public class SchedulerApplicationAttempt {
|
||||
public SchedulerApplicationAttempt(ApplicationAttemptId applicationAttemptId,
|
||||
String user, Queue queue, ActiveUsersManager activeUsersManager,
|
||||
RMContext rmContext) {
|
||||
Preconditions.checkNotNull("RMContext should not be null", rmContext);
|
||||
this.rmContext = rmContext;
|
||||
this.appSchedulingInfo =
|
||||
new AppSchedulingInfo(applicationAttemptId, user, queue,
|
||||
activeUsersManager);
|
||||
activeUsersManager, rmContext.getEpoch());
|
||||
this.queue = queue;
|
||||
|
||||
if (rmContext != null && rmContext.getRMApps() != null &&
|
||||
|
||||
if (rmContext.getRMApps() != null &&
|
||||
rmContext.getRMApps()
|
||||
.containsKey(applicationAttemptId.getApplicationId())) {
|
||||
ApplicationSubmissionContext appSubmissionContext =
|
||||
|
@ -218,7 +218,7 @@ public void testSchedulerRecovery() throws Exception {
|
||||
assertEquals(availableResources, schedulerAttempt.getHeadroom());
|
||||
|
||||
// *********** check appSchedulingInfo state ***********
|
||||
assertEquals(4, schedulerAttempt.getNewContainerId());
|
||||
assertEquals((1 << 22) + 1, schedulerAttempt.getNewContainerId());
|
||||
}
|
||||
|
||||
private void checkCSQueue(MockRM rm,
|
||||
|
@ -495,6 +495,21 @@ public void testCheckVersion(RMStateStoreHelper stateStoreHelper)
|
||||
Assert.assertTrue(t instanceof RMStateVersionIncompatibleException);
|
||||
}
|
||||
}
|
||||
|
||||
public void testEpoch(RMStateStoreHelper stateStoreHelper)
|
||||
throws Exception {
|
||||
RMStateStore store = stateStoreHelper.getRMStateStore();
|
||||
store.setRMDispatcher(new TestDispatcher());
|
||||
|
||||
int firstTimeEpoch = store.getAndIncrementEpoch();
|
||||
Assert.assertEquals(0, firstTimeEpoch);
|
||||
|
||||
int secondTimeEpoch = store.getAndIncrementEpoch();
|
||||
Assert.assertEquals(1, secondTimeEpoch);
|
||||
|
||||
int thirdTimeEpoch = store.getAndIncrementEpoch();
|
||||
Assert.assertEquals(2, thirdTimeEpoch);
|
||||
}
|
||||
|
||||
public void testAppDeletion(RMStateStoreHelper stateStoreHelper)
|
||||
throws Exception {
|
||||
|
@ -158,6 +158,7 @@ public void testFSRMStateStore() throws Exception {
|
||||
.getFileSystem(conf).exists(tempAppAttemptFile));
|
||||
testRMDTSecretManagerStateStore(fsTester);
|
||||
testCheckVersion(fsTester);
|
||||
testEpoch(fsTester);
|
||||
testAppDeletion(fsTester);
|
||||
} finally {
|
||||
cluster.shutdown();
|
||||
|
@ -120,6 +120,7 @@ public void testZKRMStateStoreRealZK() throws Exception {
|
||||
testRMAppStateStore(zkTester);
|
||||
testRMDTSecretManagerStateStore(zkTester);
|
||||
testCheckVersion(zkTester);
|
||||
testEpoch(zkTester);
|
||||
testAppDeletion(zkTester);
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
*/
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
|
||||
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
@ -61,10 +62,15 @@ public void testMove() {
|
||||
QueueMetrics newMetrics = newQueue.getMetrics();
|
||||
|
||||
ApplicationAttemptId appAttId = createAppAttemptId(0, 0);
|
||||
RMContext rmContext = mock(RMContext.class);
|
||||
when(rmContext.getEpoch()).thenReturn(3);
|
||||
SchedulerApplicationAttempt app = new SchedulerApplicationAttempt(appAttId,
|
||||
user, oldQueue, oldQueue.getActiveUsersManager(), null);
|
||||
user, oldQueue, oldQueue.getActiveUsersManager(), rmContext);
|
||||
oldMetrics.submitApp(user);
|
||||
|
||||
// confirm that containerId is calculated based on epoch.
|
||||
assertEquals(app.getNewContainerId(), 0x00c00001);
|
||||
|
||||
// Resource request
|
||||
Resource requestedResource = Resource.newInstance(1536, 2);
|
||||
Priority requestedPriority = Priority.newInstance(2);
|
||||
|
@ -23,6 +23,7 @@
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.Priority;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
|
||||
import org.apache.hadoop.yarn.util.Clock;
|
||||
import org.junit.Test;
|
||||
@ -59,8 +60,11 @@ public void testDelayScheduling() {
|
||||
double rackLocalityThreshold = .6;
|
||||
|
||||
ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1);
|
||||
RMContext rmContext = Mockito.mock(RMContext.class);
|
||||
Mockito.when(rmContext.getEpoch()).thenReturn(0);
|
||||
FSSchedulerApp schedulerApp =
|
||||
new FSSchedulerApp(applicationAttemptId, "user1", queue , null, null);
|
||||
new FSSchedulerApp(applicationAttemptId, "user1", queue , null,
|
||||
rmContext);
|
||||
|
||||
// Default level should be node-local
|
||||
assertEquals(NodeType.NODE_LOCAL, schedulerApp.getAllowedLocalityLevel(
|
||||
@ -118,10 +122,12 @@ public void testDelaySchedulingForContinuousScheduling()
|
||||
long nodeLocalityDelayMs = 5 * 1000L; // 5 seconds
|
||||
long rackLocalityDelayMs = 6 * 1000L; // 6 seconds
|
||||
|
||||
RMContext rmContext = Mockito.mock(RMContext.class);
|
||||
Mockito.when(rmContext.getEpoch()).thenReturn(0);
|
||||
ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1);
|
||||
FSSchedulerApp schedulerApp =
|
||||
new FSSchedulerApp(applicationAttemptId, "user1", queue,
|
||||
null, null);
|
||||
null, rmContext);
|
||||
AppSchedulable appSchedulable = Mockito.mock(AppSchedulable.class);
|
||||
long startTime = clock.getTime();
|
||||
Mockito.when(appSchedulable.getStartTime()).thenReturn(startTime);
|
||||
@ -173,9 +179,12 @@ public void testLocalityLevelWithoutDelays() {
|
||||
Priority prio = Mockito.mock(Priority.class);
|
||||
Mockito.when(prio.getPriority()).thenReturn(1);
|
||||
|
||||
RMContext rmContext = Mockito.mock(RMContext.class);
|
||||
Mockito.when(rmContext.getEpoch()).thenReturn(0);
|
||||
ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1);
|
||||
FSSchedulerApp schedulerApp =
|
||||
new FSSchedulerApp(applicationAttemptId, "user1", queue , null, null);
|
||||
new FSSchedulerApp(applicationAttemptId, "user1", queue , null,
|
||||
rmContext);
|
||||
assertEquals(NodeType.OFF_SWITCH, schedulerApp.getAllowedLocalityLevel(
|
||||
prio, 10, -1.0, -1.0));
|
||||
}
|
||||
|
@ -28,6 +28,7 @@
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.junit.Before;
|
||||
@ -40,6 +41,7 @@ public class TestMaxRunningAppsEnforcer {
|
||||
private MaxRunningAppsEnforcer maxAppsEnforcer;
|
||||
private int appNum;
|
||||
private TestFairScheduler.MockClock clock;
|
||||
private RMContext rmContext;
|
||||
|
||||
@Before
|
||||
public void setup() throws Exception {
|
||||
@ -59,13 +61,16 @@ public void setup() throws Exception {
|
||||
userMaxApps = allocConf.userMaxApps;
|
||||
maxAppsEnforcer = new MaxRunningAppsEnforcer(scheduler);
|
||||
appNum = 0;
|
||||
rmContext = mock(RMContext.class);
|
||||
when(rmContext.getEpoch()).thenReturn(0);
|
||||
}
|
||||
|
||||
private FSSchedulerApp addApp(FSLeafQueue queue, String user) {
|
||||
ApplicationId appId = ApplicationId.newInstance(0l, appNum++);
|
||||
ApplicationAttemptId attId = ApplicationAttemptId.newInstance(appId, 0);
|
||||
boolean runnable = maxAppsEnforcer.canAppBeRunnable(queue, user);
|
||||
FSSchedulerApp app = new FSSchedulerApp(attId, user, queue, null, null);
|
||||
FSSchedulerApp app = new FSSchedulerApp(attId, user, queue, null,
|
||||
rmContext);
|
||||
queue.addApp(app, runnable);
|
||||
if (runnable) {
|
||||
maxAppsEnforcer.trackRunnableApp(app);
|
||||
|
Loading…
Reference in New Issue
Block a user