YARN-7458. TestContainerManagerSecurity is still flakey
(Contributed by Robert Kanter via Daniel Templeton)

Change-Id: Ibb1975ad086c3a33f8af0b4f8b9a13c3cdca3f7d
This commit is contained in:
parent
0de10680b7
commit
49b4c0b334
@ -28,7 +28,9 @@ import java.util.Arrays;
|
|||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
|
import com.google.common.base.Supplier;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
||||||
import org.apache.hadoop.io.DataInputBuffer;
|
import org.apache.hadoop.io.DataInputBuffer;
|
||||||
@ -36,6 +38,7 @@ import org.apache.hadoop.minikdc.KerberosSecurityTestcase;
|
|||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
|
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
|
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
|
||||||
@ -49,6 +52,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
|
|||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerState;
|
import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.Priority;
|
import org.apache.hadoop.yarn.api.records.Priority;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
@ -404,27 +408,33 @@ public class TestContainerManagerSecurity extends KerberosSecurityTestcase {
|
|||||||
newContainerToken, attempt1NMToken, false).isEmpty());
|
newContainerToken, attempt1NMToken, false).isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
private void waitForContainerToFinishOnNM(ContainerId containerId) {
|
private void waitForContainerToFinishOnNM(ContainerId containerId)
|
||||||
|
throws TimeoutException, InterruptedException {
|
||||||
Context nmContext = yarnCluster.getNodeManager(0).getNMContext();
|
Context nmContext = yarnCluster.getNodeManager(0).getNMContext();
|
||||||
int interval = 4 * 60; // Max time for container token to expire.
|
int interval = 4 * 60; // Max time for container token to expire.
|
||||||
|
|
||||||
Assert.assertNotNull(nmContext.getContainers().containsKey(containerId));
|
// If the container is null, then it has already completed and been removed
|
||||||
|
// from the Context by asynchronous calls.
|
||||||
// Get the container first, as it may be removed from the Context
|
|
||||||
// by asynchronous calls.
|
|
||||||
// This was leading to a flakey test as otherwise the container could
|
|
||||||
// be removed and end up null.
|
|
||||||
Container waitContainer = nmContext.getContainers().get(containerId);
|
Container waitContainer = nmContext.getContainers().get(containerId);
|
||||||
|
if (waitContainer != null) {
|
||||||
while ((interval-- > 0)
|
|
||||||
&& !waitContainer.cloneAndGetContainerStatus()
|
|
||||||
.getState().equals(ContainerState.COMPLETE)) {
|
|
||||||
try {
|
try {
|
||||||
LOG.info("Waiting for " + containerId + " to complete.");
|
LOG.info("Waiting for " + containerId + " to get to state " +
|
||||||
Thread.sleep(1000);
|
ContainerState.COMPLETE);
|
||||||
} catch (InterruptedException e) {
|
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||||
|
@Override
|
||||||
|
public Boolean get() {
|
||||||
|
return ContainerState.COMPLETE.equals(
|
||||||
|
waitContainer.cloneAndGetContainerStatus().getState());
|
||||||
|
}
|
||||||
|
}, 10, interval);
|
||||||
|
} catch (TimeoutException te) {
|
||||||
|
fail("Was waiting for " + containerId + " to get to state " +
|
||||||
|
ContainerState.COMPLETE + " but was in state " +
|
||||||
|
waitContainer.cloneAndGetContainerStatus().getState() +
|
||||||
|
" after the timeout");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Normally, Containers will be removed from NM context after they are
|
// Normally, Containers will be removed from NM context after they are
|
||||||
// explicitly acked by RM. Now, manually remove it for testing.
|
// explicitly acked by RM. Now, manually remove it for testing.
|
||||||
yarnCluster.getNodeManager(0).getNodeStatusUpdater()
|
yarnCluster.getNodeManager(0).getNodeStatusUpdater()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user