YARN-733. Fixed TestNMClient from failing occasionally. Contributed by Zhijie Shen.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1488618 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-06-01 22:01:06 +00:00
parent a2c4233004
commit 3dce234ed9
3 changed files with 45 additions and 25 deletions

View File

@ -397,6 +397,9 @@ Release 2.1.0-beta - UNRELEASED
YARN-578. Fixed NM to use SecureIOUtils for reading and aggregating logs.
(Omkar Vinit Joshi via vinodkv)
YARN-733. Fixed TestNMClient from failing occasionally. (Zhijie Shen via
vinodkv)
BREAKDOWN OF HADOOP-8562 SUBTASKS AND RELATED JIRAS
YARN-158. Yarn creating package-info.java must not depend on sh.

View File

@ -64,6 +64,17 @@
* continue to run even after this client is stopped and till the application
* runs at which point ResourceManager will forcefully kill them.
* </p>
*
* <p>
* Note that the blocking APIs ensure the RPC calls to <code>NodeManager</code>
* are executed immediately, and the responses are received before these APIs
* return. However, when {@link #startContainer} or {@link #stopContainer}
* returns, <code>NodeManager</code> may still need some time to either start
* or stop the container because of its asynchronous implementation. Therefore,
* {@link #getContainerStatus} is likely to return a transitional container status
* if it is executed immediately after {@link #startContainer} or
* {@link #stopContainer}.
* </p>
*/
public class NMClientImpl extends AbstractService implements NMClient {

View File

@ -20,8 +20,8 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
@ -228,7 +228,7 @@ private Set<Container> allocateContainers(
}
private void testContainerManagement(NMClientImpl nmClient,
Set<Container> containers) throws IOException {
Set<Container> containers) throws YarnRemoteException, IOException {
int size = containers.size();
int i = 0;
for (Container container : containers) {
@ -271,17 +271,9 @@ private void testContainerManagement(NMClientImpl nmClient,
// leave one container unclosed
if (++i < size) {
try {
ContainerStatus status = nmClient.getContainerStatus(container.getId(),
container.getNodeId(), container.getContainerToken());
// verify the container is started and in good shape
assertEquals(container.getId(), status.getContainerId());
assertEquals(ContainerState.RUNNING, status.getState());
assertEquals("", status.getDiagnostics());
assertEquals(-1000, status.getExitStatus());
} catch (YarnRemoteException e) {
fail("Exception is not expected");
}
// NodeManager may still need some time to start the container
testGetContainerStatus(container, i, ContainerState.RUNNING, "",
-1000);
try {
nmClient.stopContainer(container.getId(), container.getNodeId(),
@ -291,18 +283,8 @@ private void testContainerManagement(NMClientImpl nmClient,
}
// getContainerStatus can be called after stopContainer
try {
ContainerStatus status = nmClient.getContainerStatus(
container.getId(), container.getNodeId(),
container.getContainerToken());
assertEquals(container.getId(), status.getContainerId());
assertEquals(ContainerState.RUNNING, status.getState());
assertTrue("" + i, status.getDiagnostics().contains(
"Container killed by the ApplicationMaster."));
assertEquals(-1000, status.getExitStatus());
} catch (YarnRemoteException e) {
fail("Exception is not expected");
}
testGetContainerStatus(container, i, ContainerState.COMPLETE,
"Container killed by the ApplicationMaster.", 143);
}
}
}
@ -315,4 +297,28 @@ private void sleep(int sleepTime) {
}
}
/**
 * Polls the container status through {@code nmClient} until the container
 * reports the expected state, then verifies the remaining status fields.
 *
 * <p>The status is re-queried every 100 ms for as long as the reported state
 * differs from {@code state}. Once it matches, the container id, diagnostics
 * and exit status are asserted and the method returns.
 *
 * <p>NOTE(review): the loop has no upper bound of its own. If the container
 * never reaches {@code state}, this method only ends through an exception or
 * an interrupt -- confirm the calling test is run with a timeout.
 *
 * @param container the container whose status is queried
 * @param index number used as a prefix of the diagnostics assertion message
 *          so a failure can be traced back to a specific container
 * @param state the container state to wait for
 * @param diagnostics text the reported diagnostics must contain
 * @param exitStatus the exit status the container must report
 * @throws YarnRemoteException if the status query fails remotely
 * @throws IOException if the status query fails, or if the thread is
 *           interrupted while waiting between two polls
 */
private void testGetContainerStatus(Container container, int index,
    ContainerState state, String diagnostics, int exitStatus)
    throws YarnRemoteException, IOException {
  while (true) {
    ContainerStatus status = nmClient.getContainerStatus(
        container.getId(), container.getNodeId(),
        container.getContainerToken());
    // NodeManager may still need some time to get the stable
    // container status
    if (status.getState() == state) {
      assertEquals(container.getId(), status.getContainerId());
      assertTrue("" + index + ": " + status.getDiagnostics(),
          status.getDiagnostics().contains(diagnostics));
      assertEquals(exitStatus, status.getExitStatus());
      return;
    }
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      // Swallowing the interrupt would keep this loop polling forever.
      // Restore the flag for callers and stop waiting.
      Thread.currentThread().interrupt();
      throw new IOException("Interrupted while waiting for container "
          + container.getId() + " to reach state " + state, e);
    }
  }
}
}