diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/UpgradeComponentsFinder.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/UpgradeComponentsFinder.java index 96a34f4d3e..7a88ccf46c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/UpgradeComponentsFinder.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/UpgradeComponentsFinder.java @@ -68,12 +68,6 @@ public List findTargetComponentSpecs(Service currentDef, "not supported by upgrade"); } - if (!Objects.equals(currentDef.getQuicklinks(), - targetDef.getQuicklinks())) { - throw new UnsupportedOperationException("changes to quick links " + - "not supported by upgrade"); - } - if (!Objects.equals(currentDef.getLaunchTime(), targetDef.getLaunchTime())) { throw new UnsupportedOperationException("changes to launch time " + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java index ef844a5871..a3aa143d4e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java @@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.service.component.ComponentEvent; import org.apache.hadoop.yarn.service.component.ComponentEventType; import org.apache.hadoop.yarn.service.component.ComponentRestartPolicy; +import org.apache.hadoop.yarn.service.monitor.probe.DefaultProbe; import org.apache.hadoop.yarn.service.monitor.probe.ProbeStatus; import org.apache.hadoop.yarn.service.registry.YarnRegistryViewForProviders; import org.apache.hadoop.yarn.service.timelineservice.ServiceTimelinePublisher; @@ -186,7 +187,7 @@ private static class ContainerStartedTransition extends BaseTransition { @Override public void transition(ComponentInstance compInstance, ComponentInstanceEvent event) { // Query container status for ip and host - compInstance.initializeStatusRetriever(event); + compInstance.initializeStatusRetriever(event, 0); long containerStartTime = System.currentTimeMillis(); try { ContainerTokenIdentifier containerTokenIdentifier = BuilderUtils @@ -266,7 +267,12 @@ public ComponentInstanceState transition(ComponentInstance instance, instance.upgradeInProgress.set(false); instance.setContainerState(ContainerState.RUNNING_BUT_UNREADY); - instance.initializeStatusRetriever(event); + if (instance.component.getProbe() != null && + instance.component.getProbe() instanceof DefaultProbe) { + instance.initializeStatusRetriever(event, 30); + } else { + instance.initializeStatusRetriever(event, 0); + } Component.UpgradeStatus status = instance.getState().equals(UPGRADING) ? instance.component.getUpgradeStatus() : @@ -625,7 +631,7 @@ private void cancelUpgrade() { private void reInitHelper(Component.UpgradeStatus upgradeStatus) { cancelContainerStatusRetriever(); - setContainerStatus(null); + setContainerStatus(container.getId(), null); scheduler.executorService.submit(() -> cleanupRegistry(container.getId())); scheduler.getContainerLaunchService() .reInitCompInstance(scheduler.getApp(), this, @@ -634,7 +640,8 @@ private void reInitHelper(Component.UpgradeStatus upgradeStatus) { upgradeStatus.getTargetVersion())); } - private void initializeStatusRetriever(ComponentInstanceEvent event) { + private void initializeStatusRetriever(ComponentInstanceEvent event, + long initialDelay) { boolean cancelOnSuccess = true; if (getCompSpec().getArtifact() != null && getCompSpec().getArtifact().getType() == Artifact.TypeEnum.DOCKER) { @@ -644,10 +651,11 @@ private void initializeStatusRetriever(ComponentInstanceEvent event) { // container relaunch (see YARN-8265). cancelOnSuccess = false; } + LOG.info("{} retrieve status after {}", compInstanceId, initialDelay); containerStatusFuture = scheduler.executorService.scheduleAtFixedRate( new ContainerStatusRetriever(scheduler, event.getContainerId(), - this, cancelOnSuccess), 0, 1, + this, cancelOnSuccess), initialDelay, 1, TimeUnit.SECONDS); } @@ -743,32 +751,44 @@ public ContainerStatus getContainerStatus() { } } - private void setContainerStatus(ContainerStatus latestStatus) { + private void setContainerStatus(ContainerId containerId, + ContainerStatus latestStatus) { try { writeLock.lock(); this.status = latestStatus; + org.apache.hadoop.yarn.service.api.records.Container containerRec = + getCompSpec().getContainer(containerId.toString()); + + if (containerRec != null) { + if (latestStatus != null) { + containerRec.setIp(StringUtils.join(",", latestStatus.getIPs())); + containerRec.setHostname(latestStatus.getHost()); + } else { + containerRec.setIp(null); + containerRec.setHostname(null); + } + } } finally { writeLock.unlock(); } } public void updateContainerStatus(ContainerStatus status) { - setContainerStatus(status); - org.apache.hadoop.yarn.service.api.records.Container container = + org.apache.hadoop.yarn.service.api.records.Container containerRec = getCompSpec().getContainer(status.getContainerId().toString()); boolean doRegistryUpdate = true; - if (container != null) { - String existingIP = container.getIp(); + if (containerRec != null) { + String existingIP = containerRec.getIp(); String newIP = StringUtils.join(",", status.getIPs()); - container.setIp(newIP); - container.setHostname(status.getHost()); if (existingIP != null && newIP.equals(existingIP)) { doRegistryUpdate = false; } - if (timelineServiceEnabled && doRegistryUpdate) { - serviceTimelinePublisher.componentInstanceIPHostUpdated(container); - } } + setContainerStatus(status.getContainerId(), status); + if (containerRec != null && timelineServiceEnabled && doRegistryUpdate) { + serviceTimelinePublisher.componentInstanceIPHostUpdated(containerRec); + } + if (doRegistryUpdate) { cleanupRegistry(status.getContainerId()); LOG.info( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java index 31a95b0c94..a22ada443c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java @@ -363,8 +363,12 @@ public void testRecoverComponentsAfterRMRestart() throws Exception { Multimap containersAfterFailure = waitForAllCompToBeReady( client, exampleApp); - Assert.assertEquals("component container affected by restart", - containersBeforeFailure, containersAfterFailure); + containersBeforeFailure.keys().forEach(compName -> { + Assert.assertEquals("num containers after by restart for " + compName, + containersBeforeFailure.get(compName).size(), + containersAfterFailure.get(compName) == null ? 0 : + containersAfterFailure.get(compName).size()); + }); LOG.info("Stop/destroy service {}", exampleApp); client.actionStop(exampleApp.getName(), true);