Patch summary (free text before the first "diff --git" header is ignored by
git apply and patch):
  - BlockManagerImpl: log pipeline-creation failures before breaking out.
  - start-chaos.sh: new helper script that builds the project and launches
    TestMiniChaosOzoneCluster through "mvn exec:java", logging to
    /tmp/<date>.MiniOzoneChaosCluster.log.
  - MiniOzoneChaosCluster: startup logging, null-safe stopChaos(), and the
    container cache size set to 8.
  - MiniOzoneLoadGenerator: a failed key create is logged and skipped instead
    of stopping the IO thread (see the HDDS-1403 TODO); start/stop logging.
  - TestMiniChaosOzoneCluster: run() uses failureInterval as the initial
    delay; the unit test uses a 10 second failure period.

NOTE(review): this patch was recovered from a whitespace-collapsed copy. The
line counts of every hunk match its @@ header, but indentation and the
position of blank context lines are reconstructed -- verify against the tree
(or apply with --ignore-whitespace) before relying on it. The "index" blob
ids are carried over unchanged and no longer describe the edited contents of
start-chaos.sh, MiniOzoneChaosCluster.java and MiniOzoneLoadGenerator.java.

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
index 5ae4115e85..d15f07b496 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
@@ -189,6 +189,8 @@ public AllocatedBlock allocateBlock(final long size, ReplicationType type,
           // factors are handled by pipeline creator
           pipeline = pipelineManager.createPipeline(type, factor);
         } catch (IOException e) {
+          LOG.error("pipeline creation failed type:{} factor:{}", type,
+              factor, e);
           break;
         }
       } else {
diff --git a/hadoop-ozone/integration-test/src/test/bin/start-chaos.sh b/hadoop-ozone/integration-test/src/test/bin/start-chaos.sh
new file mode 100755
index 0000000000..dcec909c83
--- /dev/null
+++ b/hadoop-ozone/integration-test/src/test/bin/start-chaos.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+date=$(date +"%m-%d-%y-%T")
+fileformat=".MiniOzoneChaosCluster.log"
+heapformat=".dump"
+current="/tmp/"
+filename=$current$date$fileformat
+heapdumpfile=$current$date$heapformat
+
+export MAVEN_OPTS="-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=${heapdumpfile}"
+
+echo "logging to ${filename}"
+echo "heapdump to ${heapdumpfile}"
+
+echo "Starting MiniOzoneChaosCluster"
+mvn clean install -DskipTests > "${filename}" 2>&1
+mvn exec:java \
+  -Dexec.mainClass="org.apache.hadoop.ozone.TestMiniChaosOzoneCluster" \
+  -Dexec.classpathScope=test \
+  -Dexec.args="$*" >> "${filename}" 2>&1
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java
index 8e25d48180..52a2d40abc 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneChaosCluster.java
@@ -66,6 +66,7 @@ public MiniOzoneChaosCluster(OzoneConfiguration conf,
 
     this.executorService = Executors.newSingleThreadScheduledExecutor();
     this.numDatanodes = getHddsDatanodes().size();
+    LOG.info("Starting MiniOzoneChaosCluster with:{} datanodes", numDatanodes);
     LogUtils.setLogLevel(GrpcClientProtocolClient.LOG, Level.WARN);
   }
 
@@ -117,13 +118,16 @@ private void fail() {
   }
 
   void startChaos(long initialDelay, long period, TimeUnit timeUnit) {
+    LOG.info("Starting Chaos with failure period:{} unit:{}", period, timeUnit);
     scheduledFuture = executorService.scheduleAtFixedRate(this::fail,
         initialDelay, period, timeUnit);
   }
 
   void stopChaos() throws Exception {
-    scheduledFuture.cancel(false);
-    scheduledFuture.get();
+    if (scheduledFuture != null) {
+      scheduledFuture.cancel(false);
+      scheduledFuture.get();
+    }
   }
 
   public void shutdown() {
@@ -192,6 +196,7 @@ void initializeConfiguration() throws IOException {
           1, TimeUnit.SECONDS);
       conf.setTimeDuration(HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL, 1,
           TimeUnit.SECONDS);
+      conf.setInt(OzoneConfigKeys.OZONE_CONTAINER_CACHE_SIZE, 8);
     }
 
     @Override
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneLoadGenerator.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneLoadGenerator.java
index efb3b66637..005a528217 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneLoadGenerator.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneLoadGenerator.java
@@ -81,7 +81,8 @@ public class MiniOzoneLoadGenerator {
 
   // Start IO load on an Ozone bucket.
   private void load(long runTimeMillis) {
-    LOG.info("Started IO Thread" + Thread.currentThread().getId());
+    long threadID = Thread.currentThread().getId();
+    LOG.info("Started IO Thread:{}.", threadID);
     String threadName = Thread.currentThread().getName();
     long startTime = Time.monotonicNow();
 
@@ -98,8 +99,11 @@ private void load(long runTimeMillis) {
           new HashMap<>())) {
         stream.write(buffer.array());
       } catch (Exception e) {
-        LOG.error("LOADGEN: Create key:{} failed with exception", keyName, e);
-        break;
+        LOG.error("LOADGEN: Create key:{} failed with exception, skipping",
+            keyName, e);
+        // TODO: HDDS-1403. A key write can fail after multiple block writes
+        // to closed container. Switch back to break once that is fixed.
+        continue;
       }
 
       try (OzoneInputStream stream = ozoneBucket.readKey(keyName)) {
@@ -119,17 +123,20 @@ private void load(long runTimeMillis) {
         }
 
       } catch (Exception e) {
-        LOG.error("Read key:{} failed with exception", keyName, e);
+        LOG.error("LOADGEN: Read key:{} failed with exception", keyName, e);
         break;
       }
     }
 
     // This will terminate other threads too.
     isWriteThreadRunning.set(false);
+    LOG.info("Terminating IO thread:{}.", threadID);
   }
 
   public void startIO(long time, TimeUnit timeUnit) {
     List<CompletableFuture<Void>> writeFutures = new ArrayList<>();
+    LOG.info("Starting MiniOzoneLoadGenerator for time {}:{} with {} buffers " +
+        "and {} threads", time, timeUnit, numBuffers, numWriteThreads);
     if (isWriteThreadRunning.compareAndSet(false, true)) {
       // Start the IO thread
       for (int i = 0; i < numWriteThreads; i++) {
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java
index 04383519bc..a979c40259 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestMiniChaosOzoneCluster.java
@@ -96,7 +96,7 @@ public static void shutdown() {
   public void run() {
     try {
       init();
-      cluster.startChaos(5, failureInterval, TimeUnit.SECONDS);
+      cluster.startChaos(failureInterval, failureInterval, TimeUnit.SECONDS);
       loadGenerator.startIO(numMinutes, TimeUnit.MINUTES);
     } catch (Exception e) {
     } finally {
@@ -109,8 +109,8 @@ public static void main(String... args) {
   }
 
   @Test
-  public void testReadWriteWithChaosCluster() throws Exception {
-    cluster.startChaos(5, 1, TimeUnit.SECONDS);
+  public void testReadWriteWithChaosCluster() {
+    cluster.startChaos(5, 10, TimeUnit.SECONDS);
     loadGenerator.startIO(1, TimeUnit.MINUTES);
   }
 }