HDDS-1040. Add blockade Tests for client failures. Contributed by Nilotpal Nandi.
This commit is contained in:
parent
e7d1ae52d2
commit
73b67b2df5
@ -22,6 +22,7 @@
|
||||
import time
|
||||
import re
|
||||
import yaml
|
||||
import os
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -64,17 +65,18 @@ def cluster_destroy(cls, docker_compose_file):
|
||||
|
||||
@classmethod
|
||||
def run_freon(cls, docker_compose_file, num_volumes, num_buckets,
|
||||
num_keys, key_size, replication_type, replication_factor):
|
||||
num_keys, key_size, replication_type, replication_factor,
|
||||
freon_client='ozoneManager'):
|
||||
# run freon
|
||||
cmd = "docker-compose -f %s " \
|
||||
"exec ozoneManager /opt/hadoop/bin/ozone " \
|
||||
"exec %s /opt/hadoop/bin/ozone " \
|
||||
"freon rk " \
|
||||
"--numOfVolumes %s " \
|
||||
"--numOfBuckets %s " \
|
||||
"--numOfKeys %s " \
|
||||
"--keySize %s " \
|
||||
"--replicationType %s " \
|
||||
"--factor %s" % (docker_compose_file, num_volumes,
|
||||
"--factor %s" % (docker_compose_file, freon_client, num_volumes,
|
||||
num_buckets, num_keys, key_size,
|
||||
replication_type, replication_factor)
|
||||
exit_code, output = cls.run_cmd(cmd)
|
||||
@ -189,3 +191,110 @@ def find_all_datanodes_container_status(cls, docker_compose_file, scale):
|
||||
' '.join(all_datanode_container_status))
|
||||
|
||||
return all_datanode_container_status
|
||||
|
||||
@classmethod
|
||||
def create_volume(cls, docker_compose_file, volume_name):
    """Create an Ozone volume from inside the ozone_client container.

    Runs ``ozone sh volume create /<volume_name> --user root`` through
    ``docker-compose exec`` and asserts that the command exits with 0.

    :param docker_compose_file: compose file passed to ``docker-compose -f``.
    :param volume_name: name of the volume to create (without leading '/').
    :raises AssertionError: if the command's exit code is non-zero.
    """
    pieces = [
        "docker-compose -f %s" % (docker_compose_file,),
        "exec ozone_client /opt/hadoop/bin/ozone",
        "sh volume create /%s --user root" % (volume_name,),
    ]
    logger.info("Creating Volume %s", volume_name)
    status, text = cls.run_cmd(" ".join(pieces))
    assert status == 0, "Ozone volume create failed with output=[%s]" \
        % text
|
||||
|
||||
@classmethod
|
||||
def delete_volume(cls, docker_compose_file, volume_name):
    """Delete an Ozone volume from inside the ozone_client container.

    Unlike the create helpers, no assertion is made here: the exit code and
    output of the command are handed back so the caller can decide.

    :param docker_compose_file: compose file passed to ``docker-compose -f``.
    :param volume_name: name of the volume to delete (without leading '/').
    :return: ``(exit_code, output)`` as unpacked from ``cls.run_cmd``.
    """
    template = ("docker-compose -f %s "
                "exec ozone_client /opt/hadoop/bin/ozone "
                "sh volume delete /%s")
    logger.info("Deleting Volume %s", volume_name)
    status, text = cls.run_cmd(template % (docker_compose_file, volume_name))
    return status, text
|
||||
|
||||
@classmethod
|
||||
def create_bucket(cls, docker_compose_file, bucket_name, volume_name):
    """Create a bucket inside an existing volume via the ozone_client service.

    Runs ``ozone sh bucket create /<volume_name>/<bucket_name>`` through
    ``docker-compose exec`` and asserts that the command exits with 0.

    :param docker_compose_file: compose file passed to ``docker-compose -f``.
    :param bucket_name: name of the bucket to create.
    :param volume_name: name of the volume that will hold the bucket.
    :raises AssertionError: if the command's exit code is non-zero.
    """
    bucket_path = "/%s/%s" % (volume_name, bucket_name)
    command = " ".join([
        "docker-compose -f %s" % (docker_compose_file,),
        "exec ozone_client /opt/hadoop/bin/ozone",
        "sh bucket create %s" % (bucket_path,),
    ])
    logger.info("Creating Bucket %s in volume %s",
                bucket_name, volume_name)
    status, text = cls.run_cmd(command)
    assert status == 0, "Ozone bucket create failed with output=[%s]" \
        % text
|
||||
|
||||
@classmethod
|
||||
def delete_bucket(cls, docker_compose_file, bucket_name, volume_name):
    """Delete a bucket from a volume via the ozone_client service.

    The result is not asserted; the caller receives the exit code and the
    command output.

    :param docker_compose_file: compose file passed to ``docker-compose -f``.
    :param bucket_name: name of the bucket to delete.
    :param volume_name: name of the volume that holds the bucket.
    :return: ``(exit_code, output)`` as unpacked from ``cls.run_cmd``.
    """
    template = ("docker-compose -f %s "
                "exec ozone_client /opt/hadoop/bin/ozone "
                "sh bucket delete /%s/%s")
    command = template % (docker_compose_file, volume_name, bucket_name)
    logger.info("Running delete bucket of %s/%s", volume_name, bucket_name)
    status, text = cls.run_cmd(command)
    return status, text
|
||||
|
||||
@classmethod
|
||||
def put_key(cls, docker_compose_file, bucket_name, volume_name,
            filepath, key_name=None, replication_factor=None):
    """Upload a file that lives in the ozone_client container as an Ozone key.

    The file is first checked with ``ls`` inside the container; then
    ``ozone sh key put`` is executed and asserted to succeed.

    :param docker_compose_file: compose file passed to ``docker-compose -f``.
    :param bucket_name: destination bucket.
    :param volume_name: destination volume.
    :param filepath: path of the source file inside the client container.
    :param key_name: key to create; defaults to the basename of ``filepath``.
    :param replication_factor: when truthy, appended as ``--replication=``.
    :raises AssertionError: if the file check or the put command fails.
    """
    compose_exec = "docker-compose -f %s exec ozone_client" \
        % (docker_compose_file,)

    # Fail early with a clear message when the source file is missing.
    status, _ = cls.run_cmd("%s ls %s" % (compose_exec, filepath))
    assert status == 0, "%s does not exist" % filepath

    if key_name is None:
        key_name = os.path.basename(filepath)

    command = "%s /opt/hadoop/bin/ozone sh key put /%s/%s/%s %s" % (
        compose_exec, volume_name, bucket_name, key_name, filepath)
    if replication_factor:
        command = "%s --replication=%s" % (command, replication_factor)

    logger.info("Creating key %s in %s/%s", key_name,
                volume_name, bucket_name)
    status, text = cls.run_cmd(command)
    assert status == 0, "Ozone put Key failed with output=[%s]" % text
|
||||
|
||||
@classmethod
|
||||
def delete_key(cls, docker_compose_file, bucket_name, volume_name,
               key_name):
    """Delete a key from a bucket via the ozone_client service.

    The result is not asserted; the caller receives the exit code and the
    command output.

    :param docker_compose_file: compose file passed to ``docker-compose -f``.
    :param bucket_name: bucket that holds the key.
    :param volume_name: volume that holds the bucket.
    :param key_name: name of the key to delete.
    :return: ``(exit_code, output)`` as unpacked from ``cls.run_cmd``.
    """
    key_path = "/%s/%s/%s" % (volume_name, bucket_name, key_name)
    command = " ".join([
        "docker-compose -f %s" % (docker_compose_file,),
        "exec ozone_client /opt/hadoop/bin/ozone",
        "sh key delete %s" % (key_path,),
    ])
    logger.info("Running delete key %s in %s/%s",
                key_name, volume_name, bucket_name)
    status, text = cls.run_cmd(command)
    return status, text
|
||||
|
||||
@classmethod
|
||||
def get_key(cls, docker_compose_file, bucket_name, volume_name,
            key_name, filepath=None):
    """Download an Ozone key to a path inside the ozone_client container.

    Runs ``ozone sh key get`` through ``docker-compose exec`` and asserts
    that the command exits with 0.

    :param docker_compose_file: compose file passed to ``docker-compose -f``.
    :param bucket_name: bucket that holds the key.
    :param volume_name: volume that holds the bucket.
    :param key_name: name of the key to fetch.
    :param filepath: destination passed to the command; ``'.'`` when None.
    :raises AssertionError: if the command's exit code is non-zero.
    """
    destination = '.' if filepath is None else filepath
    template = ("docker-compose -f %s "
                "exec ozone_client /opt/hadoop/bin/ozone "
                "sh key get /%s/%s/%s %s")
    command = template % (docker_compose_file, volume_name, bucket_name,
                          key_name, destination)
    logger.info("Running get key %s in %s/%s", key_name,
                volume_name, bucket_name)
    status, text = cls.run_cmd(command)
    assert status == 0, "Ozone get Key failed with output=[%s]" % text
|
||||
|
||||
@classmethod
|
||||
def find_checksum(cls, docker_compose_file, filepath):
    """Return the md5sum of a file inside the ozone_client container.

    Output lines containing "Warning" or "is not a tty" are logged and
    dropped; the remaining lines are concatenated and the text before the
    first space is returned as the checksum.

    :param docker_compose_file: compose file passed to ``docker-compose -f``.
    :param filepath: path of the file inside the client container.
    :return: first space-separated token of the filtered ``md5sum`` output.
    :raises AssertionError: if the ``md5sum`` command exits non-zero.
    """
    command = "docker-compose -f %s exec ozone_client md5sum %s" % (
        docker_compose_file, filepath)
    status, raw_output = cls.run_cmd(command)
    assert status == 0, "Cant find checksum"

    kept_lines = []
    for line in raw_output.split("\n"):
        if "Warning" in line or "is not a tty" in line:
            logger.info("skip this line: %s", line)
            continue
        kept_lines.append(line)

    digest = "".join(kept_lines).split(" ")[0]
    logger.info("Checksum of %s is : %s", filepath, digest)
    return digest
|
23
hadoop-ozone/dist/src/main/blockade/conftest.py
vendored
23
hadoop-ozone/dist/src/main/blockade/conftest.py
vendored
@ -15,8 +15,10 @@
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import subprocess
|
||||
|
||||
|
||||
EPOCH_TIME = int(time.time())
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption("--output-dir",
|
||||
action="store",
|
||||
@ -40,13 +42,14 @@ def pytest_addoption(parser):
|
||||
|
||||
|
||||
def pytest_configure(config):
|
||||
global OUTPUT_DIR
|
||||
os.environ["CONTAINER_STATUS_SLEEP"] = config.option.containerStatusSleep
|
||||
outputdir = config.option.output_dir
|
||||
OUTPUT_DIR = "%s/%s" % (config.option.output_dir, EPOCH_TIME)
|
||||
try:
|
||||
os.makedirs(outputdir)
|
||||
os.makedirs(OUTPUT_DIR)
|
||||
except OSError, e:
|
||||
raise Exception(e.strerror + ": " + e.filename)
|
||||
log_file = os.path.join(outputdir, "output.log")
|
||||
log_file = os.path.join(OUTPUT_DIR, "output.log")
|
||||
|
||||
if config.option.log_level == "trace":
|
||||
loglevel = eval("logging.DEBUG")
|
||||
@ -74,8 +77,20 @@ def pytest_report_teststatus(report):
|
||||
elif report.when == 'call':
|
||||
logger.info("TEST \"%s\" %s in %3.2f seconds" %
|
||||
(name, report.outcome.upper(), report.duration))
|
||||
log_file_path = "%s/%s_all_docker.log" % \
|
||||
(OUTPUT_DIR, name)
|
||||
gather_docker_logs(log_file_path)
|
||||
|
||||
|
||||
def pytest_sessionfinish(session):
    """Log the end of the test session and where its logs were written.

    :param session: the pytest session object (not used here).
    """
    main_logger = logging.getLogger('main')
    main_logger.info("ALL TESTS FINISHED")
    # OUTPUT_DIR is the module-level global read here; it is expected to have
    # been assigned before the session ends.
    main_logger.info("ALL logs present in following directory: %s",
                     OUTPUT_DIR)
|
||||
|
||||
|
||||
def gather_docker_logs(log_file_path):
    """Dump the ``docker-compose logs`` output of the cluster into a file.

    The compose file is taken from the ``DOCKER_COMPOSE_FILE`` environment
    variable.

    :param log_file_path: path of the file to (over)write with the logs.
    :raises KeyError: if ``DOCKER_COMPOSE_FILE`` is not set.
    :raises subprocess.CalledProcessError: if ``docker-compose logs`` exits
        with a non-zero status.
    """
    docker_compose_file = os.environ["DOCKER_COMPOSE_FILE"]
    output = subprocess.check_output(["docker-compose", "-f",
                                      docker_compose_file, "logs"])
    # check_output returns the raw bytes of the command; write them in binary
    # mode so this works on both Python 2 and Python 3 and the log content is
    # stored exactly as emitted.
    with open(log_file_path, "wb") as log_file:
        log_file.write(output)
|
||||
|
124
hadoop-ozone/dist/src/main/blockade/test_blockade_client_failure.py
vendored
Normal file
124
hadoop-ozone/dist/src/main/blockade/test_blockade_client_failure.py
vendored
Normal file
@ -0,0 +1,124 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import logging
|
||||
from blockadeUtils.blockade import Blockade
|
||||
from clusterUtils.cluster_utils import ClusterUtils
|
||||
|
||||
|
||||
# Logger named after this test module.
logger = logging.getLogger(__name__)

# Parent of the directory that contains this (symlink-resolved) file.
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

# Compose file that describes the blockade test cluster.
FILE = os.path.join(
    parent_dir, "compose", "ozoneblockade", "docker-compose.yaml")

# Publish the compose file location through the environment as well.
os.environ["DOCKER_COMPOSE_FILE"] = FILE

# Scale value handed to the cluster setup together with FILE.
SCALE = 3

# Container name lists; they start empty and are rebound by setup().
CONTAINER_LIST, OM, SCM, DATANODES, CLIENT = [], [], [], [], []
|
||||
|
||||
|
||||
def setup():
    """Bring up a fresh cluster and prepare the volume, bucket and checksum.

    Destroys any existing blockade, starts the cluster, classifies the
    container names by role, runs a freon smoke test from the ozone_client
    container, creates the test volume and bucket, and records the checksum
    of /etc/passwd for later comparison.

    :raises AssertionError: if blockade status or the freon run reports a
        non-zero exit code, or if volume/bucket creation fails.
    """
    global CONTAINER_LIST, OM, SCM, DATANODES, CLIENT, ORIG_CHECKSUM, \
        TEST_VOLUME_NAME, TEST_BUCKET_NAME
    # Suffix the names with the current epoch second so that each setup()
    # call works on its own volume and bucket.
    epoch_time = int(time.time())
    TEST_VOLUME_NAME = "%s%s" % ("volume", epoch_time)
    TEST_BUCKET_NAME = "%s%s" % ("bucket", epoch_time)
    Blockade.blockade_destroy()
    CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
    exit_code, output = Blockade.blockade_status()
    assert exit_code == 0, "blockade status command failed with output=[%s]" % \
        output
    # Build real lists: the tests index these with [0], which a lazy
    # filter() object (Python 3) would not support.
    OM = [name for name in CONTAINER_LIST if 'ozoneManager' in name]
    SCM = [name for name in CONTAINER_LIST if 'scm' in name]
    DATANODES = sorted(name for name in CONTAINER_LIST if 'datanode' in name)
    CLIENT = [name for name in CONTAINER_LIST if 'ozone_client' in name]

    exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
                                               "THREE", "ozone_client")
    assert exit_code == 0, "freon run failed with output=[%s]" % output
    ClusterUtils.create_volume(FILE, TEST_VOLUME_NAME)
    ClusterUtils.create_bucket(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME)
    ORIG_CHECKSUM = ClusterUtils.find_checksum(FILE, "/etc/passwd")
|
||||
|
||||
|
||||
def teardown():
    """Log entry into teardown and destroy the current blockade."""
    logger.info("Inside teardown")
    Blockade.blockade_destroy()
|
||||
|
||||
|
||||
def teardown_module():
    """Destroy the cluster described by the module's compose file."""
    ClusterUtils.cluster_destroy(FILE)
|
||||
|
||||
|
||||
def test_client_failure_isolate_two_datanodes():
    """
    In this test, all datanodes are isolated from each other:
    two of the datanodes cannot communicate with any other node in the
    cluster, while the first datanode stays with OM, SCM and the client.
    Expectation :
    Write should fail.
    Keys written before the partition was created can be read.
    """
    key = "testkey1"
    # Write the key while the cluster is still fully connected.
    ClusterUtils.put_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME,
                         "/etc/passwd", key_name=key,
                         replication_factor='THREE')

    partitions = [
        [OM[0], SCM[0], DATANODES[0], CLIENT[0]],
        [DATANODES[1]],
        [DATANODES[2]],
    ]
    Blockade.blockade_create_partition(*partitions)
    Blockade.blockade_status()

    # A new write is expected to report a block allocation failure.
    _, freon_output = ClusterUtils.run_freon(
        FILE, 1, 1, 1, 10240, "RATIS", "THREE")
    expected_error = "Allocate block failed, error:INTERNAL_ERROR"
    assert re.search(expected_error, freon_output) is not None

    # The key written before the partition must still be readable intact.
    ClusterUtils.get_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME,
                         key, "/tmp/")
    downloaded_checksum = ClusterUtils.find_checksum(FILE, "/tmp/%s" % key)

    assert downloaded_checksum == ORIG_CHECKSUM
|
||||
|
||||
|
||||
def test_client_failure_isolate_one_datanode():
    """
    In this test, one of the datanodes is isolated from all other nodes.
    Expectation :
    Write should pass.
    Keys written before the partition was created can be read.
    """
    key = "testkey2"
    # Write the key while the cluster is still fully connected.
    ClusterUtils.put_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME,
                         "/etc/passwd", key_name=key,
                         replication_factor='THREE')

    connected = [OM[0], SCM[0], DATANODES[0], DATANODES[1], CLIENT[0]]
    isolated = [DATANODES[2]]
    Blockade.blockade_create_partition(connected, isolated)
    Blockade.blockade_status()

    # The write should still succeed, but the output is expected to mention
    # the failed three-way commit.
    _, freon_output = ClusterUtils.run_freon(
        FILE, 1, 1, 1, 10240, "RATIS", "THREE")
    for expected in ("3 way commit failed", "Status: Success"):
        assert re.search(expected, freon_output) is not None

    # The key written before the partition must still be readable intact.
    ClusterUtils.get_key(FILE, TEST_BUCKET_NAME, TEST_VOLUME_NAME,
                         key, "/tmp/")
    downloaded_checksum = ClusterUtils.find_checksum(FILE, "/tmp/%s" % key)

    assert downloaded_checksum == ORIG_CHECKSUM
|
@ -26,6 +26,7 @@
|
||||
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
|
||||
"docker-compose.yaml")
|
||||
os.environ["DOCKER_COMPOSE_FILE"] = FILE
|
||||
SCALE = 3
|
||||
CONTAINER_LIST = []
|
||||
OM = []
|
||||
|
@ -27,6 +27,7 @@
|
||||
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
|
||||
"docker-compose.yaml")
|
||||
os.environ["DOCKER_COMPOSE_FILE"] = FILE
|
||||
SCALE = 6
|
||||
CONTAINER_LIST = []
|
||||
|
||||
|
@ -26,6 +26,7 @@
|
||||
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
|
||||
"docker-compose.yaml")
|
||||
os.environ["DOCKER_COMPOSE_FILE"] = FILE
|
||||
SCALE = 3
|
||||
CONTAINER_LIST = []
|
||||
OM = []
|
||||
|
@ -26,6 +26,7 @@
|
||||
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
|
||||
"docker-compose.yaml")
|
||||
os.environ["DOCKER_COMPOSE_FILE"] = FILE
|
||||
SCALE = 3
|
||||
CONTAINER_LIST = []
|
||||
OM = []
|
||||
|
@ -26,6 +26,7 @@
|
||||
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
|
||||
"docker-compose.yaml")
|
||||
os.environ["DOCKER_COMPOSE_FILE"] = FILE
|
||||
SCALE = 3
|
||||
CONTAINER_LIST = []
|
||||
OM = []
|
||||
|
@ -26,6 +26,7 @@
|
||||
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
|
||||
"docker-compose.yaml")
|
||||
os.environ["DOCKER_COMPOSE_FILE"] = FILE
|
||||
SCALE = 3
|
||||
CONTAINER_LIST = []
|
||||
OM = []
|
||||
|
@ -47,3 +47,12 @@ services:
|
||||
environment:
|
||||
ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION
|
||||
command: ["/opt/hadoop/bin/ozone","scm"]
|
||||
ozone_client:
|
||||
image: apache/hadoop-runner
|
||||
volumes:
|
||||
- ../..:/opt/hadoop
|
||||
ports:
|
||||
- 9869
|
||||
command: ["tail", "-f","/etc/passwd"]
|
||||
env_file:
|
||||
- ./docker-config
|
||||
|
Loading…
Reference in New Issue
Block a user