HDDS-1164. Add New blockade Tests to test Replica Manager. Contributed by Nilotpal Nandi.
This commit is contained in:
parent
002dcc4ebf
commit
7cd7045eea
@ -45,9 +45,8 @@ class Blockade(object):
|
||||
@classmethod
|
||||
def make_flaky(cls, flaky_node, container_list):
|
||||
# make the network flaky
|
||||
om = filter(lambda x: 'ozoneManager' in x, container_list)
|
||||
scm = filter(lambda x: 'scm' in x, container_list)
|
||||
datanodes = filter(lambda x: 'datanode' in x, container_list)
|
||||
om, scm, _, datanodes = \
|
||||
ClusterUtils.find_om_scm_client_datanodes(container_list)
|
||||
node_dict = {
|
||||
"all": "--all",
|
||||
"scm" : scm[0],
|
||||
|
@ -71,7 +71,7 @@ class ClusterUtils(object):
|
||||
@classmethod
|
||||
def run_freon(cls, docker_compose_file, num_volumes, num_buckets,
|
||||
num_keys, key_size, replication_type, replication_factor,
|
||||
freon_client='ozoneManager'):
|
||||
freon_client='om'):
|
||||
# run freon
|
||||
cmd = "docker-compose -f %s " \
|
||||
"exec %s /opt/hadoop/bin/ozone " \
|
||||
@ -115,7 +115,7 @@ class ClusterUtils(object):
|
||||
@classmethod
|
||||
def get_ozone_confkey_value(cls, docker_compose_file, key_name):
|
||||
cmd = "docker-compose -f %s " \
|
||||
"exec ozoneManager /opt/hadoop/bin/ozone " \
|
||||
"exec om /opt/hadoop/bin/ozone " \
|
||||
"getconf -confKey %s" \
|
||||
% (docker_compose_file, key_name)
|
||||
exit_code, output = cls.run_cmd(cmd)
|
||||
@ -307,3 +307,22 @@ class ClusterUtils(object):
|
||||
checksum = finaloutput.split(" ")
|
||||
logger.info("Checksum of %s is : %s", filepath, checksum[0])
|
||||
return checksum[0]
|
||||
|
||||
@classmethod
|
||||
def get_pipelines(cls, docker_compose_file):
|
||||
command = "docker-compose -f %s " \
|
||||
+ "exec ozone_client /opt/hadoop/bin/ozone scmcli " \
|
||||
+ "listPipelines" % (docker_compose_file)
|
||||
exit_code, output = cls.run_cmd(command)
|
||||
assert exit_code == 0, "list pipeline command failed"
|
||||
return output
|
||||
|
||||
@classmethod
|
||||
def find_om_scm_client_datanodes(cls, container_list):
|
||||
|
||||
om = filter(lambda x: 'om_1' in x, container_list)
|
||||
scm = filter(lambda x: 'scm' in x, container_list)
|
||||
datanodes = sorted(
|
||||
list(filter(lambda x: 'datanode' in x, container_list)))
|
||||
client = filter(lambda x: 'ozone_client' in x, container_list)
|
||||
return om, scm, client, datanodes
|
@ -47,11 +47,8 @@ def setup():
|
||||
exit_code, output = Blockade.blockade_status()
|
||||
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
|
||||
output
|
||||
OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
|
||||
SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
|
||||
DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
|
||||
CLIENT = filter(lambda x: 'ozone_client' in x, CONTAINER_LIST)
|
||||
|
||||
OM, SCM, CLIENT, DATANODES = \
|
||||
ClusterUtils.find_om_scm_client_datanodes(CONTAINER_LIST)
|
||||
exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
|
||||
"THREE", "ozone_client")
|
||||
assert exit_code == 0, "freon run failed with output=[%s]" % output
|
||||
@ -121,4 +118,4 @@ def test_client_failure_isolate_one_datanode():
|
||||
test_key_name, "/tmp/")
|
||||
key_checksum = ClusterUtils.find_checksum(FILE, "/tmp/%s" % test_key_name)
|
||||
|
||||
assert key_checksum == ORIG_CHECKSUM
|
||||
assert key_checksum == ORIG_CHECKSUM
|
@ -42,9 +42,8 @@ def setup():
|
||||
exit_code, output = Blockade.blockade_status()
|
||||
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
|
||||
output
|
||||
OM = [x for x in CONTAINER_LIST if 'ozoneManager' in x]
|
||||
SCM = [x for x in CONTAINER_LIST if 'scm' in x]
|
||||
DATANODES = sorted(x for x in CONTAINER_LIST if 'datanode' in x)
|
||||
OM, SCM, _, DATANODES = \
|
||||
ClusterUtils.find_om_scm_client_datanodes(CONTAINER_LIST)
|
||||
|
||||
exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
|
||||
"THREE")
|
||||
|
@ -18,16 +18,17 @@
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
import re
|
||||
from blockadeUtils.blockade import Blockade
|
||||
from clusterUtils.cluster_utils import ClusterUtils
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
|
||||
"docker-compose.yaml")
|
||||
os.environ["DOCKER_COMPOSE_FILE"] = FILE
|
||||
SCALE = 3
|
||||
INCREASED_SCALE = 5
|
||||
CONTAINER_LIST = []
|
||||
OM = []
|
||||
SCM = []
|
||||
@ -35,83 +36,114 @@ DATANODES = []
|
||||
|
||||
|
||||
def setup():
|
||||
global CONTAINER_LIST, OM, SCM, DATANODES
|
||||
Blockade.blockade_destroy()
|
||||
CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
|
||||
exit_code, output = Blockade.blockade_status()
|
||||
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
|
||||
output
|
||||
OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
|
||||
SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
|
||||
DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
|
||||
global CONTAINER_LIST, OM, SCM, DATANODES
|
||||
Blockade.blockade_destroy()
|
||||
CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
|
||||
exit_code, output = Blockade.blockade_status()
|
||||
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
|
||||
output
|
||||
OM, SCM, _, DATANODES = \
|
||||
ClusterUtils.find_om_scm_client_datanodes(CONTAINER_LIST)
|
||||
|
||||
exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
|
||||
"THREE")
|
||||
assert exit_code == 0, "freon run failed with output=[%s]" % output
|
||||
exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
|
||||
"THREE")
|
||||
assert exit_code == 0, "freon run failed with output=[%s]" % output
|
||||
|
||||
|
||||
def teardown():
|
||||
logger.info("Inside teardown")
|
||||
Blockade.blockade_destroy()
|
||||
logger.info("Inside teardown")
|
||||
Blockade.blockade_destroy()
|
||||
|
||||
|
||||
def teardown_module():
|
||||
ClusterUtils.cluster_destroy(FILE)
|
||||
ClusterUtils.cluster_destroy(FILE)
|
||||
|
||||
|
||||
def test_one_dn_isolate_scm_other_dn():
|
||||
"""
|
||||
In this test, one of the datanodes cannot communicate with SCM and other
|
||||
datanodes.
|
||||
Other datanodes can communicate with each other and SCM .
|
||||
Expectation : The container should eventually have two closed replicas.
|
||||
"""
|
||||
first_set = [OM[0], SCM[0], DATANODES[1], DATANODES[2]]
|
||||
second_set = [OM[0], DATANODES[0]]
|
||||
Blockade.blockade_create_partition(first_set, second_set)
|
||||
def test_one_dn_isolate_scm_other_dn(run_second_phase):
|
||||
"""
|
||||
In this test, one of the datanodes cannot communicate with SCM and other
|
||||
datanodes.
|
||||
Other datanodes can communicate with each other and SCM .
|
||||
Expectation : The container should eventually have two closed replicas.
|
||||
"""
|
||||
first_set = [OM[0], SCM[0], DATANODES[1], DATANODES[2]]
|
||||
second_set = [OM[0], DATANODES[0]]
|
||||
Blockade.blockade_create_partition(first_set, second_set)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.findall_container_status(FILE, SCALE)
|
||||
count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
|
||||
all_datanodes_container_status)
|
||||
assert len(count_closed_container_datanodes) == 2, \
|
||||
"The container should have two closed replicas."
|
||||
if str(run_second_phase).lower() == "true":
|
||||
ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
|
||||
count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
|
||||
all_datanodes_container_status)
|
||||
assert len(count_closed_container_datanodes) == 2, \
|
||||
"The container should have two closed replicas."
|
||||
ClusterUtils.findall_container_status(
|
||||
FILE, INCREASED_SCALE)
|
||||
count_closed_container_datanodes = filter(
|
||||
lambda x: x == 'CLOSED', all_datanodes_container_status)
|
||||
assert len(count_closed_container_datanodes) >= 3, \
|
||||
"The container should have at least three closed replicas."
|
||||
_, output = \
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
assert re.search("Status: Success", output) is not None
|
||||
|
||||
|
||||
def test_one_dn_isolate_other_dn():
|
||||
"""
|
||||
In this test, one of the datanodes (first datanode) cannot communicate
|
||||
other datanodes but can communicate with SCM.
|
||||
One of the other two datanodes (second datanode) cannot communicate with
|
||||
SCM.
|
||||
Expectation :
|
||||
The container replica state in first datanode can be either closed or
|
||||
quasi-closed.
|
||||
The container replica state in second datanode can be either closed or open.
|
||||
The container should eventually have at lease one closed replica.
|
||||
"""
|
||||
first_set = [OM[0], SCM[0], DATANODES[0]]
|
||||
second_set = [OM[0], DATANODES[1], DATANODES[2]]
|
||||
third_set = [SCM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
def test_one_dn_isolate_other_dn(run_second_phase):
|
||||
"""
|
||||
In this test, one of the datanodes (first datanode) cannot communicate
|
||||
other datanodes but can communicate with SCM.
|
||||
One of the other two datanodes (second datanode) cannot communicate with
|
||||
SCM.
|
||||
Expectation :
|
||||
The container replica state in first datanode can be either closed or
|
||||
quasi-closed.
|
||||
The container replica state in second datanode can be either closed or open.
|
||||
The container should eventually have at lease one closed replica.
|
||||
"""
|
||||
first_set = [OM[0], SCM[0], DATANODES[0]]
|
||||
second_set = [OM[0], DATANODES[1], DATANODES[2]]
|
||||
third_set = [SCM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.findall_container_status(FILE, SCALE)
|
||||
count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
|
||||
all_datanodes_container_status)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
assert first_datanode_status == 'CLOSED' or \
|
||||
first_datanode_status == "QUASI_CLOSED"
|
||||
assert second_datanode_status == 'CLOSED' or \
|
||||
second_datanode_status == "OPEN"
|
||||
assert len(count_closed_container_datanodes) >= 1, \
|
||||
"The container should have at least one closed replica"
|
||||
if str(run_second_phase).lower() == "true":
|
||||
ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
|
||||
count_closed_container_datanodes = filter(lambda x: x == 'CLOSED',
|
||||
all_datanodes_container_status)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
assert first_datanode_status == 'CLOSED' or \
|
||||
first_datanode_status == "QUASI_CLOSED"
|
||||
assert second_datanode_status == 'CLOSED' or \
|
||||
second_datanode_status == "OPEN"
|
||||
assert len(count_closed_container_datanodes) >= 1, \
|
||||
"The container should have at least one closed replica"
|
||||
ClusterUtils.findall_container_status(
|
||||
FILE, INCREASED_SCALE)
|
||||
count_closed_container_datanodes = filter(
|
||||
lambda x: x == 'CLOSED', all_datanodes_container_status)
|
||||
assert len(count_closed_container_datanodes) >= 3, \
|
||||
"The container should have at least three closed replicas."
|
||||
_, output = \
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
assert re.search("Status: Success", output) is not None
|
@ -18,16 +18,17 @@
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
import re
|
||||
from blockadeUtils.blockade import Blockade
|
||||
from clusterUtils.cluster_utils import ClusterUtils
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
|
||||
"docker-compose.yaml")
|
||||
os.environ["DOCKER_COMPOSE_FILE"] = FILE
|
||||
SCALE = 3
|
||||
INCREASED_SCALE = 5
|
||||
CONTAINER_LIST = []
|
||||
OM = []
|
||||
SCM = []
|
||||
@ -35,110 +36,190 @@ DATANODES = []
|
||||
|
||||
|
||||
def setup():
|
||||
global CONTAINER_LIST, OM, SCM, DATANODES
|
||||
Blockade.blockade_destroy()
|
||||
CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
|
||||
exit_code, output = Blockade.blockade_status()
|
||||
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
|
||||
output
|
||||
OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
|
||||
SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
|
||||
DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
|
||||
global CONTAINER_LIST, OM, SCM, DATANODES
|
||||
Blockade.blockade_destroy()
|
||||
CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
|
||||
exit_code, output = Blockade.blockade_status()
|
||||
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
|
||||
output
|
||||
OM, SCM, _, DATANODES = \
|
||||
ClusterUtils.find_om_scm_client_datanodes(CONTAINER_LIST)
|
||||
|
||||
exit_code, output = \
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
assert exit_code == 0, "freon run failed with output=[%s]" % output
|
||||
exit_code, output = \
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
assert exit_code == 0, "freon run failed with output=[%s]" % output
|
||||
|
||||
|
||||
def teardown():
|
||||
logger.info("Inside teardown")
|
||||
Blockade.blockade_destroy()
|
||||
logger.info("Inside teardown")
|
||||
Blockade.blockade_destroy()
|
||||
|
||||
|
||||
def teardown_module():
|
||||
ClusterUtils.cluster_destroy(FILE)
|
||||
ClusterUtils.cluster_destroy(FILE)
|
||||
|
||||
|
||||
def test_three_dns_isolate_onescmfailure():
|
||||
"""
|
||||
In this test, all datanodes are isolated from each other.
|
||||
One of the datanodes (third datanode) cannot communicate with SCM.
|
||||
Expectation :
|
||||
The container replica state in first datanode should be closed.
|
||||
The container replica state in second datanode should be closed.
|
||||
The container replica state in third datanode should be open.
|
||||
"""
|
||||
first_set = [OM[0], SCM[0], DATANODES[0]]
|
||||
second_set = [OM[0], SCM[0], DATANODES[1]]
|
||||
third_set = [OM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
def test_three_dns_isolate_onescmfailure(run_second_phase):
|
||||
"""
|
||||
In this test, all datanodes are isolated from each other.
|
||||
One of the datanodes (third datanode) cannot communicate with SCM.
|
||||
Expectation :
|
||||
The container replica state in first datanode should be closed.
|
||||
The container replica state in second datanode should be closed.
|
||||
The container replica state in third datanode should be open.
|
||||
"""
|
||||
first_set = [OM[0], SCM[0], DATANODES[0]]
|
||||
second_set = [OM[0], SCM[0], DATANODES[1]]
|
||||
third_set = [OM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.findall_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'CLOSED'
|
||||
assert second_datanode_status == 'CLOSED'
|
||||
assert third_datanode_status == 'OPEN'
|
||||
|
||||
if str(run_second_phase).lower() == "true":
|
||||
ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'CLOSED'
|
||||
assert second_datanode_status == 'CLOSED'
|
||||
assert third_datanode_status == 'OPEN'
|
||||
|
||||
|
||||
def test_three_dns_isolate_twoscmfailure():
|
||||
"""
|
||||
In this test, all datanodes are isolated from each other.
|
||||
two datanodes cannot communicate with SCM (second datanode and third
|
||||
datanode)
|
||||
Expectation :
|
||||
The container replica state in first datanode should be quasi-closed.
|
||||
The container replica state in second datanode should be open.
|
||||
The container replica state in third datanode should be open.
|
||||
"""
|
||||
first_set = [OM[0], SCM[0], DATANODES[0]]
|
||||
second_set = [OM[0], DATANODES[1]]
|
||||
third_set = [OM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
ClusterUtils.findall_container_status(
|
||||
FILE, INCREASED_SCALE)
|
||||
count_closed_container_datanodes = filter(
|
||||
lambda x: x == 'CLOSED', all_datanodes_container_status)
|
||||
assert len(count_closed_container_datanodes) == 3, \
|
||||
"The container should have three closed replicas."
|
||||
Blockade.blockade_join()
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'QUASI_CLOSED'
|
||||
assert second_datanode_status == 'OPEN'
|
||||
assert third_datanode_status == 'OPEN'
|
||||
ClusterUtils.findall_container_status(
|
||||
FILE, INCREASED_SCALE)
|
||||
count_closed_container_datanodes = filter(
|
||||
lambda x: x == 'CLOSED', all_datanodes_container_status)
|
||||
assert len(count_closed_container_datanodes) == 3, \
|
||||
"The container should have three closed replicas."
|
||||
|
||||
|
||||
def test_three_dns_isolate_threescmfailure():
|
||||
"""
|
||||
In this test, all datanodes are isolated from each other and also cannot
|
||||
communicate with SCM.
|
||||
Expectation :
|
||||
The container replica state in first datanode should be open.
|
||||
The container replica state in second datanode should be open.
|
||||
The container replica state in third datanode should be open.
|
||||
"""
|
||||
first_set = [OM[0], DATANODES[0]]
|
||||
second_set = [OM[0], DATANODES[1]]
|
||||
third_set = [OM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
def test_three_dns_isolate_twoscmfailure(run_second_phase):
|
||||
"""
|
||||
In this test, all datanodes are isolated from each other.
|
||||
two datanodes cannot communicate with SCM (second datanode and third
|
||||
datanode)
|
||||
Expectation :
|
||||
The container replica state in first datanode should be quasi-closed.
|
||||
The container replica state in second datanode should be open.
|
||||
The container replica state in third datanode should be open.
|
||||
"""
|
||||
first_set = [OM[0], SCM[0], DATANODES[0]]
|
||||
second_set = [OM[0], DATANODES[1]]
|
||||
third_set = [OM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.findall_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'QUASI_CLOSED'
|
||||
assert second_datanode_status == 'OPEN'
|
||||
assert third_datanode_status == 'OPEN'
|
||||
|
||||
if str(run_second_phase).lower() == "true":
|
||||
ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'OPEN'
|
||||
assert second_datanode_status == 'OPEN'
|
||||
assert third_datanode_status == 'OPEN'
|
||||
ClusterUtils.findall_container_status(
|
||||
FILE, INCREASED_SCALE)
|
||||
count_quasi_closed_container_datanodes = filter(
|
||||
lambda x: x == 'QUASI_CLOSED', all_datanodes_container_status)
|
||||
assert len(count_quasi_closed_container_datanodes) >= 3, \
|
||||
"The container should have at least three quasi-closed replicas."
|
||||
Blockade.blockade_join()
|
||||
Blockade.blockade_status()
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.findall_container_status(
|
||||
FILE, INCREASED_SCALE)
|
||||
count_closed_container_datanodes = filter(
|
||||
lambda x: x == 'CLOSED', all_datanodes_container_status)
|
||||
assert len(count_closed_container_datanodes) == 3, \
|
||||
"The container should have three closed replicas."
|
||||
|
||||
|
||||
def test_three_dns_isolate_threescmfailure(run_second_phase):
|
||||
"""
|
||||
In this test, all datanodes are isolated from each other and also cannot
|
||||
communicate with SCM.
|
||||
Expectation :
|
||||
The container replica state in first datanode should be open.
|
||||
The container replica state in second datanode should be open.
|
||||
The container replica state in third datanode should be open.
|
||||
"""
|
||||
first_set = [OM[0], DATANODES[0]]
|
||||
second_set = [OM[0], DATANODES[1]]
|
||||
third_set = [OM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.findall_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'OPEN'
|
||||
assert second_datanode_status == 'OPEN'
|
||||
assert third_datanode_status == 'OPEN'
|
||||
|
||||
if str(run_second_phase).lower() == "true":
|
||||
ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
|
||||
Blockade.blockade_status()
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
output = ClusterUtils.get_pipelines(FILE)
|
||||
if output:
|
||||
assert re.search("Factor:THREE", output) is None
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.findall_container_status(
|
||||
FILE, INCREASED_SCALE)
|
||||
datanodes_having_container_status = filter(
|
||||
lambda x: x != 'None', all_datanodes_container_status)
|
||||
assert len(datanodes_having_container_status) == 3, \
|
||||
"Containers should not be replicated on addition of new nodes."
|
||||
Blockade.blockade_join()
|
||||
Blockade.blockade_status()
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.findall_container_status(
|
||||
FILE, INCREASED_SCALE)
|
||||
count_closed_container_datanodes = filter(
|
||||
lambda x: x == 'CLOSED', all_datanodes_container_status)
|
||||
assert len(count_closed_container_datanodes) == 3, \
|
||||
"The container should have three closed replicas."
|
@ -18,16 +18,17 @@
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
import re
|
||||
from blockadeUtils.blockade import Blockade
|
||||
from clusterUtils.cluster_utils import ClusterUtils
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
FILE = os.path.join(parent_dir, "compose", "ozoneblockade",
|
||||
"docker-compose.yaml")
|
||||
os.environ["DOCKER_COMPOSE_FILE"] = FILE
|
||||
SCALE = 3
|
||||
INCREASED_SCALE = 5
|
||||
CONTAINER_LIST = []
|
||||
OM = []
|
||||
SCM = []
|
||||
@ -35,87 +36,138 @@ DATANODES = []
|
||||
|
||||
|
||||
def setup():
|
||||
global CONTAINER_LIST, OM, SCM, DATANODES
|
||||
Blockade.blockade_destroy()
|
||||
CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
|
||||
exit_code, output = Blockade.blockade_status()
|
||||
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
|
||||
output
|
||||
OM = filter(lambda x: 'ozoneManager' in x, CONTAINER_LIST)
|
||||
SCM = filter(lambda x: 'scm' in x, CONTAINER_LIST)
|
||||
DATANODES = sorted(list(filter(lambda x: 'datanode' in x, CONTAINER_LIST)))
|
||||
global CONTAINER_LIST, OM, SCM, DATANODES
|
||||
Blockade.blockade_destroy()
|
||||
CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
|
||||
exit_code, output = Blockade.blockade_status()
|
||||
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
|
||||
output
|
||||
OM, SCM, _, DATANODES = \
|
||||
ClusterUtils.find_om_scm_client_datanodes(CONTAINER_LIST)
|
||||
|
||||
exit_code, output = \
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
assert exit_code == 0, "freon run failed with output=[%s]" % output
|
||||
exit_code, output = \
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
assert exit_code == 0, "freon run failed with output=[%s]" % output
|
||||
|
||||
|
||||
def teardown():
|
||||
logger.info("Inside teardown")
|
||||
Blockade.blockade_destroy()
|
||||
logger.info("Inside teardown")
|
||||
Blockade.blockade_destroy()
|
||||
|
||||
|
||||
def teardown_module():
|
||||
ClusterUtils.cluster_destroy(FILE)
|
||||
ClusterUtils.cluster_destroy(FILE)
|
||||
|
||||
|
||||
def test_two_dns_isolate_scm_same_partition():
|
||||
"""
|
||||
In this test, one of the datanodes (first datanode) cannot communicate
|
||||
with other two datanodes.
|
||||
Two datanodes (second datanode and third datanode), on same network
|
||||
parition, cannot communicate with SCM.
|
||||
Expectation :
|
||||
The container replica state in first datanode should be quasi-closed.
|
||||
The container replica state in second datanode should be open.
|
||||
The container replica state in third datanode should be open.
|
||||
"""
|
||||
first_set = [OM[0], DATANODES[1], DATANODES[2]]
|
||||
second_set = [OM[0], SCM[0], DATANODES[0]]
|
||||
Blockade.blockade_create_partition(first_set, second_set)
|
||||
def test_two_dns_isolate_scm_same_partition(run_second_phase):
|
||||
"""
|
||||
In this test, there are three datanodes, DN1, DN2, DN3
|
||||
DN1 is on a network partition and
|
||||
DN2, DN3 are on a different network partition.
|
||||
DN2 and DN3 cannot communicate with SCM.
|
||||
Expectation :
|
||||
The container replica state in DN1 should be quasi-closed.
|
||||
The container replica state in DN2 should be open.
|
||||
The container replica state in DN3 should be open.
|
||||
"""
|
||||
first_set = [OM[0], DATANODES[1], DATANODES[2]]
|
||||
second_set = [OM[0], SCM[0], DATANODES[0]]
|
||||
Blockade.blockade_create_partition(first_set, second_set)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.findall_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'QUASI_CLOSED'
|
||||
assert second_datanode_status == 'OPEN'
|
||||
assert third_datanode_status == 'OPEN'
|
||||
|
||||
if str(run_second_phase).lower() == "true":
|
||||
ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'QUASI_CLOSED'
|
||||
assert second_datanode_status == 'OPEN'
|
||||
assert third_datanode_status == 'OPEN'
|
||||
|
||||
|
||||
def test_two_dns_isolate_scm_different_partition():
|
||||
"""
|
||||
In this test, one of the datanodes (first datanode) cannot communicate with
|
||||
other two datanodes.
|
||||
Two datanodes (first datanode and second datanode),
|
||||
on different network paritions, cannot communicate with SCM.
|
||||
Expectation :
|
||||
The container replica state in first datanode should be open.
|
||||
The container replica states can be either 'closed'
|
||||
in both second and third datanode, or,
|
||||
'open' in second datanode and 'quasi-closed' in third datanode.
|
||||
"""
|
||||
first_set = [OM[0], DATANODES[0]]
|
||||
second_set = [OM[0], DATANODES[1], DATANODES[2]]
|
||||
third_set = [SCM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
ClusterUtils.findall_container_status(
|
||||
FILE, INCREASED_SCALE)
|
||||
count_quasi_closed_container_datanodes = filter(
|
||||
lambda x: x == 'QUASI_CLOSED', all_datanodes_container_status)
|
||||
assert len(count_quasi_closed_container_datanodes) >= 3, \
|
||||
"The container should have at least three quasi-closed replicas."
|
||||
Blockade.blockade_join()
|
||||
Blockade.blockade_status()
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.findall_container_status(
|
||||
FILE, INCREASED_SCALE)
|
||||
count_closed_container_datanodes = filter(
|
||||
lambda x: x == 'CLOSED', all_datanodes_container_status)
|
||||
assert len(count_closed_container_datanodes) >= 3
|
||||
|
||||
|
||||
def test_two_dns_isolate_scm_different_partition(run_second_phase):
|
||||
"""
|
||||
In this test, there are three datanodes, DN1, DN2, DN3
|
||||
DN1 is on a network partition and
|
||||
DN2, DN3 are on a different network partition.
|
||||
DN1 and DN2 cannot communicate with SCM.
|
||||
Expectation :
|
||||
The container replica state in datanode DN1 should be open.
|
||||
The container replica states can be either 'closed'
|
||||
in DN2 and DN3, or,
|
||||
'open' in DN2 and 'quasi-closed' in DN3.
|
||||
"""
|
||||
first_set = [OM[0], DATANODES[0]]
|
||||
second_set = [OM[0], DATANODES[1], DATANODES[2]]
|
||||
third_set = [SCM[0], DATANODES[2]]
|
||||
Blockade.blockade_create_partition(first_set, second_set, third_set)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.findall_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'OPEN'
|
||||
assert (second_datanode_status == 'CLOSED' and
|
||||
third_datanode_status == 'CLOSED') or \
|
||||
(second_datanode_status == 'OPEN' and
|
||||
third_datanode_status == 'QUASI_CLOSED')
|
||||
|
||||
if str(run_second_phase).lower() == "true":
|
||||
ClusterUtils.cluster_setup(FILE, INCREASED_SCALE, False)
|
||||
Blockade.blockade_status()
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
logger.info("Waiting for %s seconds before checking container status",
|
||||
os.environ["CONTAINER_STATUS_SLEEP"])
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
|
||||
first_datanode_status = all_datanodes_container_status[0]
|
||||
second_datanode_status = all_datanodes_container_status[1]
|
||||
third_datanode_status = all_datanodes_container_status[2]
|
||||
assert first_datanode_status == 'OPEN'
|
||||
assert (second_datanode_status == 'CLOSED' and
|
||||
third_datanode_status == 'CLOSED') or \
|
||||
(second_datanode_status == 'OPEN' and
|
||||
third_datanode_status == 'QUASI_CLOSED')
|
||||
ClusterUtils.findall_container_status(
|
||||
FILE, INCREASED_SCALE)
|
||||
count_closed_container_datanodes = filter(
|
||||
lambda x: x == 'CLOSED', all_datanodes_container_status)
|
||||
count_qausi_closed_container_datanodes = filter(
|
||||
lambda x: x == 'QUASI_CLOSED', all_datanodes_container_status)
|
||||
assert len(count_closed_container_datanodes) >= 3 or \
|
||||
len(count_qausi_closed_container_datanodes) >= 3
|
||||
Blockade.blockade_join()
|
||||
Blockade.blockade_status()
|
||||
if len(count_closed_container_datanodes) < 3:
|
||||
time.sleep(int(os.environ["CONTAINER_STATUS_SLEEP"]))
|
||||
all_datanodes_container_status = \
|
||||
ClusterUtils.findall_container_status(
|
||||
FILE, INCREASED_SCALE)
|
||||
count_closed_container_datanodes = filter(
|
||||
lambda x: x == 'CLOSED', all_datanodes_container_status)
|
||||
assert len(count_closed_container_datanodes) >= 3
|
||||
_, output = \
|
||||
ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
|
||||
assert re.search("Status: Success", output) is not None
|
@ -42,9 +42,8 @@ def setup():
|
||||
exit_code, output = Blockade.blockade_status()
|
||||
assert exit_code == 0, "blockade status command failed with output=[%s]" % \
|
||||
output
|
||||
OM = [x for x in CONTAINER_LIST if 'ozoneManager' in x]
|
||||
SCM = [x for x in CONTAINER_LIST if 'scm' in x]
|
||||
DATANODES = sorted(x for x in CONTAINER_LIST if 'datanode' in x)
|
||||
OM, SCM, _, DATANODES = \
|
||||
ClusterUtils.find_om_scm_client_datanodes(CONTAINER_LIST)
|
||||
|
||||
exit_code, output = ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS",
|
||||
"THREE")
|
||||
|
Loading…
x
Reference in New Issue
Block a user