HDDS-1027. Add blockade Tests for datanode isolation and scm failures. Contributed by Nilotpal Nandi.
This commit is contained in:
parent
49ddd8a6ed
commit
911790cc26
143
hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py
vendored
Normal file
143
hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_three_nodes_isolate.py
vendored
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
from blockadeUtils.blockade import Blockade
|
||||||
|
from clusterUtils.cluster_utils import ClusterUtils
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Directory two levels above this file's resolved path; the compose file is
# looked up relative to it.
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
FILE = os.path.join(
    parent_dir, "compose", "ozoneblockade", "docker-compose.yaml")

# Value passed to ClusterUtils.cluster_setup and
# ClusterUtils.find_all_datanodes_container_status.
SCALE = 3

# Container name lists; setup() rebinds them after the cluster is created.
CONTAINER_LIST, OM, SCM, DATANODES = [], [], [], []
|
||||||
|
|
||||||
|
|
||||||
|
def setup():
    """Recreate the cluster and populate the module-level container lists.

    Destroys any existing blockade, calls ClusterUtils.cluster_setup with
    FILE and SCALE, verifies that the blockade status command succeeds,
    splits the returned container names into OM, SCM and DATANODES, and
    then runs freon once, asserting that it exits cleanly.

    Raises:
        AssertionError: if the blockade status command or the freon run
            reports a non-zero exit code.
    """
    global CONTAINER_LIST, OM, SCM, DATANODES
    Blockade.blockade_destroy()
    CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
    exit_code, output = Blockade.blockade_status()
    assert exit_code == 0, "blockade status command failed with output=[%s]" % \
        output
    # Build real lists rather than bare filter() results: on Python 3
    # filter() returns a lazy iterator, and the tests index these globals
    # (OM[0], SCM[0]), which would raise TypeError.
    OM = [name for name in CONTAINER_LIST if 'ozoneManager' in name]
    SCM = [name for name in CONTAINER_LIST if 'scm' in name]
    DATANODES = sorted(name for name in CONTAINER_LIST if 'datanode' in name)

    exit_code, output = \
        ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
    assert exit_code == 0, "freon run failed with output=[%s]" % output
|
||||||
|
|
||||||
|
|
||||||
|
def teardown():
    """Log that teardown is running, then destroy the blockade."""
    logger.info("Inside teardown")
    Blockade.blockade_destroy()
|
||||||
|
|
||||||
|
|
||||||
|
def teardown_module():
    """Destroy the cluster defined by the compose file FILE."""
    ClusterUtils.cluster_destroy(FILE)
|
||||||
|
|
||||||
|
|
||||||
|
def test_three_dns_isolate_onescmfailure():
    """
    Every datanode is placed in its own partition, so the datanodes are
    isolated from each other. Only the third datanode's partition lacks SCM.

    Expectation :
    First datanode's container replica state is CLOSED.
    Second datanode's container replica state is CLOSED.
    Third datanode's container replica state is OPEN.
    """
    om, scm = OM[0], SCM[0]
    partitions = [
        [om, scm, DATANODES[0]],
        [om, scm, DATANODES[1]],
        [om, DATANODES[2]],
    ]
    Blockade.blockade_create_partition(*partitions)
    Blockade.blockade_status()
    ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")

    # The wait time comes from the environment as a string; it is logged
    # as-is and converted to int only for the sleep.
    wait_seconds = os.environ["CONTAINER_STATUS_SLEEP"]
    logger.info("Waiting for %s seconds before checking container status",
                wait_seconds)
    time.sleep(int(wait_seconds))

    statuses = ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
    dn1_state, dn2_state, dn3_state = statuses[0], statuses[1], statuses[2]
    assert dn1_state == 'CLOSED'
    assert dn2_state == 'CLOSED'
    assert dn3_state == 'OPEN'
|
||||||
|
|
||||||
|
|
||||||
|
def test_three_dns_isolate_twoscmfailure():
    """
    Every datanode is placed in its own partition, so the datanodes are
    isolated from each other. The second and third datanodes' partitions
    lack SCM.

    Expectation :
    First datanode's container replica state is QUASI_CLOSED.
    Second datanode's container replica state is OPEN.
    Third datanode's container replica state is OPEN.
    """
    om = OM[0]
    partitions = [
        [om, SCM[0], DATANODES[0]],
        [om, DATANODES[1]],
        [om, DATANODES[2]],
    ]
    Blockade.blockade_create_partition(*partitions)
    Blockade.blockade_status()
    ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")

    # The wait time comes from the environment as a string; it is logged
    # as-is and converted to int only for the sleep.
    wait_seconds = os.environ["CONTAINER_STATUS_SLEEP"]
    logger.info("Waiting for %s seconds before checking container status",
                wait_seconds)
    time.sleep(int(wait_seconds))

    statuses = ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
    dn1_state, dn2_state, dn3_state = statuses[0], statuses[1], statuses[2]
    assert dn1_state == 'QUASI_CLOSED'
    assert dn2_state == 'OPEN'
    assert dn3_state == 'OPEN'
|
||||||
|
|
||||||
|
|
||||||
|
def test_three_dns_isolate_threescmfailure():
    """
    Every datanode is placed in its own partition and none of the
    partitions contains SCM, so the datanodes are isolated from each other
    and from SCM.

    Expectation :
    First datanode's container replica state is OPEN.
    Second datanode's container replica state is OPEN.
    Third datanode's container replica state is OPEN.
    """
    om = OM[0]
    partitions = [
        [om, DATANODES[0]],
        [om, DATANODES[1]],
        [om, DATANODES[2]],
    ]
    Blockade.blockade_create_partition(*partitions)
    Blockade.blockade_status()
    ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")

    # The wait time comes from the environment as a string; it is logged
    # as-is and converted to int only for the sleep.
    wait_seconds = os.environ["CONTAINER_STATUS_SLEEP"]
    logger.info("Waiting for %s seconds before checking container status",
                wait_seconds)
    time.sleep(int(wait_seconds))

    statuses = ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
    dn1_state, dn2_state, dn3_state = statuses[0], statuses[1], statuses[2]
    assert dn1_state == 'OPEN'
    assert dn2_state == 'OPEN'
    assert dn3_state == 'OPEN'
|
120
hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py
vendored
Normal file
120
hadoop-ozone/dist/src/main/blockade/test_blockade_mixed_failure_two_nodes.py
vendored
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
from blockadeUtils.blockade import Blockade
|
||||||
|
from clusterUtils.cluster_utils import ClusterUtils
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Directory two levels above this file's resolved path; the compose file is
# looked up relative to it.
parent_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
FILE = os.path.join(
    parent_dir, "compose", "ozoneblockade", "docker-compose.yaml")

# Value passed to ClusterUtils.cluster_setup and
# ClusterUtils.find_all_datanodes_container_status.
SCALE = 3

# Container name lists; setup() rebinds them after the cluster is created.
CONTAINER_LIST, OM, SCM, DATANODES = [], [], [], []
|
||||||
|
|
||||||
|
|
||||||
|
def setup():
    """Recreate the cluster and populate the module-level container lists.

    Destroys any existing blockade, calls ClusterUtils.cluster_setup with
    FILE and SCALE, verifies that the blockade status command succeeds,
    splits the returned container names into OM, SCM and DATANODES, and
    then runs freon once, asserting that it exits cleanly.

    Raises:
        AssertionError: if the blockade status command or the freon run
            reports a non-zero exit code.
    """
    global CONTAINER_LIST, OM, SCM, DATANODES
    Blockade.blockade_destroy()
    CONTAINER_LIST = ClusterUtils.cluster_setup(FILE, SCALE)
    exit_code, output = Blockade.blockade_status()
    assert exit_code == 0, "blockade status command failed with output=[%s]" % \
        output
    # Build real lists rather than bare filter() results: on Python 3
    # filter() returns a lazy iterator, and the tests index these globals
    # (OM[0], SCM[0]), which would raise TypeError.
    OM = [name for name in CONTAINER_LIST if 'ozoneManager' in name]
    SCM = [name for name in CONTAINER_LIST if 'scm' in name]
    DATANODES = sorted(name for name in CONTAINER_LIST if 'datanode' in name)

    exit_code, output = \
        ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")
    assert exit_code == 0, "freon run failed with output=[%s]" % output
|
||||||
|
|
||||||
|
|
||||||
|
def teardown():
    """Log that teardown is running, then destroy the blockade."""
    logger.info("Inside teardown")
    Blockade.blockade_destroy()
|
||||||
|
|
||||||
|
|
||||||
|
def teardown_module():
    """Destroy the cluster defined by the compose file FILE."""
    ClusterUtils.cluster_destroy(FILE)
|
||||||
|
|
||||||
|
|
||||||
|
def test_two_dns_isolate_scm_same_partition():
    """
    The first datanode cannot communicate with the other two datanodes.
    The second and third datanodes share one network partition, and that
    partition cannot communicate with SCM.

    Expectation :
    First datanode's container replica state is QUASI_CLOSED.
    Second datanode's container replica state is OPEN.
    Third datanode's container replica state is OPEN.
    """
    om = OM[0]
    without_scm = [om, DATANODES[1], DATANODES[2]]
    with_scm = [om, SCM[0], DATANODES[0]]
    Blockade.blockade_create_partition(without_scm, with_scm)
    Blockade.blockade_status()
    ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")

    # The wait time comes from the environment as a string; it is logged
    # as-is and converted to int only for the sleep.
    wait_seconds = os.environ["CONTAINER_STATUS_SLEEP"]
    logger.info("Waiting for %s seconds before checking container status",
                wait_seconds)
    time.sleep(int(wait_seconds))

    statuses = ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
    dn1_state, dn2_state, dn3_state = statuses[0], statuses[1], statuses[2]
    assert dn1_state == 'QUASI_CLOSED'
    assert dn2_state == 'OPEN'
    assert dn3_state == 'OPEN'
|
||||||
|
|
||||||
|
|
||||||
|
def test_two_dns_isolate_scm_different_partition():
    """
    The first datanode cannot communicate with the other two datanodes.
    The first and second datanodes, which sit on different network
    partitions, cannot communicate with SCM.

    Expectation :
    First datanode's container replica state is OPEN.
    The second and third datanodes are either both CLOSED, or the second
    is OPEN while the third is QUASI_CLOSED.
    """
    om = OM[0]
    partitions = [
        [om, DATANODES[0]],
        [om, DATANODES[1], DATANODES[2]],
        [SCM[0], DATANODES[2]],
    ]
    Blockade.blockade_create_partition(*partitions)
    Blockade.blockade_status()
    ClusterUtils.run_freon(FILE, 1, 1, 1, 10240, "RATIS", "THREE")

    # The wait time comes from the environment as a string; it is logged
    # as-is and converted to int only for the sleep.
    wait_seconds = os.environ["CONTAINER_STATUS_SLEEP"]
    logger.info("Waiting for %s seconds before checking container status",
                wait_seconds)
    time.sleep(int(wait_seconds))

    statuses = ClusterUtils.find_all_datanodes_container_status(FILE, SCALE)
    dn1_state, dn2_state, dn3_state = statuses[0], statuses[1], statuses[2]
    assert dn1_state == 'OPEN'
    # Two outcomes are acceptable for the remaining pair.
    both_closed = dn2_state == 'CLOSED' and dn3_state == 'CLOSED'
    open_and_quasi_closed = \
        dn2_state == 'OPEN' and dn3_state == 'QUASI_CLOSED'
    assert both_closed or open_and_quasi_closed
|
Loading…
Reference in New Issue
Block a user