diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..37d1834ca --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023-2025 simplyblock GmbH + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/docker/Dockerfile b/docker/Dockerfile index ce1a83ae1..1e1f8c3bd 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,12 +1,33 @@ # syntax=docker/dockerfile:1 FROM simplyblock/simplyblock:base_image +LABEL name="simplyblock" +LABEL vendor="Simplyblock" +LABEL version="1.0.0" +LABEL release="1" +LABEL summary="Simplyblock controlplane plane component" +LABEL description="Simplyblock controlplane plane container" +LABEL maintainer="developers@simplyblock.io" + +COPY LICENSE /licenses/LICENSE + WORKDIR /app COPY requirements.txt . -RUN pip3 install -r requirements.txt +RUN pip3 install --no-cache-dir -r requirements.txt + COPY . 
/app RUN python setup.py install + +RUN if [ -d /usr/share/terminfo ]; then \ + find /usr/share/terminfo -lname '*ncr260vt300wpp*' -exec rm -f {} + ; \ + rm -f /usr/share/terminfo/n/ncr260vt300wpp || true ; \ + fi + +RUN useradd -u 1001 -r -g 0 -d /app -s /sbin/nologin simplyblock && \ + chown -R 1001:0 /app + +USER 1001 diff --git a/docker/Dockerfile_base b/docker/Dockerfile_base index 226188c96..735d331b1 100644 --- a/docker/Dockerfile_base +++ b/docker/Dockerfile_base @@ -38,3 +38,4 @@ RUN pip3 install setuptools --upgrade COPY requirements.txt requirements.txt RUN pip3 install -r requirements.txt + diff --git a/docs/talos.md b/docs/talos.md index 47ff817d5..f1406ef38 100644 --- a/docs/talos.md +++ b/docs/talos.md @@ -19,26 +19,12 @@ kubectl label namespace simplyblock \ --overwrite ``` - -Patch the host machine so that OpenEBS could work - Create a machine config patch with the contents below and save as patch.yaml ``` cat > patch.yaml <<'EOF' machine: sysctls: vm.nr_hugepages: "1024" - nodeLabels: - openebs.io/engine: mayastor - kubelet: - extraMounts: - - destination: /var/openebs/local - type: bind - source: /var/openebs/local - options: - - rbind - - rshared - - rw EOF talosctl -e -n patch mc -p @patch.yaml diff --git a/requirements.txt b/requirements.txt index 030cca8e0..9ee458f00 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,3 +24,4 @@ flask-openapi3 jsonschema fastapi uvicorn +prometheus_api_client \ No newline at end of file diff --git a/simplyblock_core/cluster_ops.py b/simplyblock_core/cluster_ops.py index 103123934..d7ac4628a 100644 --- a/simplyblock_core/cluster_ops.py +++ b/simplyblock_core/cluster_ops.py @@ -25,6 +25,7 @@ from simplyblock_core.models.stats import LVolStatObject, ClusterStatObject, NodeStatObject, DeviceStatObject from simplyblock_core.models.nvme_device import NVMeDevice from simplyblock_core.models.storage_node import StorageNode +from simplyblock_core.prom_client import PromClient from simplyblock_core.utils import 
pull_docker_image_with_retry logger = utils.get_logger(__name__) @@ -371,8 +372,6 @@ def create_cluster(blk_size, page_size_in_blocks, cli_pass, cluster.write_to_db(db_controller.kv_store) - qos_controller.add_class("Default", 100, cluster.get_id()) - cluster_events.cluster_create(cluster) mgmt_node_ops.add_mgmt_node(dev_ip, mode, cluster.uuid) @@ -1002,16 +1001,11 @@ def list_all_info(cluster_id) -> str: def get_capacity(cluster_id, history, records_count=20) -> t.List[dict]: - cluster = db_controller.get_cluster_by_id(cluster_id) - - if history: - records_number = utils.parse_history_param(history) - if not records_number: - raise ValueError(f"Error parsing history string: {history}") - else: - records_number = 20 - - records = db_controller.get_cluster_capacity(cluster, records_number) + try: + _ = db_controller.get_cluster_by_id(cluster_id) + except KeyError: + logger.error(f"Cluster not found: {cluster_id}") + return [] cap_stats_keys = [ "date", @@ -1022,20 +1016,17 @@ def get_capacity(cluster_id, history, records_count=20) -> t.List[dict]: "size_util", "size_prov_util", ] + prom_client = PromClient(cluster_id) + records = prom_client.get_cluster_metrics(cluster_id, cap_stats_keys, history) return utils.process_records(records, records_count, keys=cap_stats_keys) def get_iostats_history(cluster_id, history_string, records_count=20, with_sizes=False) -> t.List[dict]: - cluster = db_controller.get_cluster_by_id(cluster_id) - - if history_string: - records_number = utils.parse_history_param(history_string) - if not records_number: - raise ValueError(f"Error parsing history string: {history_string}") - else: - records_number = 20 - - records = db_controller.get_cluster_stats(cluster, records_number) + try: + _ = db_controller.get_cluster_by_id(cluster_id) + except KeyError: + logger.error(f"Cluster not found: {cluster_id}") + return [] io_stats_keys = [ "date", @@ -1073,6 +1064,9 @@ def get_iostats_history(cluster_id, history_string, records_count=20, with_sizes 
"write_latency_ticks", ] ) + + prom_client = PromClient(cluster_id) + records = prom_client.get_cluster_metrics(cluster_id, io_stats_keys, history_string) # combine records return utils.process_records(records, records_count, keys=io_stats_keys) @@ -1137,6 +1131,7 @@ def get_logs(cluster_id, limit=50, **kwargs) -> t.List[dict]: if record.event in ["device_status", "node_status"]: msg = msg+f" ({record.count})" + logger.debug(record) out.append({ "Date": record.get_date_string(), "NodeId": record.node_id, @@ -1159,10 +1154,6 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None, logger.info("Updating mgmt cluster") if cluster.mode == "docker": - sbcli=constants.SIMPLY_BLOCK_CLI_NAME - subprocess.check_call(f"pip install {sbcli} --upgrade".split(' ')) - logger.info(f"{sbcli} upgraded") - cluster_docker = utils.get_docker_client(cluster_id) logger.info(f"Pulling image {constants.SIMPLY_BLOCK_DOCKER_IMAGE}") pull_docker_image_with_retry(cluster_docker, constants.SIMPLY_BLOCK_DOCKER_IMAGE) @@ -1176,9 +1167,13 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None, for service in cluster_docker.services.list(): if image_parts in service.attrs['Spec']['Labels']['com.docker.stack.image'] or \ "simplyblock" in service.attrs['Spec']['Labels']['com.docker.stack.image']: - logger.info(f"Updating service {service.name}") - service.update(image=service_image, force_update=True) - service_names.append(service.attrs['Spec']['Name']) + if service.name in ["app_CachingNodeMonitor", "app_CachedLVolStatsCollector"]: + logger.info(f"Removing service {service.name}") + service.remove() + else: + logger.info(f"Updating service {service.name}") + service.update(image=service_image, force_update=True) + service_names.append(service.attrs['Spec']['Name']) if "app_SnapshotMonitor" not in service_names: logger.info("Creating snapshot monitor service") @@ -1191,6 +1186,18 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, 
spdk_image=None, networks=["host"], constraints=["node.role == manager"] ) + + if "app_TasksRunnerLVolSyncDelete" not in service_names: + logger.info("Creating lvol sync delete service") + cluster_docker.services.create( + image=service_image, + command="python simplyblock_core/services/tasks_runner_sync_lvol_del.py", + name="app_TasksRunnerLVolSyncDelete", + mounts=["/etc/foundationdb:/etc/foundationdb"], + env=["SIMPLYBLOCK_LOG_LEVEL=DEBUG"], + networks=["host"], + constraints=["node.role == manager"] + ) logger.info("Done updating mgmt cluster") elif cluster.mode == "kubernetes": @@ -1270,7 +1277,12 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None, logger.info(f"Restarting node: {node.get_id()} with SPDK image: {spdk_image}") else: logger.info(f"Restarting node: {node.get_id()}") - storage_node_ops.restart_storage_node(node.get_id(), force=True, spdk_image=spdk_image) + try: + storage_node_ops.restart_storage_node(node.get_id(), force=True, spdk_image=spdk_image) + except Exception as e: + logger.debug(e) + logger.error(f"Failed to restart node: {node.get_id()}") + return logger.info("Done") diff --git a/simplyblock_core/constants.py b/simplyblock_core/constants.py index 41824c73a..fd388c140 100644 --- a/simplyblock_core/constants.py +++ b/simplyblock_core/constants.py @@ -133,12 +133,12 @@ def get_config_var(name, default=None): LVOL_NVME_CONNECT_NR_IO_QUEUES=3 LVOL_NVME_KEEP_ALIVE_TO=10 LVOL_NVME_KEEP_ALIVE_TO_TCP=7 -LVOL_NVMF_PORT_START=int(os.getenv('LVOL_NVMF_PORT_START', 9100)) +LVOL_NVMF_PORT_ENV = os.getenv("LVOL_NVMF_PORT_START", "") +LVOL_NVMF_PORT_START = int(LVOL_NVMF_PORT_ENV) if LVOL_NVMF_PORT_ENV else 9100 QPAIR_COUNT=32 CLIENT_QPAIR_COUNT=3 NVME_TIMEOUT_US=8000000 NVMF_MAX_SUBSYSTEMS=50000 -HA_JM_COUNT=3 KATO=10000 ACK_TO=11 BDEV_RETRY=0 @@ -224,4 +224,5 @@ def get_config_var(name, default=None): qos_class_meta_and_migration_weight_percent = 25 -MIG_PARALLEL_JOBS = 16 \ No newline at end of file +MIG_PARALLEL_JOBS 
= 64 +MIG_JOB_SIZE = 64 diff --git a/simplyblock_core/controllers/device_controller.py b/simplyblock_core/controllers/device_controller.py index 8e684c942..2c624b9b1 100644 --- a/simplyblock_core/controllers/device_controller.py +++ b/simplyblock_core/controllers/device_controller.py @@ -6,6 +6,7 @@ from simplyblock_core.db_controller import DBController from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice from simplyblock_core.models.storage_node import StorageNode +from simplyblock_core.prom_client import PromClient from simplyblock_core.rpc_client import RPCClient @@ -440,7 +441,7 @@ def get_device_capacity(device_id, history, records_count=20, parse_sizes=True): else: records_number = 20 - records = db_controller.get_device_capacity(device, records_number) + # records = db_controller.get_device_capacity(device, records_number) cap_stats_keys = [ "date", "size_total", @@ -448,6 +449,8 @@ def get_device_capacity(device_id, history, records_count=20, parse_sizes=True): "size_free", "size_util", ] + prom_client = PromClient(device.cluster_id) + records = prom_client.get_device_metrics(device_id, cap_stats_keys, history) records_list = utils.process_records(records, records_count, keys=cap_stats_keys) if not parse_sizes: @@ -474,15 +477,6 @@ def get_device_iostats(device_id, history, records_count=20, parse_sizes=True): logger.error("device not found") return False - if history: - records_number = utils.parse_history_param(history) - if not records_number: - logger.error(f"Error parsing history string: {history}") - return False - else: - records_number = 20 - - records_list = db_controller.get_device_stats(device, records_number) io_stats_keys = [ "date", "read_bytes", @@ -496,8 +490,10 @@ def get_device_iostats(device_id, history, records_count=20, parse_sizes=True): "write_io_ps", "write_latency_ps", ] + prom_client = PromClient(device.cluster_id) + records = prom_client.get_device_metrics(device_id, io_stats_keys, history) # combine records - 
new_records = utils.process_records(records_list, records_count, keys=io_stats_keys) + new_records = utils.process_records(records, records_count, keys=io_stats_keys) if not parse_sizes: return new_records @@ -657,81 +653,6 @@ def add_device(device_id, add_migration_task=True): tasks_controller.add_new_device_mig_task(device_id) return device_id - # - # # create partitions - # partitions = snode.num_partitions_per_dev - # rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) - # # look for partitions - # partitioned_devices = storage_node_ops._search_for_partitions(rpc_client, device_obj) - # logger.debug("partitioned_devices") - # logger.debug(partitioned_devices) - # if len(partitioned_devices) == partitions+1: - # logger.info("Partitioned devices found") - # else: - # logger.info(f"Creating partitions for {device_obj.nvme_bdev}") - # storage_node_ops._create_device_partitions(rpc_client, device_obj, snode, partitions, snode.jm_percent) - # partitioned_devices = storage_node_ops._search_for_partitions(rpc_client, device_obj) - # if len(partitioned_devices) == partitions+1: - # logger.info("Device partitions created") - # else: - # logger.error("Failed to create partitions") - # return False - # - # jm_part = partitioned_devices.pop(0) - # new_devices = [] - # dev_order = storage_node_ops.get_next_cluster_device_order(db_controller, snode.cluster_id) - # for dev in partitioned_devices: - # new_device = storage_node_ops._create_storage_device_stack(rpc_client, dev, snode, after_restart=False) - # if not new_device: - # logger.error("failed to create dev stack") - # continue - # - # new_device.cluster_device_order = dev_order - # dev_order += 1 - # device_events.device_create(new_device) - # new_devices.append(new_device) - # - # if new_devices: - # snode.nvme_devices.remove(device_obj) - # snode.nvme_devices.extend(new_devices) - # snode.write_to_db(db_controller.kv_store) - # else: - # logger.error("failed to create 
devices") - # return False - # - # for dev in new_devices: - # distr_controller.send_cluster_map_add_device(dev, snode) - # - # logger.info("Make other nodes connect to the node devices") - # snodes = db_controller.get_storage_nodes_by_cluster_id(snode.cluster_id) - # for node in snodes: - # if node.get_id() == snode.get_id() or node.status != StorageNode.STATUS_ONLINE: - # continue - # node.remote_devices = storage_node_ops._connect_to_remote_devs(node) - # node.write_to_db() - # for dev in new_devices: - # distr_controller.send_cluster_map_add_device(dev, node) - # - # for dev in new_devices: - # tasks_controller.add_new_device_mig_task(dev.get_id()) - # - # # add to jm raid - # if snode.jm_device and snode.jm_device.raid_bdev and jm_part: - # # looking for jm partition - # jm_dev_part = jm_part.nvme_bdev - # ret = rpc_client.get_bdevs(jm_dev_part) - # if ret: - # logger.info(f"JM part found: {jm_dev_part}") - # if snode.jm_device.status in [JMDevice.STATUS_UNAVAILABLE, JMDevice.STATUS_REMOVED]: - # restart_jm_device(snode.jm_device.get_id(), force=True, format_alceml=True) - # - # if snode.jm_device.status == JMDevice.STATUS_ONLINE and \ - # jm_dev_part not in snode.jm_device.jm_nvme_bdev_list: - # remove_jm_device(snode.jm_device.get_id(), force=True) - # restart_jm_device(snode.jm_device.get_id(), force=True) - # - # return "Done" - def device_set_failed_and_migrated(device_id): db_controller = DBController() diff --git a/simplyblock_core/controllers/health_controller.py b/simplyblock_core/controllers/health_controller.py index c013e2d58..0dde91243 100644 --- a/simplyblock_core/controllers/health_controller.py +++ b/simplyblock_core/controllers/health_controller.py @@ -9,7 +9,7 @@ from simplyblock_core.db_controller import DBController from simplyblock_core.fw_api_client import FirewallClient from simplyblock_core.models.cluster import Cluster -from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice +from simplyblock_core.models.nvme_device import 
NVMeDevice, JMDevice, RemoteDevice from simplyblock_core.models.storage_node import StorageNode from simplyblock_core.rpc_client import RPCClient from simplyblock_core.snode_client import SNodeClient @@ -18,7 +18,7 @@ logger = utils.get_logger(__name__) -def check_bdev(name, *, rpc_client=None, bdev_names=None): +def check_bdev(name, *, rpc_client=None, bdev_names=None) -> bool: present = ( ((bdev_names is not None) and (name in bdev_names)) or (rpc_client is not None and (rpc_client.get_bdevs(name) is not None)) @@ -27,7 +27,7 @@ def check_bdev(name, *, rpc_client=None, bdev_names=None): return present -def check_subsystem(nqn, *, rpc_client=None, nqns=None, ns_uuid=None): +def check_subsystem(nqn, *, rpc_client=None, nqns=None, ns_uuid=None) -> bool: if rpc_client: subsystem = subsystems[0] if (subsystems := rpc_client.subsystem_list(nqn)) is not None else None elif nqns: @@ -59,7 +59,7 @@ def check_subsystem(nqn, *, rpc_client=None, nqns=None, ns_uuid=None): for listener in listeners: logger.info(f"Checking listener {listener['traddr']}:{listener['trsvcid']} ... 
ok") - return bool(listeners) and namespaces + return bool(listeners) and bool(namespaces) def check_cluster(cluster_id): @@ -117,7 +117,7 @@ def _check_node_rpc(rpc_ip, rpc_port, rpc_username, rpc_password, timeout=5, ret def _check_node_api(ip): try: - snode_api = SNodeClient(f"{ip}:5000", timeout=10, retry=2) + snode_api = SNodeClient(f"{ip}:5000", timeout=90, retry=2) logger.debug(f"Node API={ip}:5000") ret, _ = snode_api.is_live() logger.debug(f"snode is alive: {ret}") @@ -128,43 +128,35 @@ def _check_node_api(ip): return False -def _check_spdk_process_up(ip, rpc_port): - try: - snode_api = SNodeClient(f"{ip}:5000", timeout=10, retry=2) - logger.debug(f"Node API={ip}:5000") - is_up, _ = snode_api.spdk_process_is_up(rpc_port) - logger.debug(f"SPDK is {is_up}") - return is_up - except Exception as e: - logger.debug(e) - return False - - -def _check_port_on_node(snode, port_id): - try: - fw_api = FirewallClient(snode, timeout=5, retry=2) - iptables_command_output, _ = fw_api.get_firewall(snode.rpc_port) - if type(iptables_command_output) is str: - iptables_command_output = [iptables_command_output] - for rules in iptables_command_output: - result = jc.parse('iptables', rules) - for chain in result: - if chain['chain'] in ["INPUT", "OUTPUT"]: # type: ignore - for rule in chain['rules']: # type: ignore - if str(port_id) in rule['options']: # type: ignore - action = rule['target'] # type: ignore - if action in ["DROP"]: - return False - - # check RDMA port block - if snode.active_rdma: - rdma_fw_port_list = snode.rpc_client().nvmf_get_blocked_ports_rdma() - if port_id in rdma_fw_port_list: - return False +def _check_spdk_process_up(ip, rpc_port, cluster_id): + snode_api = SNodeClient(f"{ip}:5000", timeout=90, retry=2) + logger.debug(f"Node API={ip}:5000") + is_up, _ = snode_api.spdk_process_is_up(rpc_port, cluster_id) + logger.debug(f"SPDK is {is_up}") + return is_up + + +def check_port_on_node(snode, port_id): + fw_api = FirewallClient(snode, timeout=5, retry=2) + 
iptables_command_output, _ = fw_api.get_firewall(snode.rpc_port) + if type(iptables_command_output) is str: + iptables_command_output = [iptables_command_output] + for rules in iptables_command_output: + result = jc.parse('iptables', rules) + for chain in result: + if chain['chain'] in ["INPUT", "OUTPUT"]: # type: ignore + for rule in chain['rules']: # type: ignore + if str(port_id) in rule['options']: # type: ignore + action = rule['target'] # type: ignore + if action in ["DROP"]: + return False + + # check RDMA port block + if snode.active_rdma: + rdma_fw_port_list = snode.rpc_client().nvmf_get_blocked_ports_rdma() + if port_id in rdma_fw_port_list: + return False - return True - except Exception as e: - logger.error(e) return True @@ -175,7 +167,7 @@ def _check_node_ping(ip): else: return False -def _check_node_hublvol(node: StorageNode, node_bdev_names=None, node_lvols_nqns=None): +def _check_node_hublvol(node: StorageNode, node_bdev_names=None, node_lvols_nqns=None) -> bool: if not node.hublvol: logger.error(f"Node {node.get_id()} does not have a hublvol") return False @@ -235,15 +227,17 @@ def _check_node_hublvol(node: StorageNode, node_bdev_names=None, node_lvols_nqns passed = False else: lvs_info_dict.append({"Key": k, "Value": v, "expected": " "}) - for line in utils.print_table(lvs_info_dict).splitlines(): - logger.info(line) + if not passed: + for line in utils.print_table(lvs_info_dict).splitlines(): + logger.info(line) except Exception as e: logger.exception(e) + return False return passed -def _check_sec_node_hublvol(node: StorageNode, node_bdev=None, node_lvols_nqns=None, auto_fix=False): +def _check_sec_node_hublvol(node: StorageNode, node_bdev=None, node_lvols_nqns=None, auto_fix=False) -> bool: db_controller = DBController() try: primary_node = db_controller.get_storage_node_by_id(node.lvstore_stack_secondary_1) @@ -294,6 +288,16 @@ def _check_sec_node_hublvol(node: StorageNode, node_bdev=None, node_lvols_nqns=N passed = bool(ret) 
logger.info(f"Checking controller: {primary_node.hublvol.bdev_name} ... {passed}") + node_bdev = {} + ret = rpc_client.get_bdevs() + if ret: + for b in ret: + node_bdev[b['name']] = b + for al in b['aliases']: + node_bdev[al]= b + else: + node_bdev = [] + passed &= check_bdev(primary_node.hublvol.get_remote_bdev_name(), bdev_names=node_bdev) if not passed: return False @@ -331,20 +335,19 @@ def _check_sec_node_hublvol(node: StorageNode, node_bdev=None, node_lvols_nqns=N else: lvs_info_dict.append({"Key": k, "Value": v, "expected": " "}) - for line in utils.print_table(lvs_info_dict).splitlines(): - logger.info(line) + if not passed: + for line in utils.print_table(lvs_info_dict).splitlines(): + logger.info(line) except Exception as e: logger.exception(e) + return False return passed def _check_node_lvstore( - lvstore_stack, node, auto_fix=False, node_bdev_names=None, stack_src_node=None): + lvstore_stack, node, auto_fix=False, node_bdev_names=None, stack_src_node=None) -> bool: db_controller = DBController() - lvstore_check = True logger.info(f"Checking distr stack on node : {node.get_id()}") - rpc_client = RPCClient( - node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=5, retry=1) cluster = db_controller.get_cluster_by_id(node.cluster_id) if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: auto_fix = False @@ -367,12 +370,24 @@ def _check_node_lvstore( node_distribs_list = bdev["distribs_list"] if not node_bdev_names: - ret = rpc_client.get_bdevs() + try: + ret = node.rpc_client().get_bdevs() + except Exception as e: + logger.info(e) + return False + if ret: node_bdev_names = [b['name'] for b in ret] else: node_bdev_names = [] + nodes = {} + devices = {} + for n in db_controller.get_storage_nodes(): + nodes[n.get_id()] = n + for dev in n.nvme_devices: + devices[dev.get_id()] = dev + for distr in distribs_list: if distr in node_bdev_names: logger.info(f"Checking distr bdev : {distr} ... 
ok") @@ -386,22 +401,34 @@ def _check_node_lvstore( for jm in jm_names: logger.info(jm) logger.info("Checking Distr map ...") - ret = rpc_client.distr_get_cluster_map(distr) + try: + ret = node.rpc_client().distr_get_cluster_map(distr) + except Exception as e: + logger.info(f"Failed to get cluster map: {e}") + return False if not ret: logger.error("Failed to get cluster map") - lvstore_check = False + return False else: - results, is_passed = distr_controller.parse_distr_cluster_map(ret) + results, is_passed = distr_controller.parse_distr_cluster_map(ret, nodes, devices) if results: - logger.info(utils.print_table(results)) logger.info(f"Checking Distr map ... {is_passed}") - if not is_passed and auto_fix: + if is_passed: + continue + + elif not auto_fix: + return False + + else: # is_passed is False and auto_fix is True + logger.info(utils.print_table(results)) for result in results: if result['Results'] == 'failed': if result['Kind'] == "Device": if result['Found Status']: dev = db_controller.get_storage_device_by_id(result['UUID']) - if dev.status == NVMeDevice.STATUS_ONLINE: + dev_node = db_controller.get_storage_node_by_id(dev.node_id) + if dev.status == NVMeDevice.STATUS_ONLINE and dev_node.status in [ + StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN]: try: remote_bdev = storage_node_ops.connect_device( f"remote_{dev.alceml_bdev}", dev, node, @@ -413,44 +440,66 @@ def _check_node_lvstore( if dev.get_id() == rem_dev.get_id(): continue new_remote_devices.append(rem_dev) - dev.remote_bdev = remote_bdev - new_remote_devices.append(dev) + + remote_device = RemoteDevice() + remote_device.uuid = dev.uuid + remote_device.alceml_name = dev.alceml_name + remote_device.node_id = dev.node_id + remote_device.size = dev.size + remote_device.status = NVMeDevice.STATUS_ONLINE + remote_device.nvmf_multipath = dev.nvmf_multipath + remote_device.remote_bdev = remote_bdev + new_remote_devices.append(remote_device) n.remote_devices = new_remote_devices n.write_to_db() 
distr_controller.send_dev_status_event(dev, dev.status, node) except Exception as e: logger.error(f"Failed to connect to {dev.get_id()}: {e}") + else: + distr_controller.send_dev_status_event(dev, dev.status, node) + if result['Kind'] == "Node": n = db_controller.get_storage_node_by_id(result['UUID']) distr_controller.send_node_status_event(n, n.status, node) - ret = rpc_client.distr_get_cluster_map(distr) + + try: + ret = node.rpc_client().distr_get_cluster_map(distr) + except Exception as e: + logger.error(e) + return False if not ret: logger.error("Failed to get cluster map") - lvstore_check = False + return False else: - results, is_passed = distr_controller.parse_distr_cluster_map(ret) + results, is_passed = distr_controller.parse_distr_cluster_map(ret, nodes, devices) logger.info(f"Checking Distr map ... {is_passed}") + if not is_passed: + return False else: logger.error("Failed to parse distr cluster map") - lvstore_check &= is_passed + return False else: logger.info(f"Checking distr bdev : {distr} ... not found") - lvstore_check = False + return False if raid: if raid in node_bdev_names: logger.info(f"Checking raid bdev: {raid} ... ok") else: logger.info(f"Checking raid bdev: {raid} ... not found") - lvstore_check = False + return False if bdev_lvstore: - ret = rpc_client.bdev_lvol_get_lvstores(bdev_lvstore) + try: + ret = node.rpc_client().bdev_lvol_get_lvstores(bdev_lvstore) + except Exception as e: + logger.error(e) + return False if ret: logger.info(f"Checking lvstore: {bdev_lvstore} ... ok") else: logger.info(f"Checking lvstore: {bdev_lvstore} ... 
not found") - lvstore_check = False - return lvstore_check + return False + return True def check_node(node_id, with_devices=True): db_controller = DBController() @@ -493,13 +542,19 @@ def check_node(node_id, with_devices=True): if snode.lvstore_stack_secondary_1: try: n = db_controller.get_storage_node_by_id(snode.lvstore_stack_secondary_1) - lvol_port_check = _check_port_on_node(snode, n.lvol_subsys_port) + lvol_port_check = check_port_on_node(snode, n.lvol_subsys_port) logger.info(f"Check: node {snode.mgmt_ip}, port: {n.lvol_subsys_port} ... {lvol_port_check}") except KeyError: - pass + logger.error("node not found") + except Exception: + logger.error("Check node port failed, connection error") + if not snode.is_secondary_node: - lvol_port_check = _check_port_on_node(snode, snode.lvol_subsys_port) - logger.info(f"Check: node {snode.mgmt_ip}, port: {snode.lvol_subsys_port} ... {lvol_port_check}") + try: + lvol_port_check = check_port_on_node(snode, snode.lvol_subsys_port) + logger.info(f"Check: node {snode.mgmt_ip}, port: {snode.lvol_subsys_port} ... 
{lvol_port_check}") + except Exception: + logger.error("Check node port failed, connection error") is_node_online = ping_check and node_api_check and node_rpc_check diff --git a/simplyblock_core/controllers/lvol_controller.py b/simplyblock_core/controllers/lvol_controller.py index 4d7a5aad3..be8c4fc55 100644 --- a/simplyblock_core/controllers/lvol_controller.py +++ b/simplyblock_core/controllers/lvol_controller.py @@ -15,6 +15,7 @@ from simplyblock_core.models.pool import Pool from simplyblock_core.models.lvol_model import LVol from simplyblock_core.models.storage_node import StorageNode +from simplyblock_core.prom_client import PromClient from simplyblock_core.rpc_client import RPCClient logger = lg.getLogger() @@ -1521,19 +1522,11 @@ def get_capacity(lvol_uuid, history, records_count=20, parse_sizes=True): db_controller = DBController() try: lvol = db_controller.get_lvol_by_id(lvol_uuid) + pool = db_controller.get_pool_by_id(lvol.pool_uuid) except KeyError as e: logger.error(e) return False - if history: - records_number = utils.parse_history_param(history) - if not records_number: - logger.error(f"Error parsing history string: {history}") - return False - else: - records_number = 20 - - records_list = db_controller.get_lvol_stats(lvol, limit=records_number) cap_stats_keys = [ "date", "size_total", @@ -1543,6 +1536,8 @@ def get_capacity(lvol_uuid, history, records_count=20, parse_sizes=True): "size_prov", "size_prov_util" ] + prom_client = PromClient(pool.cluster_id) + records_list = prom_client.get_lvol_metrics(lvol_uuid, cap_stats_keys, history) new_records = utils.process_records(records_list, records_count, keys=cap_stats_keys) if not parse_sizes: @@ -1564,19 +1559,11 @@ def get_io_stats(lvol_uuid, history, records_count=20, parse_sizes=True, with_si db_controller = DBController() try: lvol = db_controller.get_lvol_by_id(lvol_uuid) + pool = db_controller.get_pool_by_id(lvol.pool_uuid) except KeyError as e: logger.error(e) return False - if history: - 
records_number = utils.parse_history_param(history) - if not records_number: - logger.error(f"Error parsing history string: {history}") - return False - else: - records_number = 20 - - records_list = db_controller.get_lvol_stats(lvol, limit=records_number) io_stats_keys = [ "date", "read_bytes", @@ -1587,7 +1574,6 @@ def get_io_stats(lvol_uuid, history, records_count=20, parse_sizes=True, with_si "write_bytes_ps", "write_io_ps", "write_latency_ps", - "connected_clients", ] if with_sizes: io_stats_keys.extend( @@ -1612,6 +1598,8 @@ def get_io_stats(lvol_uuid, history, records_count=20, parse_sizes=True, with_si "write_latency_ticks", ] ) + prom_client = PromClient(pool.cluster_id) + records_list = prom_client.get_lvol_metrics(lvol_uuid, io_stats_keys, history) # combine records new_records = utils.process_records(records_list, records_count, keys=io_stats_keys) @@ -1630,7 +1618,6 @@ def get_io_stats(lvol_uuid, history, records_count=20, parse_sizes=True, with_si "Write speed": utils.humanbytes(record['write_bytes_ps']), "Write IOPS": record['write_io_ps'], "Write lat": record['write_latency_ps'], - "Con": record['connected_clients'], }) return out diff --git a/simplyblock_core/controllers/pool_controller.py b/simplyblock_core/controllers/pool_controller.py index db7016d7d..2440a6bd7 100644 --- a/simplyblock_core/controllers/pool_controller.py +++ b/simplyblock_core/controllers/pool_controller.py @@ -12,6 +12,7 @@ from simplyblock_core.controllers import pool_events, lvol_controller from simplyblock_core.db_controller import DBController from simplyblock_core.models.pool import Pool +from simplyblock_core.prom_client import PromClient from simplyblock_core.rpc_client import RPCClient logger = lg.getLogger() @@ -321,15 +322,18 @@ def get_io_stats(pool_id, history, records_count=20): logger.error(f"Pool not found {pool_id}") return False - if history: - records_number = utils.parse_history_param(history) - if not records_number: - logger.error(f"Error parsing history 
string: {history}") - return False - else: - records_number = 20 + io_stats_keys = [ + "date", + "read_bytes_ps", + "read_io_ps", + "read_latency_ps", + "write_bytes_ps", + "write_io_ps", + "write_latency_ps", + ] - out = db_controller.get_pool_stats(pool, records_number) + prom_client = PromClient(pool.cluster_id) + out = prom_client.get_pool_metrics(pool_id, io_stats_keys, history) new_records = utils.process_records(out, records_count) return utils.print_table([ diff --git a/simplyblock_core/controllers/tasks_controller.py b/simplyblock_core/controllers/tasks_controller.py index 689027d08..dab539943 100644 --- a/simplyblock_core/controllers/tasks_controller.py +++ b/simplyblock_core/controllers/tasks_controller.py @@ -70,6 +70,11 @@ def _add_task(function_name, cluster_id, node_id, device_id, if task_id: logger.info(f"Task found, skip adding new task: {task_id}") return False + elif function_name == JobSchedule.FN_LVOL_SYNC_DEL: + task_id = get_lvol_sync_del_task(cluster_id, node_id, function_params['lvol_bdev_name']) + if task_id: + logger.info(f"Task found, skip adding new task: {task_id}") + return False task_obj = JobSchedule() task_obj.uuid = str(uuid.uuid4()) @@ -95,11 +100,13 @@ def add_device_mig_task(device_id_list, cluster_id): device = db.get_storage_device_by_id(device_id_list[0]) tasks = db.get_job_tasks(cluster_id) + master_task = None for task in tasks: if task.function_name == JobSchedule.FN_BALANCING_AFTER_NODE_RESTART : if task.status != JobSchedule.STATUS_DONE and task.canceled is False: - logger.info(f"Task found, skip adding new task: {task.get_id()}") - return False + logger.info("Master task found, skip adding new master task") + master_task = task + break for node in db.get_storage_nodes_by_cluster_id(cluster_id): if node.status == StorageNode.STATUS_REMOVED: @@ -112,16 +119,19 @@ def add_device_mig_task(device_id_list, cluster_id): if task_id: sub_tasks.append(task_id) if sub_tasks: - task_obj = JobSchedule() - task_obj.uuid = 
str(uuid.uuid4()) - task_obj.cluster_id = cluster_id - task_obj.date = int(time.time()) - task_obj.function_name = JobSchedule.FN_BALANCING_AFTER_NODE_RESTART - task_obj.sub_tasks = sub_tasks - task_obj.status = JobSchedule.STATUS_NEW - task_obj.write_to_db(db.kv_store) - tasks_events.task_create(task_obj) - + if master_task: + master_task.sub_tasks.extend(sub_tasks) + master_task.write_to_db() + else: + task_obj = JobSchedule() + task_obj.uuid = str(uuid.uuid4()) + task_obj.cluster_id = cluster_id + task_obj.date = int(time.time()) + task_obj.function_name = JobSchedule.FN_BALANCING_AFTER_NODE_RESTART + task_obj.sub_tasks = sub_tasks + task_obj.status = JobSchedule.STATUS_NEW + task_obj.write_to_db(db.kv_store) + tasks_events.task_create(task_obj) return True @@ -150,13 +160,15 @@ def list_tasks(cluster_id, is_json=False, limit=50, **kwargs): return False data = [] - tasks = db.get_job_tasks(cluster_id, reverse=True, limit=limit) + tasks = db.get_job_tasks(cluster_id, reverse=True) tasks.reverse() if is_json is True: for t in tasks: if t.function_name == JobSchedule.FN_DEV_MIG: continue data.append(t.get_clean_dict()) + if len(data)+1 > limit > 0: + return json.dumps(data, indent=2) return json.dumps(data, indent=2) for task in tasks: @@ -166,7 +178,7 @@ def list_tasks(cluster_id, is_json=False, limit=50, **kwargs): retry = f"{task.retry}/{task.max_retry}" else: retry = f"{task.retry}" - + logger.debug(task) upd = task.updated_at if upd: try: @@ -192,6 +204,8 @@ def list_tasks(cluster_id, is_json=False, limit=50, **kwargs): "Result": task.function_result, "Updated At": upd or "", }) + if len(data)+1 > limit > 0: + return utils.print_table(data) return utils.print_table(data) @@ -234,6 +248,7 @@ def get_subtasks(master_task_id): except Exception as e: logger.error(e) + logger.debug(sub_task) data.append({ "Task ID": sub_task.uuid, "Node ID / Device ID": f"{sub_task.node_id}\n{sub_task.device_id}", @@ -303,7 +318,8 @@ def add_new_device_mig_task(device_id): def 
add_node_add_task(cluster_id, function_params): - return _add_task(JobSchedule.FN_NODE_ADD, cluster_id, "", "", function_params=function_params) + return _add_task(JobSchedule.FN_NODE_ADD, cluster_id, "", "", + function_params=function_params, max_retry=11) def get_active_node_tasks(cluster_id, node_id): @@ -334,7 +350,7 @@ def get_new_device_mig_task(cluster_id, node_id, distr_name, dev_id=None): def get_device_mig_task(cluster_id, node_id, device_id, distr_name): tasks = db.get_job_tasks(cluster_id) for task in tasks: - if task.function_name == JobSchedule.FN_DEV_MIG and task.node_id == node_id and task.device_id == device_id: + if task.function_name == JobSchedule.FN_DEV_MIG and task.node_id == node_id: if task.status != JobSchedule.STATUS_DONE and task.canceled is False \ and "distr_name" in task.function_params and task.function_params["distr_name"] == distr_name: return task.uuid @@ -386,3 +402,21 @@ def get_jc_comp_task(cluster_id, node_id, jm_vuid=0): if jm_vuid and "jm_vuid" in task.function_params and task.function_params["jm_vuid"] == jm_vuid: return task.uuid return False + + +def add_lvol_sync_del_task(cluster_id, node_id, lvol_bdev_name): + return _add_task(JobSchedule.FN_LVOL_SYNC_DEL, cluster_id, node_id, "", + function_params={"lvol_bdev_name": lvol_bdev_name}, max_retry=10) + +def get_lvol_sync_del_task(cluster_id, node_id, lvol_bdev_name=None): + tasks = db.get_job_tasks(cluster_id) + for task in tasks: + if task.function_name == JobSchedule.FN_LVOL_SYNC_DEL and task.node_id == node_id : + if task.status != JobSchedule.STATUS_DONE and task.canceled is False: + if lvol_bdev_name: + if "lvol_bdev_name" in task.function_params and task.function_params["lvol_bdev_name"] == lvol_bdev_name: + return task.uuid + else: + return task.uuid + return False + diff --git a/simplyblock_core/distr_controller.py b/simplyblock_core/distr_controller.py index e50115f62..5a169cbd8 100644 --- a/simplyblock_core/distr_controller.py +++ 
b/simplyblock_core/distr_controller.py @@ -46,8 +46,9 @@ def send_node_status_event(node, node_status, target_node=None): continue logger.info(f"Sending to: {node.get_id()}") rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=3, retry=1) - ret = rpc_client.distr_status_events_update(events) - if not ret: + try: + rpc_client.distr_status_events_update(events) + except Exception: logger.warning("Failed to send event update") @@ -95,9 +96,9 @@ def send_dev_status_event(device, status, target_node=None): "storage_ID": storage_ID, "status": dev_status}]} logger.debug(f"Sending event updates, device: {storage_ID}, status: {dev_status}, node: {node.get_id()}") - rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=3, retry=1) - ret = rpc_client.distr_status_events_update(events) - if not ret: + try: + node.rpc_client(timeout=3, retry=1).distr_status_events_update(events) + except Exception: logger.warning("Failed to send event update") @@ -192,12 +193,20 @@ def get_distr_cluster_map(snodes, target_node, distr_name=""): return cl_map -def parse_distr_cluster_map(map_string): +def parse_distr_cluster_map(map_string, nodes=None, devices=None): db_controller = DBController() node_pattern = re.compile(r".*uuid_node=(.*) status=(.*)$", re.IGNORECASE) device_pattern = re.compile( r".*storage_ID=(.*) status=(.*) uuid_device=(.*) storage_bdev_name=(.*)$", re.IGNORECASE) + if not nodes or not devices: + nodes = {} + devices = {} + for n in db_controller.get_storage_nodes(): + nodes[n.get_id()] = n + for dev in n.nvme_devices: + devices[dev.get_id()] = dev + results = [] passed = True for line in map_string.split('\n'): @@ -213,8 +222,7 @@ def parse_distr_cluster_map(map_string): "Results": "", } try: - nd = db_controller.get_storage_node_by_id(node_id) - node_status = nd.status + node_status = nodes[node_id].status if node_status == StorageNode.STATUS_SCHEDULABLE: node_status = 
StorageNode.STATUS_UNREACHABLE data["Desired Status"] = node_status @@ -238,7 +246,7 @@ def parse_distr_cluster_map(map_string): "Results": "", } try: - sd = db_controller.get_storage_device_by_id(device_id) + sd = devices[device_id] data["Desired Status"] = sd.status if sd.status == status: data["Results"] = "ok" @@ -252,38 +260,26 @@ def parse_distr_cluster_map(map_string): return results, passed -def send_cluster_map_to_node(node): +def send_cluster_map_to_node(node: StorageNode): db_controller = DBController() snodes = db_controller.get_storage_nodes_by_cluster_id(node.cluster_id) - rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=10) - - # if node.lvstore_stack_secondary_1: - # for snode in db_controller.get_primary_storage_nodes_by_secondary_node_id(node.get_id()): - # for bdev in snode.lvstore_stack: - # if bdev['type'] == "bdev_distr": - # cluster_map_data = get_distr_cluster_map(snodes, node, bdev["name"]) - # ret = rpc_client.distr_send_cluster_map(cluster_map_data) - # if not ret: - # logger.error("Failed to send cluster map") - # return False - # return True - # else: cluster_map_data = get_distr_cluster_map(snodes, node) - ret = rpc_client.distr_send_cluster_map(cluster_map_data) - if not ret: + try: + node.rpc_client(timeout=10).distr_send_cluster_map(cluster_map_data) + except Exception: logger.error("Failed to send cluster map") logger.info(cluster_map_data) return False return True -def send_cluster_map_to_distr(node, distr_name): +def send_cluster_map_to_distr(node: StorageNode, distr_name: str): db_controller = DBController() snodes = db_controller.get_storage_nodes_by_cluster_id(node.cluster_id) - rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=10) cluster_map_data = get_distr_cluster_map(snodes, node, distr_name) - ret = rpc_client.distr_send_cluster_map(cluster_map_data) - if not ret: + try: + 
node.rpc_client(timeout=10).distr_send_cluster_map(cluster_map_data) + except Exception: logger.error("Failed to send cluster map") logger.info(cluster_map_data) return False @@ -294,14 +290,13 @@ def send_cluster_map_add_node(snode, target_node): if target_node.status != StorageNode.STATUS_ONLINE: return False logger.info(f"Sending to: {target_node.get_id()}") - rpc_client = RPCClient(target_node.mgmt_ip, target_node.rpc_port, target_node.rpc_username, target_node.rpc_password, timeout=5) - cluster_map_data = get_distr_cluster_map([snode], target_node) cl_map = { "map_cluster": cluster_map_data['map_cluster'], "map_prob": cluster_map_data['map_prob']} - ret = rpc_client.distr_add_nodes(cl_map) - if not ret: + try: + target_node.rpc_client(timeout=10).distr_add_nodes(cl_map) + except Exception: logger.error("Failed to send cluster map") return False return True @@ -355,8 +350,9 @@ def send_cluster_map_add_device(device: NVMeDevice, target_node: StorageNode): "weight": dev_w_gib, }} } - ret = rpc_client.distr_add_devices(cl_map) - if not ret: + try: + rpc_client.distr_add_devices(cl_map) + except Exception: logger.error("Failed to send cluster map") return False return True diff --git a/simplyblock_core/env_var b/simplyblock_core/env_var index f3e377ee4..f34a430a9 100644 --- a/simplyblock_core/env_var +++ b/simplyblock_core/env_var @@ -1,5 +1,5 @@ SIMPLY_BLOCK_COMMAND_NAME=sbcli-dev -SIMPLY_BLOCK_VERSION=19.2.23 +SIMPLY_BLOCK_VERSION=19.2.27 SIMPLY_BLOCK_DOCKER_IMAGE=public.ecr.aws/simply-block/simplyblock:main SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=public.ecr.aws/simply-block/ultra:main-latest diff --git a/simplyblock_core/fw_api_client.py b/simplyblock_core/fw_api_client.py index d17255c80..074bcc3dc 100644 --- a/simplyblock_core/fw_api_client.py +++ b/simplyblock_core/fw_api_client.py @@ -41,7 +41,7 @@ def _request(self, method, path, payload=None): response = self.session.request(method, self.url+path, data=data, timeout=self.timeout, params=params) except Exception as 
e: - raise e + raise FirewallClientException(str(e)) logger.debug("Response: status_code: %s, content: %s", response.status_code, response.content) diff --git a/simplyblock_core/models/cluster.py b/simplyblock_core/models/cluster.py index fd4802771..620309f77 100644 --- a/simplyblock_core/models/cluster.py +++ b/simplyblock_core/models/cluster.py @@ -45,7 +45,7 @@ class Cluster(BaseModel): distr_npcs: int = 0 enable_node_affinity: bool = False grafana_endpoint: str = "" - mode: str = "" + mode: str = "docker" grafana_secret: str = "" contact_point: str = "" ha_type: str = "single" diff --git a/simplyblock_core/models/job_schedule.py b/simplyblock_core/models/job_schedule.py index 3d87a9aca..bbdcd7871 100644 --- a/simplyblock_core/models/job_schedule.py +++ b/simplyblock_core/models/job_schedule.py @@ -22,6 +22,7 @@ class JobSchedule(BaseModel): FN_BALANCING_AFTER_DEV_REMOVE = "balancing_on_dev_rem" FN_BALANCING_AFTER_DEV_EXPANSION = "balancing_on_dev_add" FN_JC_COMP_RESUME = "jc_comp_resume" + FN_LVOL_SYNC_DEL = "lvol_sync_del" canceled: bool = False cluster_id: str = "" diff --git a/simplyblock_core/models/nvme_device.py b/simplyblock_core/models/nvme_device.py index b86e25c44..761d425da 100644 --- a/simplyblock_core/models/nvme_device.py +++ b/simplyblock_core/models/nvme_device.py @@ -47,19 +47,12 @@ class NVMeDevice(BaseModel): nvmf_nqn: str = "" nvmf_port: int = 0 nvmf_multipath: bool = False - overload_percentage: int = 0 # Unused - partition_jm_bdev: str = "" # Unused - partition_jm_size: int = 0 # Unused - partition_main_bdev: str = "" # Unused - partition_main_size: int = 0 # Unused - partitions_count: int = 0 # Unused pcie_address: str = "" physical_label: int = 0 pt_bdev: str = "" qos_bdev: str = "" remote_bdev: str = "" retries_exhausted: bool = False - sequential_number: int = 0 # Unused serial_number: str = "" size: int = -1 testing_bdev: str = "" @@ -73,3 +66,18 @@ class JMDevice(NVMeDevice): jm_bdev: str = "" jm_nvme_bdev_list: List[str] = [] 
raid_bdev: str = "" + + +class RemoteDevice(BaseModel): + + remote_bdev: str = "" + alceml_name: str = "" + node_id: str = "" + size: int = -1 + nvmf_multipath: bool = False + + +class RemoteJMDevice(RemoteDevice): + + jm_bdev: str = "" + diff --git a/simplyblock_core/models/storage_node.py b/simplyblock_core/models/storage_node.py index 8c76d3649..b2ad4c6f5 100644 --- a/simplyblock_core/models/storage_node.py +++ b/simplyblock_core/models/storage_node.py @@ -7,7 +7,7 @@ from simplyblock_core.models.base_model import BaseNodeObject from simplyblock_core.models.hublvol import HubLVol from simplyblock_core.models.iface import IFace -from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice +from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice, RemoteDevice, RemoteJMDevice from simplyblock_core.rpc_client import RPCClient, RPCException logger = utils.get_logger(__name__) @@ -79,8 +79,8 @@ class StorageNode(BaseNodeObject): pollers_mask: str = "" primary_ip: str = "" raid: str = "" - remote_devices: List[NVMeDevice] = [] - remote_jm_devices: List[JMDevice] = [] + remote_devices: List[RemoteDevice] = [] + remote_jm_devices: List[RemoteJMDevice] = [] rpc_password: str = "" rpc_port: int = -1 rpc_username: str = "" @@ -102,7 +102,6 @@ class StorageNode(BaseNodeObject): hublvol: HubLVol = None # type: ignore[assignment] active_tcp: bool = True active_rdma: bool = False - lvol_sync_del_queue: List[str] = [] def rpc_client(self, **kwargs): """Return rpc client to this node diff --git a/simplyblock_core/prom_client.py b/simplyblock_core/prom_client.py new file mode 100644 index 000000000..82756161b --- /dev/null +++ b/simplyblock_core/prom_client.py @@ -0,0 +1,126 @@ +import logging +import re +from datetime import datetime, timedelta + +from simplyblock_core.db_controller import DBController +from simplyblock_core.models.mgmt_node import MgmtNode + +from prometheus_api_client import PrometheusConnect + +logger = logging.getLogger() + + +class 
PromClientException(Exception): + def __init__(self, message): + self.message = message + + +class PromClient: + + def __init__(self, cluster_id): + db_controller = DBController() + cluster_ip = None + for node in db_controller.get_mgmt_nodes(): + if node.cluster_id == cluster_id and node.status == MgmtNode.STATUS_ONLINE: + cluster_ip = node.mgmt_ip + break + if cluster_ip is None: + raise PromClientException("Cluster has no online mgmt nodes") + + self.ip_address = f"{cluster_ip}:9090" + self.url = 'http://%s/' % self.ip_address + self.client = PrometheusConnect(url=self.url, disable_ssl=True) + + def parse_history_param(self, history_string): + if not history_string: + logger.error("Invalid history value") + return False + + # process history + results = re.search(r'^(\d+[hmd])(\d+[hmd])?$', history_string.lower()) + if not results: + logger.error(f"Error parsing history string: {history_string}") + logger.info("History format: xxdyyh , e.g: 1d12h, 1d, 2h, 1m") + return False + + history_in_days = 0 + history_in_hours = 0 + history_in_minutes = 0 + for s in results.groups(): + if not s: + continue + ind = s[-1] + v = int(s[:-1]) + if ind == 'd': + history_in_days = v + if ind == 'h': + history_in_hours = v + if ind == 'm': + history_in_minutes = v + + history_in_hours += int(history_in_minutes/60) + history_in_minutes = history_in_minutes % 60 + history_in_days += int(history_in_hours/24) + history_in_hours = history_in_hours % 24 + return history_in_days, history_in_hours, history_in_minutes + + def get_metrics(self, key_prefix, metrics_lst, params, history=None): + start_time = datetime.now() - timedelta(minutes=10) + if history: + try: + days,hours,minutes = self.parse_history_param(history) + start_time = datetime.now() - timedelta(days=days, hours=hours, minutes=minutes) + except Exception: + raise PromClientException(f"Error parsing history string: {history}") + end_time = datetime.now() + data_out: list[dict] = [] + for key in metrics_lst: + metrics = 
self.client.get_metric_range_data( + f"{key_prefix}_{key}", label_config=params, start_time=start_time, end_time=end_time) + for m in metrics: + mt_name = key + mt_values = m["values"] + for i, v in enumerate(mt_values): + value = v[1] + try: + value = int(value) + except Exception: + pass + if len(data_out) <= i: + data_out.append({mt_name: value}) + else: + d = data_out[i] + if mt_name not in d: + d[mt_name] = value + + return data_out + + def get_cluster_metrics(self, cluster_uuid, metrics_lst, history=None): + params = { + "cluster": cluster_uuid + } + return self.get_metrics("cluster", metrics_lst, params, history) + + def get_node_metrics(self, snode_uuid, metrics_lst, history=None): + params = { + "snode": snode_uuid + } + return self.get_metrics("snode", metrics_lst, params, history) + + def get_device_metrics(self, device_uuid, metrics_lst, history=None): + params = { + "device": device_uuid + } + return self.get_metrics("device", metrics_lst, params, history) + + def get_lvol_metrics(self, lvol_uuid, metrics_lst, history=None): + params = { + "lvol": lvol_uuid + } + return self.get_metrics("lvol", metrics_lst, params, history) + + def get_pool_metrics(self, pool_uuid, metrics_lst, history=None): + params = { + "pool": pool_uuid + } + return self.get_metrics("pool", metrics_lst, params, history) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index 62f37b1e9..d8f51fb43 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -379,11 +379,11 @@ def create_lvol(self, name, size_in_mib, lvs_name, lvol_priority_class=0, ndcs=0 "clear_method": "unmap", "lvol_priority_class": lvol_priority_class, } - # if ndcs or npcs: - # params.update({ - # 'ndcs' : ndcs, - # 'npcs' : npcs, - # }) + if ndcs or npcs: + params.update({ + 'ndcs' : ndcs, + 'npcs' : npcs, + }) return self._request("bdev_lvol_create", params) def delete_lvol(self, name, del_async=False): @@ -922,7 +922,7 @@ def distr_migration_status(self, 
name): params = {"name": name} return self._request("distr_migration_status", params) - def distr_migration_failure_start(self, name, storage_ID, qos_high_priority=False, job_size=1024, jobs=4): + def distr_migration_failure_start(self, name, storage_ID, qos_high_priority=False, job_size=constants.MIG_JOB_SIZE, jobs=constants.MIG_PARALLEL_JOBS): params = { "name": name, "storage_ID": storage_ID, @@ -935,7 +935,7 @@ def distr_migration_failure_start(self, name, storage_ID, qos_high_priority=Fals params["jobs"] = jobs return self._request("distr_migration_failure_start", params) - def distr_migration_expansion_start(self, name, qos_high_priority=False, job_size=1024, jobs=4): + def distr_migration_expansion_start(self, name, qos_high_priority=False, job_size=constants.MIG_JOB_SIZE, jobs=constants.MIG_PARALLEL_JOBS): params = { "name": name, } diff --git a/simplyblock_core/scripts/charts/Chart.yaml b/simplyblock_core/scripts/charts/Chart.yaml index 9d1b62643..380f67bcd 100644 --- a/simplyblock_core/scripts/charts/Chart.yaml +++ b/simplyblock_core/scripts/charts/Chart.yaml @@ -26,11 +26,6 @@ dependencies: version: "25.18.0" repository: "https://prometheus-community.github.io/helm-charts" condition: monitoring.enabled - - name: openebs - version: 3.9.0 - repository: https://openebs.github.io/charts - alias: openebs - condition: openebs.enabled - name: ingress-nginx version: 4.10.1 repository: "https://kubernetes.github.io/ingress-nginx" diff --git a/simplyblock_core/scripts/charts/templates/app_k8s.yaml b/simplyblock_core/scripts/charts/templates/app_k8s.yaml index ec2e5b378..d17ea092a 100644 --- a/simplyblock_core/scripts/charts/templates/app_k8s.yaml +++ b/simplyblock_core/scripts/charts/templates/app_k8s.yaml @@ -19,6 +19,8 @@ spec: app: simplyblock-admin-control spec: serviceAccountName: simplyblock-control-sa + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: simplyblock-control image: "{{ .Values.image.simplyblock.repository }}:{{ 
.Values.image.simplyblock.tag }}" @@ -133,6 +135,8 @@ spec: labels: app: simplyblock-storage-node-monitor spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: storage-node-monitor image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -183,6 +187,8 @@ spec: labels: app: simplyblock-mgmt-node-monitor spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: mgmt-node-monitor image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -235,6 +241,8 @@ spec: labels: app: simplyblock-lvol-stats-collector spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: lvol-stats-collector image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -284,7 +292,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-main-distr-event-collector - spec: + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: main-distr-event-collector image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -334,7 +344,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-capacity-and-stats-collector - spec: + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: capacity-and-stats-collector image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -385,7 +397,8 @@ spec: labels: app: simplyblock-capacity-monitor spec: - + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: capacity-monitor image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -435,7 +448,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-health-check - spec: + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet 
containers: - name: health-check image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -485,7 +500,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-device-monitor - spec: + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: device-monitor image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -535,7 +552,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-lvol-monitor - spec: + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: lvol-monitor image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -584,7 +603,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-snapshot-monitor - spec: + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: snapshot-monitor image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -685,7 +706,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-tasks-runner-restart - spec: + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: tasks-runner-restart image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -735,7 +758,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-tasks-runner-migration - spec: + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: tasks-runner-migration image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -784,7 +809,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-tasks-runner-failed-migration - spec: + spec: + hostNetwork: true + dnsPolicy: 
ClusterFirstWithHostNet containers: - name: tasks-runner-failed-migration image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -833,7 +860,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-tasks-runner-cluster-status - spec: + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: tasks-runner-cluster-status image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -882,7 +911,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-tasks-runner-new-device-migration - spec: + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: tasks-runner-new-device-migration image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -931,7 +962,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-tasks-node-add-runner - spec: + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: tasks-node-addrunner image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -983,7 +1016,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-tasks-runner-port-allow - spec: + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: tasks-runner-port-allow image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" @@ -1032,7 +1067,9 @@ spec: reloader.stakater.com/configmap: "simplyblock-fdb-cluster-config" labels: app: simplyblock-tasks-runner-jc-comp-resume - spec: + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet containers: - name: tasks-runner-jc-comp-resume image: "{{ .Values.image.simplyblock.repository }}:{{ .Values.image.simplyblock.tag }}" diff --git 
a/simplyblock_core/scripts/charts/templates/csi-hostpath-driverinfo.yaml b/simplyblock_core/scripts/charts/templates/csi-hostpath-driverinfo.yaml new file mode 100644 index 000000000..2a9d7d044 --- /dev/null +++ b/simplyblock_core/scripts/charts/templates/csi-hostpath-driverinfo.yaml @@ -0,0 +1,24 @@ +apiVersion: storage.k8s.io/v1 +kind: CSIDriver +metadata: + name: hostpath.csi.k8s.io + labels: + app.kubernetes.io/instance: hostpath.csi.k8s.io + app.kubernetes.io/part-of: csi-driver-host-path + app.kubernetes.io/name: hostpath.csi.k8s.io + app.kubernetes.io/component: csi-driver +spec: + # Supports persistent and ephemeral inline volumes. + volumeLifecycleModes: + - Persistent + - Ephemeral + # To determine at runtime which mode a volume uses, pod info and its + # "csi.storage.k8s.io/ephemeral" entry are needed. + podInfoOnMount: true + # No attacher needed. + attachRequired: false + storageCapacity: false + # Kubernetes may use fsGroup to change permissions and ownership + # of the volume to match user requested fsGroup in the pod's SecurityPolicy + fsGroupPolicy: File + \ No newline at end of file diff --git a/simplyblock_core/scripts/charts/templates/csi-hostpath-plugin.yaml b/simplyblock_core/scripts/charts/templates/csi-hostpath-plugin.yaml new file mode 100644 index 000000000..8e695e593 --- /dev/null +++ b/simplyblock_core/scripts/charts/templates/csi-hostpath-plugin.yaml @@ -0,0 +1,233 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: csi-hostpathplugin-sa + namespace: {{ .Release.Namespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: csi-hostpathplugin +rules: + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "create", "delete", "update", "patch"] + - apiGroups: [""] + resources: ["persistentvolumeclaims"] + verbs: ["get", "list", "watch", "update"] + - apiGroups: [""] + resources: ["persistentvolumeclaims/status"] + verbs: ["get", "update", "patch"] + - apiGroups: 
["storage.k8s.io"] + resources: ["volumeattachments"] + verbs: ["get", "list", "watch", "update"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["csinodes"] + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses"] + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["csistoragecapacities"] + verbs: ["get", "list", "watch", "create", "update", "delete"] + - apiGroups: [""] + resources: ["events"] + verbs: ["create", "patch", "update", "get", "list", "watch"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: csi-hostpathplugin +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: csi-hostpathplugin +subjects: + - kind: ServiceAccount + name: csi-hostpathplugin-sa + namespace: {{ .Release.Namespace }} + +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: csi-hostpathplugin + labels: + app.kubernetes.io/instance: hostpath.csi.k8s.io + app.kubernetes.io/part-of: csi-driver-host-path + app.kubernetes.io/name: csi-hostpathplugin + app.kubernetes.io/component: plugin +spec: + selector: + matchLabels: + app.kubernetes.io/instance: hostpath.csi.k8s.io + app.kubernetes.io/part-of: csi-driver-host-path + app.kubernetes.io/name: csi-hostpathplugin + app.kubernetes.io/component: plugin + template: + metadata: + labels: + app.kubernetes.io/instance: hostpath.csi.k8s.io + app.kubernetes.io/part-of: csi-driver-host-path + app.kubernetes.io/name: csi-hostpathplugin + app.kubernetes.io/component: plugin + spec: + serviceAccountName: csi-hostpathplugin-sa + containers: + - name: csi-provisioner + image: registry.k8s.io/sig-storage/csi-provisioner:v6.0.0 + args: + - -v=5 + - --csi-address=/csi/csi.sock + - --feature-gates=Topology=true + - --node-deployment=true + - --strict-topology=true + 
- --immediate-topology=false + - --worker-threads=5 + env: + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + securityContext: + # This is necessary only for systems with SELinux, where + # non-privileged sidecar containers cannot access unix domain socket + # created by privileged CSI driver container. + privileged: true + volumeMounts: + - mountPath: /csi + name: socket-dir + - name: csi-resizer + image: registry.k8s.io/sig-storage/csi-resizer:v2.0.0 + args: + - -v=5 + - -csi-address=/csi/csi.sock + securityContext: + # This is necessary only for systems with SELinux, where + # non-privileged sidecar containers cannot access unix domain socket + # created by privileged CSI driver container. + privileged: true + volumeMounts: + - mountPath: /csi + name: socket-dir + + - name: node-driver-registrar + image: registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.12.0 + args: + - --v=5 + - --csi-address=/csi/csi.sock + - --kubelet-registration-path=/var/lib/kubelet/plugins/csi-hostpath/csi.sock + securityContext: + # This is necessary only for systems with SELinux, where + # non-privileged sidecar containers cannot access unix domain socket + # created by privileged CSI driver container. 
+ privileged: true + env: + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /csi + name: socket-dir + - mountPath: /registration + name: registration-dir + - mountPath: /csi-data-dir + name: csi-data-dir + + - name: hostpath + image: registry.k8s.io/sig-storage/hostpathplugin:v1.15.0 + args: + - --drivername=hostpath.csi.k8s.io + - --v=5 + - --endpoint=$(CSI_ENDPOINT) + - --nodeid=$(KUBE_NODE_NAME) + - --capacity=slow=10Gi + - --capacity=fast=100Gi + env: + - name: CSI_ENDPOINT + value: unix:///csi/csi.sock + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + securityContext: + privileged: true + ports: + - containerPort: 9898 + name: healthz + protocol: TCP + livenessProbe: + failureThreshold: 5 + httpGet: + path: /healthz + port: healthz + initialDelaySeconds: 10 + timeoutSeconds: 3 + periodSeconds: 2 + volumeMounts: + - mountPath: /csi + name: socket-dir + - mountPath: /var/lib/kubelet/pods + mountPropagation: Bidirectional + name: mountpoint-dir + - mountPath: /var/lib/kubelet/plugins + mountPropagation: Bidirectional + name: plugins-dir + - mountPath: /csi-data-dir + name: csi-data-dir + - mountPath: /dev + name: dev-dir + - name: liveness-probe + volumeMounts: + - mountPath: /csi + name: socket-dir + image: registry.k8s.io/sig-storage/livenessprobe:v2.15.0 + args: + - --csi-address=/csi/csi.sock + - --health-port=9898 + + volumes: + - hostPath: + path: /var/lib/kubelet/plugins/csi-hostpath + type: DirectoryOrCreate + name: socket-dir + - hostPath: + path: /var/lib/kubelet/pods + type: DirectoryOrCreate + name: mountpoint-dir + - hostPath: + path: /var/lib/kubelet/plugins_registry + type: Directory + name: registration-dir + - hostPath: + path: /var/lib/kubelet/plugins + type: Directory + name: plugins-dir + - hostPath: + # 'path' is where PV data is persisted on host. 
+ # using /tmp is also possible while the PVs will not available after plugin container recreation or host reboot + path: /var/lib/csi-hostpath-data/ + type: DirectoryOrCreate + name: csi-data-dir + - hostPath: + path: /dev + type: Directory + name: dev-dir diff --git a/simplyblock_core/scripts/charts/templates/foundationdb.yaml b/simplyblock_core/scripts/charts/templates/foundationdb.yaml index 1a3134e58..4eb7f1410 100644 --- a/simplyblock_core/scripts/charts/templates/foundationdb.yaml +++ b/simplyblock_core/scripts/charts/templates/foundationdb.yaml @@ -2,20 +2,20 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: controller-manager + name: simplyblock-fdb-controller-manager labels: - control-plane: controller-manager - app: controller-manager + control-plane: simplyblock-fdb-controller-manager + app: simplyblock-fdb-controller-manager spec: selector: matchLabels: - app: controller-manager + app: simplyblock-fdb-controller-manager replicas: 1 template: metadata: labels: - control-plane: controller-manager - app: controller-manager + control-plane: simplyblock-fdb-controller-manager + app: simplyblock-fdb-controller-manager spec: securityContext: runAsUser: 4059 @@ -28,7 +28,7 @@ spec: emptyDir: {} - name: fdb-binaries emptyDir: {} - serviceAccountName: controller-manager + serviceAccountName: simplyblock-fdb-controller-manager initContainers: - name: foundationdb-kubernetes-init-7-3 image: foundationdb/fdb-kubernetes-monitor:7.3.63 @@ -51,6 +51,8 @@ spec: containers: - command: - /manager + args: + - "--health-probe-bind-address=:9443" image: foundationdb/fdb-kubernetes-operator:v2.13.0 name: manager env: @@ -86,13 +88,13 @@ spec: apiVersion: v1 kind: ServiceAccount metadata: - name: controller-manager + name: simplyblock-fdb-controller-manager --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: manager-role + name: simplyblock-fdb-manager-role rules: - apiGroups: - "" @@ -164,7 +166,7 @@ apiVersion: 
rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: creationTimestamp: null - name: manager-clusterrole + name: simplyblock-fdb-manager-clusterrole rules: - apiGroups: - "" @@ -179,27 +181,27 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: creationTimestamp: null - name: manager-rolebinding + name: simplyblock-fdb-manager-rolebinding roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: manager-role + name: simplyblock-fdb-manager-role subjects: - kind: ServiceAccount - name: controller-manager + name: simplyblock-fdb-controller-manager --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: creationTimestamp: null - name: manager-clusterrolebinding + name: simplyblock-fdb-manager-clusterrolebinding roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: manager-clusterrole + name: simplyblock-fdb-manager-clusterrole subjects: - kind: ServiceAccount - name: controller-manager + name: simplyblock-fdb-controller-manager namespace: metadata.namespace ##### cluster file ################# @@ -213,7 +215,11 @@ spec: replacements: enabled: true faultDomain: + {{- if .Values.foundationdb.multiAZ }} + key: topology.kubernetes.io/zone + {{- else }} key: foundationdb.org/none + {{- end }} imageType: split labels: filterOnOwnerReference: false @@ -225,10 +231,17 @@ spec: - foundationdb.org/fdb-process-group-id minimumUptimeSecondsForBounce: 60 processCounts: + {{- if .Values.foundationdb.multiAZ }} + cluster_controller: 1 + log: 4 + storage: 4 + stateless: -1 + {{- else }} cluster_controller: 1 log: 3 storage: 3 stateless: -1 + {{- end }} processes: general: customParameters: @@ -270,7 +283,7 @@ spec: runAsUser: 0 volumeClaimTemplate: spec: - storageClassName: openebs-local-hostpath + storageClassName: local-hostpath accessModes: - ReadWriteOnce resources: @@ -285,10 +298,10 @@ spec: resources: limits: cpu: 500m - memory: 2Gi + memory: 4Gi requests: cpu: 100m - memory: 512Mi + memory: 1Gi 
securityContext: runAsUser: 0 affinity: @@ -308,10 +321,10 @@ spec: resources: limits: cpu: 500m - memory: 2Gi + memory: 4Gi requests: cpu: 100m - memory: 512Mi + memory: 1Gi securityContext: runAsUser: 0 affinity: diff --git a/simplyblock_core/scripts/charts/templates/mongodb.yaml b/simplyblock_core/scripts/charts/templates/mongodb.yaml index 740dd7642..815df6505 100644 --- a/simplyblock_core/scripts/charts/templates/mongodb.yaml +++ b/simplyblock_core/scripts/charts/templates/mongodb.yaml @@ -14,7 +14,7 @@ spec: name: data-volume spec: accessModes: [ "ReadWriteOnce" ] - storageClassName: openebs-local-hostpath + storageClassName: local-hostpath resources: requests: storage: 5Gi @@ -22,7 +22,7 @@ spec: name: logs-volume spec: accessModes: [ "ReadWriteOnce" ] - storageClassName: openebs-local-hostpath + storageClassName: local-hostpath resources: requests: storage: 5Gi diff --git a/simplyblock_core/scripts/charts/templates/monitoring_k8s.yaml b/simplyblock_core/scripts/charts/templates/monitoring_k8s.yaml index 9c0f46e1f..1349a33a9 100644 --- a/simplyblock_core/scripts/charts/templates/monitoring_k8s.yaml +++ b/simplyblock_core/scripts/charts/templates/monitoring_k8s.yaml @@ -68,6 +68,8 @@ spec: value: "false" - name: GRAYLOG_ELASTICSEARCH_REPLICAS value: "1" + - name: GRAYLOG_MESSAGE_JOURNAL_MAX_SIZE + value: "10gb" ports: - containerPort: 5044 - containerPort: 5140 diff --git a/simplyblock_core/scripts/charts/templates/storage_class.yaml b/simplyblock_core/scripts/charts/templates/storage_class.yaml index 64e5e6280..b23cb4a07 100644 --- a/simplyblock_core/scripts/charts/templates/storage_class.yaml +++ b/simplyblock_core/scripts/charts/templates/storage_class.yaml @@ -2,9 +2,22 @@ apiVersion: storage.k8s.io/v1 kind: StorageClass metadata: - name: openebs-local-hostpath -provisioner: openebs.io/local + name: local-hostpath + labels: + app.kubernetes.io/instance: hostpath.csi.k8s.io + app.kubernetes.io/part-of: csi-driver-host-path + app.kubernetes.io/name: 
csi-hostpath-fast + app.kubernetes.io/component: storageclass +provisioner: hostpath.csi.k8s.io allowVolumeExpansion: true reclaimPolicy: Retain volumeBindingMode: WaitForFirstConsumer - +{{- if .Values.storageclass.allowedTopologyZones }} +allowedTopologies: +- matchLabelExpressions: + - key: topology.kubernetes.io/zone + values: +{{- range .Values.storageclass.allowedTopologyZones }} + - {{ . }} +{{- end }} +{{- end }} diff --git a/simplyblock_core/scripts/charts/values-template.yaml b/simplyblock_core/scripts/charts/values-template.yaml deleted file mode 100644 index 79693e7cd..000000000 --- a/simplyblock_core/scripts/charts/values-template.yaml +++ /dev/null @@ -1,194 +0,0 @@ -graylog: - rootPasswordSha2: "${GRAYLOG_ROOT_PASSWORD_SHA2}" - passwordSecret: "${GRAYLOG_PASSWORD_SECRET}" - -cluster: - secret: "${CLUSTER_SECRET}" - id: "${CLUSTER_ID}" - ip: "${CLUSTER_IP}" - -monitoring: - enabled: ${ENABLE_MONITORING} - -log: - deletionInterval: "${LOG_DELETION_INTERVAL}" - retentionPeriod: "${RETENTION_PERIOD}" - level: "${LOG_LEVEL}" - maxNumberIndex: "${MAX_NUMBER_OF_INDICES}" - -grafana: - endpoint: "${GRAFANA_ENDPOINT}" - contactPoint: "${CONTACT_POINT}" - -image: - simplyblock: - repository: "${SIMPLYBLOCK_REPOSITORY}" - tag: "${SIMPLYBLOCK_TAG}" - pullPolicy: "Always" - -openebs: - enabled: true - -mongodb: - name: "simplyblock-mongodb" - deployment_name: "simplyblock-mongodb" - resources: - requests: - cpu: 100m - memory: 300Mi - limits: - cpu: 250m - memory: 1Gi - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app.kubernetes.io/component - operator: In - values: - - mongodb - topologyKey: "kubernetes.io/hostname" - -opensearch: - fullnameOverride: "simplyblock-opensearch" - singleNode: true - replicas: 1 - - antiAffinity: "hard" - persistence: - enabled: true - storageClass: openebs-local-hostpath - size: 10Gi - - resources: - requests: - cpu: "100m" - memory: "512Mi" - limits: 
- cpu: "500m" - memory: "3Gi" - - extraEnvs: - - name: OPENSEARCH_JAVA_OPTS - value: "-Xms1g -Xmx1g" - - name: bootstrap.memory_lock - value: "true" - - name: action.auto_create_index - value: "false" - - name: plugins.security.ssl.http.enabled - value: "false" - - name: plugins.security.disabled - value: "true" - - securityConfig: - enabled: false - -prometheus: - server: - fullnameOverride: simplyblock-prometheus - enabled: true - statefulSet: - enabled: true - name: simplyblock-prometheus - replicaCount: 1 - podLabels: - app: simplyblock-prometheus - podAnnotations: {} - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app.kubernetes.io/component - operator: In - values: - - simplyblock-prometheus - topologyKey: "kubernetes.io/hostname" - service: - servicePort: 9090 - type: ClusterIP - gRPC: - enabled: true - servicePort: 10901 - additionalPorts: - - name: http-thanos - port: 10902 - targetPort: 10902 - protocol: TCP - securityContext: - fsGroup: 65534 - persistentVolume: - enabled: true - size: 5Gi - storageClass: openebs-local-hostpath - extraArgs: - storage.tsdb.min-block-duration: 2h - storage.tsdb.max-block-duration: 2h - sidecarContainers: - thanos-sidecar: - image: thanosio/thanos:v0.31.0 - args: - - sidecar - - --tsdb.path=/prometheus - - --prometheus.url=http://localhost:9090 - - --objstore.config-file=/etc/thanos/objstore.yml - ports: - - name: grpc - containerPort: 10901 - - name: http - containerPort: 10902 - volumeMounts: - - name: storage-volume - mountPath: /prometheus - - name: objstore-config - mountPath: /etc/thanos - resources: - requests: - cpu: "100m" - memory: "256Mi" - limits: - cpu: "250m" - memory: "1Gi" - resources: - requests: - cpu: "100m" - memory: "512Mi" - limits: - cpu: "500m" - memory: "1Gi" - configMapOverrideName: simplyblock-prometheus-config - extraVolumes: - - name: objstore-config - configMap: - name: simplyblock-objstore-config - alertmanager: - 
enabled: false - - prometheus-pushgateway: - enabled: false - - prometheus-node-exporter: - enabled: false - - kube-state-metrics: - enabled: false - -ingress: - enabled: true - ingressClassName: nginx - useDNS: ${USE_DNS} - host: "${DNS_NAME}" - tlsSecret: ${TLS_SECRET} - controller: - hostNetwork: ${USE_HOST} - dnsPolicy: ClusterFirstWithHostNet - service: - type: ${SERVICE_TYPE} - nodePorts: - tcp: - 4501: 32451 - extraArgs: - tcp-services-configmap: "${K8S_NAMESPACE}/simplyblock-tcp-services" - nodeSelector: - simplyblock.io/role: mgmt-plane diff --git a/simplyblock_core/scripts/charts/values.yaml b/simplyblock_core/scripts/charts/values.yaml index 467734176..0b70f321e 100644 --- a/simplyblock_core/scripts/charts/values.yaml +++ b/simplyblock_core/scripts/charts/values.yaml @@ -24,9 +24,12 @@ image: ports: lvolNvmfPortStart: - -openebs: - enabled: true + +storageclass: + allowedTopologyZones: [] + +foundationdb: + multiAZ: false mongodb: name: "simplyblock-mongodb" @@ -57,7 +60,7 @@ opensearch: antiAffinity: "hard" persistence: enabled: true - storageClass: openebs-local-hostpath + storageClass: local-hostpath size: 10Gi resources: @@ -123,7 +126,7 @@ prometheus: persistentVolume: enabled: true size: 5Gi - storageClass: openebs-local-hostpath + storageClass: local-hostpath extraArgs: storage.tsdb.min-block-duration: 2h storage.tsdb.max-block-duration: 2h diff --git a/simplyblock_core/scripts/docker-compose-swarm.yml b/simplyblock_core/scripts/docker-compose-swarm.yml index ba0f8b61d..fd79f43c1 100644 --- a/simplyblock_core/scripts/docker-compose-swarm.yml +++ b/simplyblock_core/scripts/docker-compose-swarm.yml @@ -349,6 +349,20 @@ services: environment: SIMPLYBLOCK_LOG_LEVEL: "$LOG_LEVEL" + TasksRunnerLVolSyncDelete: + <<: *service-base + image: $SIMPLYBLOCK_DOCKER_IMAGE + command: "python simplyblock_core/services/tasks_runner_sync_lvol_del.py" + deploy: + placement: + constraints: [node.role == manager] + volumes: + - "/etc/foundationdb:/etc/foundationdb" + 
networks: + - hostnet + environment: + SIMPLYBLOCK_LOG_LEVEL: "$LOG_LEVEL" + networks: monitoring-net: external: true diff --git a/simplyblock_core/services/capacity_and_stats_collector.py b/simplyblock_core/services/capacity_and_stats_collector.py index 6f702d051..07a850edd 100644 --- a/simplyblock_core/services/capacity_and_stats_collector.py +++ b/simplyblock_core/services/capacity_and_stats_collector.py @@ -4,7 +4,6 @@ from simplyblock_core import constants, db_controller, utils from simplyblock_core.models.nvme_device import NVMeDevice from simplyblock_core.models.storage_node import StorageNode -from simplyblock_core.rpc_client import RPCClient from simplyblock_core.models.stats import DeviceStatObject, NodeStatObject, ClusterStatObject logger = utils.get_logger(__name__) @@ -62,17 +61,17 @@ def add_device_stats(cl, device, capacity_dict, stats_dict): if last_record: time_diff = (now - last_record.date) if time_diff > 0: - data['read_bytes_ps'] = int((data['read_bytes'] - last_record['read_bytes']) / time_diff) - data['read_io_ps'] = int((data['read_io'] - last_record['read_io']) / time_diff) - data['read_latency_ps'] = int((data['read_latency_ticks'] - last_record['read_latency_ticks']) / time_diff) + data['read_bytes_ps'] = abs(int((data['read_bytes'] - last_record['read_bytes']) / time_diff)) + data['read_io_ps'] = abs(int((data['read_io'] - last_record['read_io']) / time_diff)) + data['read_latency_ps'] = abs(int((data['read_latency_ticks'] - last_record['read_latency_ticks']) / time_diff)) - data['write_bytes_ps'] = int((data['write_bytes'] - last_record['write_bytes']) / time_diff) - data['write_io_ps'] = int((data['write_io'] - last_record['write_io']) / time_diff) - data['write_latency_ps'] = int((data['write_latency_ticks'] - last_record['write_latency_ticks']) / time_diff) + data['write_bytes_ps'] = abs(int((data['write_bytes'] - last_record['write_bytes']) / time_diff)) + data['write_io_ps'] = abs(int((data['write_io'] - last_record['write_io']) / 
time_diff)) + data['write_latency_ps'] = abs(int((data['write_latency_ticks'] - last_record['write_latency_ticks']) / time_diff)) - data['unmap_bytes_ps'] = int((data['unmap_bytes'] - last_record['unmap_bytes']) / time_diff) - data['unmap_io_ps'] = int((data['unmap_io'] - last_record['unmap_io']) / time_diff) - data['unmap_latency_ps'] = int((data['unmap_latency_ticks'] - last_record['unmap_latency_ticks']) / time_diff) + data['unmap_bytes_ps'] = abs(int((data['unmap_bytes'] - last_record['unmap_bytes']) / time_diff)) + data['unmap_io_ps'] = abs(int((data['unmap_io'] - last_record['unmap_io']) / time_diff)) + data['unmap_latency_ps'] = abs(int((data['unmap_latency_ticks'] - last_record['unmap_latency_ticks']) / time_diff)) else: logger.warning("last record not found") @@ -83,6 +82,11 @@ def add_device_stats(cl, device, capacity_dict, stats_dict): stat_obj.write_to_db(db.kv_store) last_object_record[device.get_id()] = stat_obj + all_stats = db.get_device_stats(device, limit=0) + if len(all_stats) > 10: + for st in all_stats[10:]: + st.remove(db.kv_store) + return stat_obj @@ -117,6 +121,11 @@ def add_node_stats(node, records): stat_obj = NodeStatObject(data=data) stat_obj.write_to_db(db.kv_store) + all_stats = db.get_node_stats(node, limit=0) + if len(all_stats) > 10: + for st in all_stats[10:]: + st.remove(db.kv_store) + return stat_obj @@ -146,6 +155,11 @@ def add_cluster_stats(cl, records): stat_obj = ClusterStatObject(data=data) stat_obj.write_to_db(db.kv_store) + all_stats = db.get_cluster_stats(cl, limit=0) + if len(all_stats) > 10: + for st in all_stats[10:]: + st.remove(db.kv_store) + return stat_obj @@ -173,15 +187,15 @@ def add_cluster_stats(cl, records): logger.error("No devices found in node: %s", node.get_id()) continue - rpc_client = RPCClient( - node.mgmt_ip, node.rpc_port, - node.rpc_username, node.rpc_password, - timeout=5, retry=2) - + rpc_client = node.rpc_client(timeout=5, retry=2) node_devs_stats = {} - ret = rpc_client.get_lvol_stats() - if 
ret: - node_devs_stats = {b['name']: b for b in ret['bdevs']} + try: + ret = rpc_client.get_lvol_stats() + if ret: + node_devs_stats = {b['name']: b for b in ret['bdevs']} + except Exception as e: + logger.error(e) + continue devices_records = [] for device in node.nvme_devices: @@ -189,7 +203,11 @@ def add_cluster_stats(cl, records): if device.status not in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, NVMeDevice.STATUS_CANNOT_ALLOCATE]: logger.info(f"Device is skipped: {device.get_id()} status: {device.status}") continue - capacity_dict = rpc_client.alceml_get_capacity(device.alceml_name) + try: + capacity_dict = rpc_client.alceml_get_capacity(device.alceml_name) + except Exception as e: + logger.error(e) + continue if device.nvme_bdev in node_devs_stats: stats_dict = node_devs_stats[device.nvme_bdev] record = add_device_stats(cl, device, capacity_dict, stats_dict) diff --git a/simplyblock_core/services/health_check_service.py b/simplyblock_core/services/health_check_service.py index bb48e9620..fc7be7fb1 100644 --- a/simplyblock_core/services/health_check_service.py +++ b/simplyblock_core/services/health_check_service.py @@ -1,4 +1,7 @@ # coding=utf-8 +import logging +import sys +import threading import time from datetime import datetime @@ -10,8 +13,6 @@ from simplyblock_core.rpc_client import RPCClient from simplyblock_core import constants, db_controller, distr_controller, storage_node_ops -logger = utils.get_logger(__name__) - utils.init_sentry_sdk() @@ -42,223 +43,246 @@ def set_device_health_check(cluster_id, device, health_check_status): return -# get DB controller -db = db_controller.DBController() +def check_node(snode, logger): -logger.info("Starting health check service") -while True: - clusters = db.get_clusters() - for cluster in clusters: - cluster_id = cluster.get_id() - snodes = db.get_storage_nodes_by_cluster_id(cluster_id) - if not snodes: - logger.warning("storage nodes list is empty") - - for snode in snodes: - logger.info("Node: %s, 
status %s", snode.get_id(), snode.status) - - if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE, - StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: - logger.info(f"Node status is: {snode.status}, skipping") - set_node_health_check(snode, False) - for device in snode.nvme_devices: - set_device_health_check(cluster_id, device, False) - continue - - # 1- check node ping - ping_check = health_controller._check_node_ping(snode.mgmt_ip) - logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}") - - # 2- check node API - node_api_check = health_controller._check_node_api(snode.mgmt_ip) - logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... {node_api_check}") - - # 3- check node RPC - node_rpc_check = health_controller._check_node_rpc( - snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) - logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}") - - is_node_online = ping_check and node_api_check and node_rpc_check - - health_check_status = is_node_online - if node_rpc_check: - logger.info(f"Node device count: {len(snode.nvme_devices)}") - node_devices_check = True - node_remote_devices_check = True - - rpc_client = RPCClient( - snode.mgmt_ip, snode.rpc_port, - snode.rpc_username, snode.rpc_password, - timeout=3, retry=2) - connected_devices = [] - - node_bdevs = rpc_client.get_bdevs() - if node_bdevs: - # node_bdev_names = [b['name'] for b in node_bdevs] - node_bdev_names = {} - for b in node_bdevs: - node_bdev_names[b['name']] = b - for al in b['aliases']: - node_bdev_names[al] = b - else: - node_bdev_names = {} - - subsystem_list = rpc_client.subsystem_list() or [] - subsystems = { - subsystem['nqn']: subsystem - for subsystem - in subsystem_list - } - - for device in snode.nvme_devices: - passed = True - - if device.io_error: - logger.info(f"Device io_error {device.get_id()}") - passed = False - - if device.status != NVMeDevice.STATUS_ONLINE: - logger.info(f"Device 
status {device.status}") - passed = False - - if snode.enable_test_device: - bdevs_stack = [device.nvme_bdev, device.testing_bdev, device.alceml_bdev, device.pt_bdev] - else: - bdevs_stack = [device.nvme_bdev, device.alceml_bdev, device.pt_bdev] - - logger.info(f"Checking Device: {device.get_id()}, status:{device.status}") - problems = 0 - for bdev in bdevs_stack: - if not bdev: - continue - - if not health_controller.check_bdev(bdev, bdev_names=node_bdev_names): - problems += 1 - passed = False - - logger.info(f"Checking Device's BDevs ... ({(len(bdevs_stack) - problems)}/{len(bdevs_stack)})") - - passed &= health_controller.check_subsystem(device.nvmf_nqn, nqns=subsystems) - - set_device_health_check(cluster_id, device, passed) - if device.status == NVMeDevice.STATUS_ONLINE: - node_devices_check &= passed - - logger.info(f"Node remote device: {len(snode.remote_devices)}") - - for remote_device in snode.remote_devices: - org_dev = db.get_storage_device_by_id(remote_device.get_id()) - org_node = db.get_storage_node_by_id(remote_device.node_id) - if org_dev.status == NVMeDevice.STATUS_ONLINE and org_node.status == StorageNode.STATUS_ONLINE: - if health_controller.check_bdev(remote_device.remote_bdev, bdev_names=node_bdev_names): - connected_devices.append(remote_device.get_id()) - continue - - if not org_dev.alceml_bdev: - logger.error(f"device alceml bdev not found!, {org_dev.get_id()}") - continue - - try: - storage_node_ops.connect_device( - f"remote_{org_dev.alceml_bdev}", org_dev, snode, - bdev_names=list(node_bdev_names), reattach=False, - ) - connected_devices.append(org_dev.get_id()) - sn = db.get_storage_node_by_id(snode.get_id()) - for d in sn.remote_devices: - if d.get_id() == remote_device.get_id(): - d.status = NVMeDevice.STATUS_ONLINE - sn.write_to_db() - break - distr_controller.send_dev_status_event(org_dev, NVMeDevice.STATUS_ONLINE, snode) - except RuntimeError: - logger.error(f"Failed to connect to device: {org_dev.get_id()}") - 
node_remote_devices_check = False - - connected_jms = [] - if snode.jm_device and snode.jm_device.get_id(): - jm_device = snode.jm_device - logger.info(f"Node JM: {jm_device.get_id()}") - if jm_device.jm_bdev in node_bdev_names: - logger.info(f"Checking jm bdev: {jm_device.jm_bdev} ... ok") - connected_jms.append(jm_device.get_id()) + snode = db.get_storage_node_by_id(snode.get_id()) + logger.info("Node: %s, status %s", snode.get_id(), snode.status) + + if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE, + StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + logger.info(f"Node status is: {snode.status}, skipping") + set_node_health_check(snode, False) + for device in snode.nvme_devices: + set_device_health_check(snode.cluster_id, device, False) + return + + # 1- check node ping + ping_check = health_controller._check_node_ping(snode.mgmt_ip) + logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}") + + # 2- check node API + node_api_check = health_controller._check_node_api(snode.mgmt_ip) + logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... {node_api_check}") + + # 3- check node RPC + node_rpc_check = health_controller._check_node_rpc( + snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) + logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... 
{node_rpc_check}") + + is_node_online = ping_check and node_api_check and node_rpc_check + + health_check_status = is_node_online + if node_rpc_check: + logger.info(f"Node device count: {len(snode.nvme_devices)}") + node_devices_check = True + node_remote_devices_check = True + + rpc_client = RPCClient( + snode.mgmt_ip, snode.rpc_port, + snode.rpc_username, snode.rpc_password, + timeout=3, retry=2) + connected_devices = [] + + node_bdevs = rpc_client.get_bdevs() + if node_bdevs: + # node_bdev_names = [b['name'] for b in node_bdevs] + node_bdev_names = {} + for b in node_bdevs: + node_bdev_names[b['name']] = b + for al in b['aliases']: + node_bdev_names[al] = b + else: + node_bdev_names = {} + + subsystem_list = rpc_client.subsystem_list() or [] + subsystems = { + subsystem['nqn']: subsystem + for subsystem + in subsystem_list + } + + for device in snode.nvme_devices: + passed = True + + if device.io_error: + logger.info(f"Device io_error {device.get_id()}") + passed = False + + if device.status != NVMeDevice.STATUS_ONLINE: + logger.info(f"Device status {device.status}") + passed = False + + if snode.enable_test_device: + bdevs_stack = [device.nvme_bdev, device.testing_bdev, device.alceml_bdev, device.pt_bdev] + else: + bdevs_stack = [device.nvme_bdev, device.alceml_bdev, device.pt_bdev] + + logger.info(f"Checking Device: {device.get_id()}, status:{device.status}") + problems = 0 + for bdev in bdevs_stack: + if not bdev: + continue + + if not health_controller.check_bdev(bdev, bdev_names=node_bdev_names): + problems += 1 + passed = False + + logger.info(f"Checking Device's BDevs ... 
({(len(bdevs_stack) - problems)}/{len(bdevs_stack)})") + + passed &= health_controller.check_subsystem(device.nvmf_nqn, nqns=subsystems) + + set_device_health_check(snode.cluster_id, device, passed) + if device.status == NVMeDevice.STATUS_ONLINE: + node_devices_check &= passed + + logger.info(f"Node remote device: {len(snode.remote_devices)}") + + for remote_device in snode.remote_devices: + org_dev = db.get_storage_device_by_id(remote_device.get_id()) + org_node = db.get_storage_node_by_id(remote_device.node_id) + if org_dev.status == NVMeDevice.STATUS_ONLINE and org_node.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN]: + if health_controller.check_bdev(remote_device.remote_bdev, bdev_names=node_bdev_names): + connected_devices.append(remote_device.get_id()) + continue + + if not org_dev.alceml_bdev: + logger.error(f"device alceml bdev not found!, {org_dev.get_id()}") + continue + + try: + storage_node_ops.connect_device( + f"remote_{org_dev.alceml_bdev}", org_dev, snode, + bdev_names=list(node_bdev_names), reattach=False, + ) + connected_devices.append(org_dev.get_id()) + sn = db.get_storage_node_by_id(snode.get_id()) + for d in sn.remote_devices: + if d.get_id() == remote_device.get_id(): + d.status = NVMeDevice.STATUS_ONLINE + sn.write_to_db() + break + distr_controller.send_dev_status_event(org_dev, NVMeDevice.STATUS_ONLINE, snode) + except RuntimeError: + logger.error(f"Failed to connect to device: {org_dev.get_id()}") + node_remote_devices_check = False + + connected_jms = [] + if snode.jm_device and snode.jm_device.get_id(): + jm_device = snode.jm_device + logger.info(f"Node JM: {jm_device.get_id()}") + if jm_device.jm_bdev in node_bdev_names: + logger.info(f"Checking jm bdev: {jm_device.jm_bdev} ... ok") + connected_jms.append(jm_device.get_id()) + else: + logger.info(f"Checking jm bdev: {jm_device.jm_bdev} ... 
not found") + + if snode.enable_ha_jm: + logger.info(f"Node remote JMs: {len(snode.remote_jm_devices)}") + for remote_device in snode.remote_jm_devices: + if remote_device.remote_bdev: + check = health_controller.check_bdev(remote_device.remote_bdev, bdev_names=node_bdev_names) + if check: + connected_jms.append(remote_device.get_id()) else: - logger.info(f"Checking jm bdev: {jm_device.jm_bdev} ... not found") - - if snode.enable_ha_jm: - logger.info(f"Node remote JMs: {len(snode.remote_jm_devices)}") - for remote_device in snode.remote_jm_devices: - if remote_device.remote_bdev: - check = health_controller.check_bdev(remote_device.remote_bdev, bdev_names=node_bdev_names) - if check: - connected_jms.append(remote_device.get_id()) - else: + node_remote_devices_check = False + + for jm_id in snode.jm_ids: + if jm_id and jm_id not in connected_jms: + for nd in db.get_storage_nodes(): + if nd.jm_device and nd.jm_device.get_id() == jm_id: + if nd.status == StorageNode.STATUS_ONLINE: node_remote_devices_check = False + break - for jm_id in snode.jm_ids: - if jm_id and jm_id not in connected_jms: - for nd in db.get_storage_nodes(): - if nd.jm_device and nd.jm_device.get_id() == jm_id: - if nd.status == StorageNode.STATUS_ONLINE: - node_remote_devices_check = False - break - - if not node_remote_devices_check and cluster.status in [ - Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: - snode = db.get_storage_node_by_id(snode.get_id()) - snode.remote_jm_devices = storage_node_ops._connect_to_remote_jm_devs(snode) - snode.write_to_db() - - lvstore_check = True + if not node_remote_devices_check and cluster.status in [ + Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: snode = db.get_storage_node_by_id(snode.get_id()) - if snode.lvstore_status == "ready" or snode.status == StorageNode.STATUS_ONLINE or \ - snode.lvstore_status == "failed" : + snode.remote_jm_devices = storage_node_ops._connect_to_remote_jm_devs(snode) + 
snode.write_to_db() + + lvstore_check = True + snode = db.get_storage_node_by_id(snode.get_id()) + if snode.lvstore_status == "ready" or snode.status == StorageNode.STATUS_ONLINE or \ + snode.lvstore_status == "failed": + + lvstore_stack = snode.lvstore_stack + lvstore_check &= health_controller._check_node_lvstore( + lvstore_stack, snode, auto_fix=True, node_bdev_names=node_bdev_names) - lvstore_stack = snode.lvstore_stack + if snode.secondary_node_id: + + lvstore_check &= health_controller._check_node_hublvol( + snode, node_bdev_names=node_bdev_names, node_lvols_nqns=subsystems) + + second_node_1 = db.get_storage_node_by_id(snode.secondary_node_id) + if second_node_1 and second_node_1.status == StorageNode.STATUS_ONLINE: lvstore_check &= health_controller._check_node_lvstore( - lvstore_stack, snode, auto_fix=True, node_bdev_names=node_bdev_names) - - if snode.secondary_node_id: - - lvstore_check &= health_controller._check_node_hublvol( - snode, node_bdev_names=node_bdev_names, node_lvols_nqns=subsystems) - - second_node_1 = db.get_storage_node_by_id(snode.secondary_node_id) - if second_node_1 and second_node_1.status == StorageNode.STATUS_ONLINE: - lvstore_check &= health_controller._check_node_lvstore( - lvstore_stack, second_node_1, auto_fix=True, stack_src_node=snode) - sec_node_check = health_controller._check_sec_node_hublvol(second_node_1) - if not sec_node_check: - if snode.status == StorageNode.STATUS_ONLINE: - ret = second_node_1.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) - if ret: - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - # is_sec_node_leader = True - # check jc_compression status - jc_compression_is_active = second_node_1.rpc_client().jc_compression_get_status(snode.jm_vuid) - if not jc_compression_is_active: - lvstore_check &= health_controller._check_sec_node_hublvol(second_node_1, auto_fix=True) - - - lvol_port_check = False - # if node_api_check: - ports = [snode.lvol_subsys_port] - - if 
snode.lvstore_stack_secondary_1: - second_node_1 = db.get_storage_node_by_id(snode.lvstore_stack_secondary_1) - if second_node_1 and second_node_1.status == StorageNode.STATUS_ONLINE: - ports.append(second_node_1.lvol_subsys_port) - - for port in ports: - lvol_port_check = health_controller._check_port_on_node(snode, port) - logger.info( - f"Check: node {snode.mgmt_ip}, port: {port} ... {lvol_port_check}") - if not lvol_port_check and snode.status != StorageNode.STATUS_SUSPENDED: - tasks_controller.add_port_allow_task(snode.cluster_id, snode.get_id(), port) - - health_check_status = is_node_online and node_devices_check and node_remote_devices_check and lvstore_check - set_node_health_check(snode, bool(health_check_status)) + lvstore_stack, second_node_1, auto_fix=True, stack_src_node=snode) + sec_node_check = health_controller._check_sec_node_hublvol(second_node_1) + if not sec_node_check: + if snode.status == StorageNode.STATUS_ONLINE: + ret = second_node_1.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if ret: + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + # is_sec_node_leader = True + # check jc_compression status + jc_compression_is_active = second_node_1.rpc_client().jc_compression_get_status( + snode.jm_vuid) + if not jc_compression_is_active: + lvstore_check &= health_controller._check_sec_node_hublvol(second_node_1, + auto_fix=True) + + lvol_port_check = False + # if node_api_check: + ports = [snode.lvol_subsys_port] + + if snode.lvstore_stack_secondary_1: + second_node_1 = db.get_storage_node_by_id(snode.lvstore_stack_secondary_1) + if second_node_1 and second_node_1.status == StorageNode.STATUS_ONLINE: + ports.append(second_node_1.lvol_subsys_port) + + for port in ports: + try: + lvol_port_check = health_controller.check_port_on_node(snode, port) + logger.info( + f"Check: node {snode.mgmt_ip}, port: {port} ... 
{lvol_port_check}") + if not lvol_port_check and snode.status != StorageNode.STATUS_SUSPENDED: + tasks_controller.add_port_allow_task(snode.cluster_id, snode.get_id(), port) + except Exception: + logger.error("Check node port failed, connection error") + + health_check_status = is_node_online and node_devices_check and node_remote_devices_check and lvstore_check + set_node_health_check(snode, bool(health_check_status)) + time.sleep(constants.HEALTH_CHECK_INTERVAL_SEC) + + +def loop_for_node(snode): + logger = logging.getLogger() + logger.setLevel("INFO") + logger_handler = logging.StreamHandler(stream=sys.stdout) + logger_handler.setFormatter(logging.Formatter(f'%(asctime)s: node:{snode.mgmt_ip} %(levelname)s: %(message)s')) + logger.addHandler(logger_handler) + while True: + try: + check_node(snode, logger) + except Exception as e: + logger.error(e) + time.sleep(constants.HEALTH_CHECK_INTERVAL_SEC) + + +# logger.info("Starting health check service") +db = db_controller.DBController() +threads_maps: dict[str, threading.Thread] = {} +while True: + clusters = db.get_clusters() + for cluster in clusters: + for node in db.get_storage_nodes_by_cluster_id(cluster.get_id()): + node_id = node.get_id() + if node_id not in threads_maps or threads_maps[node_id].is_alive() is False: + t = threading.Thread(target=loop_for_node, args=(node,)) + t.start() + threads_maps[node_id] = t time.sleep(constants.HEALTH_CHECK_INTERVAL_SEC) diff --git a/simplyblock_core/services/lvol_monitor.py b/simplyblock_core/services/lvol_monitor.py index 884b67396..ddb845169 100644 --- a/simplyblock_core/services/lvol_monitor.py +++ b/simplyblock_core/services/lvol_monitor.py @@ -132,8 +132,7 @@ def process_lvol_delete_finish(lvol): sec_node = db.get_storage_node_by_id(snode.get_id()) if sec_node: - sec_node.lvol_sync_del_queue.append(f"{lvol.lvs_name}/{lvol.lvol_bdev}") - sec_node.write_to_db() + tasks_controller.add_lvol_sync_del_task(sec_node.cluster_id, sec_node.get_id(), 
f"{lvol.lvs_name}/{lvol.lvol_bdev}") lvol_events.lvol_delete(lvol) lvol.remove(db.kv_store) @@ -160,6 +159,185 @@ def process_lvol_delete_try_again(lvol): lvol.write_to_db() +def check_node(snode): + node_bdev_names = [] + node_lvols_nqns = {} + sec_node_bdev_names = {} + sec_node_lvols_nqns = {} + sec_node = None + + if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + node_bdevs = snode.rpc_client().get_bdevs() + if node_bdevs: + node_bdev_names = [b['name'] for b in node_bdevs] + for bdev in node_bdevs: + if "aliases" in bdev and bdev["aliases"]: + node_bdev_names.extend(bdev['aliases']) + ret = snode.rpc_client().subsystem_list() + if ret: + for sub in ret: + node_lvols_nqns[sub['nqn']] = sub + + if snode.secondary_node_id: + sec_node = db.get_storage_node_by_id(snode.secondary_node_id) + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: + sec_rpc_client = RPCClient( + sec_node.mgmt_ip, sec_node.rpc_port, + sec_node.rpc_username, sec_node.rpc_password, timeout=3, retry=2) + ret = sec_rpc_client.get_bdevs() + if ret: + for bdev in ret: + sec_node_bdev_names[bdev['name']] = bdev + + ret = sec_rpc_client.subsystem_list() + if ret: + for sub in ret: + sec_node_lvols_nqns[sub['nqn']] = sub + + for lvol in db.get_lvols_by_node_id(snode.get_id()): + + if lvol.status == LVol.STATUS_IN_CREATION: + continue + + if lvol.status == lvol.STATUS_IN_DELETION: + # check leadership + leader_node = None + snode = db.get_storage_node_by_id(snode.get_id()) + if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + ret = snode.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if not ret: + raise Exception("Failed to get LVol info") + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + leader_node = snode + + if not leader_node and sec_node: + ret = sec_node.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if not ret: + raise 
Exception("Failed to get LVol info") + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + leader_node = sec_node + + if not leader_node: + raise Exception("Failed to get leader node") + + if lvol.deletion_status == "" or lvol.deletion_status != leader_node.get_id(): + lvol_controller.delete_lvol_from_node(lvol.get_id(), leader_node.get_id()) + time.sleep(3) + + try: + ret = leader_node.rpc_client().bdev_lvol_get_lvol_delete_status( + f"{lvol.lvs_name}/{lvol.lvol_bdev}") + except Exception as e: + logger.error(e) + # timeout detected, check other node + break + + if ret == 0 or ret == 2: # Lvol may have already been deleted (not found) or delete completed + process_lvol_delete_finish(lvol) + + elif ret == 1: # Async lvol deletion is in progress or queued + logger.info(f"LVol deletion in progress, id: {lvol.get_id()}") + pre_lvol_delete_rebalance() + + elif ret == 3: # Async deletion is done, but leadership has changed (sync deletion is now blocked) + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Async deletion is done, but leadership has changed (sync deletion is now blocked)") + + elif ret == 4: # No async delete request exists for this lvol + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("No async delete request exists for this lvol") + lvol = db.get_lvol_by_id(lvol.get_id()) + lvol.io_error = True + lvol.write_to_db() + set_lvol_status(lvol, LVol.STATUS_OFFLINE) + + elif ret == -1: # Operation not permitted + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Operation not permitted") + lvol = db.get_lvol_by_id(lvol.get_id()) + lvol.io_error = True + lvol.write_to_db() + set_lvol_status(lvol, LVol.STATUS_OFFLINE) + + elif ret == -2: # No such file or directory + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("No such file or directory") + 
process_lvol_delete_finish(lvol) + + elif ret == -5: # I/O error + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("I/O error") + process_lvol_delete_try_again(lvol) + + elif ret == -11: # Try again + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Try again") + process_lvol_delete_try_again(lvol) + + elif ret == -12: # Out of memory + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Out of memory") + process_lvol_delete_try_again(lvol) + + elif ret == -16: # Device or resource busy + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Device or resource busy") + process_lvol_delete_try_again(lvol) + + elif ret == -19: # No such device + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Finishing lvol delete") + process_lvol_delete_finish(lvol) + + elif ret == -35: # Leadership changed + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Leadership changed") + process_lvol_delete_try_again(lvol) + + elif ret == -36: # Failed to update lvol for deletion + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Failed to update lvol for deletion") + process_lvol_delete_try_again(lvol) + + else: # Failed to update lvol for deletion + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Failed to update lvol for deletion") + + continue + + passed = True + ret = health_controller.check_lvol_on_node( + lvol.get_id(), lvol.node_id, node_bdev_names, node_lvols_nqns) + if not ret: + passed = False + + if lvol.ha_type == "ha": + sec_node = db.get_storage_node_by_id(snode.secondary_node_id) + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: + ret = health_controller.check_lvol_on_node( + lvol.get_id(), snode.secondary_node_id, 
sec_node_bdev_names, sec_node_lvols_nqns) + if not ret: + passed = False + else: + passed = True + + if snode.lvstore_status == "ready": + + logger.info(f"LVol: {lvol.get_id()}, is healthy: {passed}") + set_lvol_health_check(lvol, passed) + if passed: + set_lvol_status(lvol, LVol.STATUS_ONLINE) + + if snode.lvstore_status == "ready": + + for snap in db.get_snapshots_by_node_id(snode.get_id()): + present = health_controller.check_bdev(snap.snap_bdev, bdev_names=node_bdev_names) + set_snapshot_health_check(snap, present) + + + # get DB controller db = db_controller.DBController() @@ -173,195 +351,9 @@ def process_lvol_delete_try_again(lvol): continue for snode in db.get_storage_nodes_by_cluster_id(cluster.get_id()): - node_bdev_names = [] - node_lvols_nqns = {} - sec_node_bdev_names = {} - sec_node_lvols_nqns = {} - sec_node = None - - if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: - node_bdevs = snode.rpc_client().get_bdevs() - if node_bdevs: - node_bdev_names = [b['name'] for b in node_bdevs] - for bdev in node_bdevs: - if "aliases" in bdev and bdev["aliases"]: - node_bdev_names.extend(bdev['aliases']) - ret = snode.rpc_client().subsystem_list() - if ret: - for sub in ret: - node_lvols_nqns[sub['nqn']] = sub - - if snode.secondary_node_id: - sec_node = db.get_storage_node_by_id(snode.secondary_node_id) - if sec_node and sec_node.status==StorageNode.STATUS_ONLINE: - sec_rpc_client = RPCClient( - sec_node.mgmt_ip, sec_node.rpc_port, - sec_node.rpc_username, sec_node.rpc_password, timeout=3, retry=2) - ret = sec_rpc_client.get_bdevs() - if ret: - for bdev in ret: - sec_node_bdev_names[bdev['name']] = bdev - - ret = sec_rpc_client.subsystem_list() - if ret: - for sub in ret: - sec_node_lvols_nqns[sub['nqn']] = sub - - for lvol in db.get_lvols_by_node_id(snode.get_id()): - - if lvol.status == LVol.STATUS_IN_CREATION: - continue - - if lvol.status == lvol.STATUS_IN_DELETION: - # check leadership - leader_node = None 
- snode = db.get_storage_node_by_id(snode.get_id()) - if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: - ret = snode.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) - if not ret: - raise Exception("Failed to get LVol info") - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - leader_node = snode - - if not leader_node and sec_node: - ret = sec_node.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) - if not ret: - raise Exception("Failed to get LVol info") - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - leader_node = sec_node - - if not leader_node: - raise Exception("Failed to get leader node") - - if lvol.deletion_status == "" or lvol.deletion_status != leader_node.get_id(): - lvol_controller.delete_lvol_from_node(lvol.get_id(), leader_node.get_id()) - time.sleep(3) - - try: - ret = leader_node.rpc_client().bdev_lvol_get_lvol_delete_status( - f"{lvol.lvs_name}/{lvol.lvol_bdev}") - except Exception as e: - logger.error(e) - # timeout detected, check other node - break - - if ret == 0 or ret == 2: # Lvol may have already been deleted (not found) or delete completed - process_lvol_delete_finish(lvol) - - elif ret == 1: # Async lvol deletion is in progress or queued - logger.info(f"LVol deletion in progress, id: {lvol.get_id()}") - pre_lvol_delete_rebalance() - - elif ret == 3: # Async deletion is done, but leadership has changed (sync deletion is now blocked) - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Async deletion is done, but leadership has changed (sync deletion is now blocked)") - - elif ret == 4: # No async delete request exists for this lvol - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("No async delete request exists for this lvol") - lvol = db.get_lvol_by_id(lvol.get_id()) - lvol.io_error = True - lvol.write_to_db() - 
set_lvol_status(lvol, LVol.STATUS_OFFLINE) - - elif ret == -1: # Operation not permitted - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Operation not permitted") - lvol = db.get_lvol_by_id(lvol.get_id()) - lvol.io_error = True - lvol.write_to_db() - set_lvol_status(lvol, LVol.STATUS_OFFLINE) - - elif ret == -2: # No such file or directory - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("No such file or directory") - process_lvol_delete_finish(lvol) - - elif ret == -5: # I/O error - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("I/O error") - process_lvol_delete_try_again(lvol) - - elif ret == -11: # Try again - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Try again") - process_lvol_delete_try_again(lvol) - - elif ret == -12: # Out of memory - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Out of memory") - process_lvol_delete_try_again(lvol) - - elif ret == -16: # Device or resource busy - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Device or resource busy") - process_lvol_delete_try_again(lvol) - - elif ret == -19: # No such device - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Finishing lvol delete") - process_lvol_delete_finish(lvol) - - elif ret == -35: # Leadership changed - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Leadership changed") - process_lvol_delete_try_again(lvol) - - elif ret == -36: # Failed to update lvol for deletion - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Failed to update lvol for deletion") - process_lvol_delete_try_again(lvol) - - else: # Failed to update lvol for deletion - logger.info(f"LVol deletion error, id: 
{lvol.get_id()}, error code: {ret}") - logger.error("Failed to update lvol for deletion") - - continue - - passed = True - ret = health_controller.check_lvol_on_node( - lvol.get_id(), lvol.node_id, node_bdev_names, node_lvols_nqns) - if not ret: - passed = False - - if lvol.ha_type == "ha": - sec_node = db.get_storage_node_by_id(snode.secondary_node_id) - if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: - ret = health_controller.check_lvol_on_node( - lvol.get_id(), snode.secondary_node_id, sec_node_bdev_names, sec_node_lvols_nqns) - if not ret: - passed = False - else: - passed = True - - if snode.lvstore_status == "ready": - - logger.info(f"LVol: {lvol.get_id()}, is healthy: {passed}") - set_lvol_health_check(lvol, passed) - if passed: - set_lvol_status(lvol, LVol.STATUS_ONLINE) - - if snode.lvstore_status == "ready": - - for snap in db.get_snapshots_by_node_id(snode.get_id()): - present = health_controller.check_bdev(snap.snap_bdev, bdev_names=node_bdev_names) - set_snapshot_health_check(snap, present) - - snode = db.get_storage_node_by_id(snode.get_id()) - if snode.status == StorageNode.STATUS_ONLINE: - not_deleted = [] - for bdev_name in snode.lvol_sync_del_queue: - logger.info(f"Sync delete bdev: {bdev_name} from node: {snode.get_id()}") - ret, err = snode.rpc_client().delete_lvol(bdev_name, del_async=True) - if not ret: - if "code" in err and err["code"] == -19: - logger.error(f"Sync delete completed with error: {err}") - else: - logger.error(f"Failed to sync delete bdev: {bdev_name} from node: {snode.get_id()}") - not_deleted.append(bdev_name) - snode.lvol_sync_del_queue = not_deleted - snode.write_to_db() + try: + check_node(snode) + except Exception as e: + logger.error(e) time.sleep(constants.LVOL_MONITOR_INTERVAL_SEC) diff --git a/simplyblock_core/services/lvol_stat_collector.py b/simplyblock_core/services/lvol_stat_collector.py index 09aa7d571..18f09d4ce 100644 --- a/simplyblock_core/services/lvol_stat_collector.py +++ 
b/simplyblock_core/services/lvol_stat_collector.py @@ -7,7 +7,6 @@ from simplyblock_core.models.lvol_model import LVol from simplyblock_core.models.stats import LVolStatObject, PoolStatObject from simplyblock_core.models.storage_node import StorageNode -from simplyblock_core.rpc_client import RPCClient logger = utils.get_logger(__name__) @@ -154,6 +153,11 @@ def add_lvol_stats(cluster, lvol, stats_list, capacity_dict=None): stat_obj.write_to_db(db.kv_store) last_object_record[lvol.get_id()] = stat_obj + all_stats = db.get_lvol_stats(lvol, limit=0) + if len(all_stats) > 10: + for st in all_stats[10:]: + st.remove(db.kv_store) + return stat_obj @@ -173,6 +177,12 @@ def add_pool_stats(pool, records): stat_obj = PoolStatObject(data=data) stat_obj.write_to_db(db.kv_store) + + all_stats = db.get_pool_stats(pool, limit=0) + if len(all_stats) > 10: + for st in all_stats[10:]: + st.remove(db.kv_store) + return stat_obj @@ -201,68 +211,66 @@ def add_pool_stats(pool, records): continue if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + try: + rpc_client = snode.rpc_client(timeout=3, retry=2) + if snode.get_id() in all_node_bdev_names and all_node_bdev_names[snode.get_id()]: + node_bdev_names = all_node_bdev_names[snode.get_id()] + else: + node_bdevs = rpc_client.get_bdevs() + if node_bdevs: + node_bdev_names = {b['name']: b for b in node_bdevs} + all_node_bdev_names[snode.get_id()] = node_bdev_names - rpc_client = RPCClient( - snode.mgmt_ip, snode.rpc_port, - snode.rpc_username, snode.rpc_password, timeout=3, retry=2) - - if snode.get_id() in all_node_bdev_names and all_node_bdev_names[snode.get_id()]: - node_bdev_names = all_node_bdev_names[snode.get_id()] - else: - node_bdevs = rpc_client.get_bdevs() - if node_bdevs: - node_bdev_names = {b['name']: b for b in node_bdevs} - all_node_bdev_names[snode.get_id()] = node_bdev_names - - if snode.get_id() in all_node_lvols_nqns and all_node_lvols_nqns[snode.get_id()]: - 
node_lvols_nqns = all_node_lvols_nqns[snode.get_id()] - else: - ret = rpc_client.subsystem_list() - if ret: - node_lvols_nqns = {} - for sub in ret: - node_lvols_nqns[sub['nqn']] = sub - all_node_lvols_nqns[snode.get_id()] = node_lvols_nqns - - if snode.get_id() in all_node_lvols_stats and all_node_lvols_stats[snode.get_id()]: - node_lvols_stats = all_node_lvols_stats[snode.get_id()] - else: - ret = rpc_client.get_lvol_stats() - if ret: - node_lvols_stats = {} - for st in ret['bdevs']: - node_lvols_stats[st['name']] = st - all_node_lvols_stats[snode.get_id()] = node_lvols_stats - - if snode.secondary_node_id: - sec_node = db.get_storage_node_by_id(snode.secondary_node_id) - if sec_node and sec_node.status==StorageNode.STATUS_ONLINE: - sec_rpc_client = RPCClient( - sec_node.mgmt_ip, sec_node.rpc_port, - sec_node.rpc_username, sec_node.rpc_password, timeout=3, retry=2) - - if sec_node.get_id() not in all_node_bdev_names or not all_node_bdev_names[sec_node.get_id()]: - ret = sec_rpc_client.get_bdevs() - if ret: - # node_bdev_names = {} - node_bdev_names = {b['name']: b for b in ret} - all_node_bdev_names[sec_node.get_id()] = node_bdev_names - - if sec_node.get_id() not in all_node_lvols_nqns or not all_node_lvols_nqns[sec_node.get_id()]: - ret = sec_rpc_client.subsystem_list() + if snode.get_id() in all_node_lvols_nqns and all_node_lvols_nqns[snode.get_id()]: + node_lvols_nqns = all_node_lvols_nqns[snode.get_id()] + else: + ret = rpc_client.subsystem_list() if ret: node_lvols_nqns = {} for sub in ret: node_lvols_nqns[sub['nqn']] = sub - all_node_lvols_nqns[sec_node.get_id()] = node_lvols_nqns + all_node_lvols_nqns[snode.get_id()] = node_lvols_nqns - if sec_node.get_id() not in all_node_lvols_stats or not all_node_lvols_stats[sec_node.get_id()]: - ret = sec_rpc_client.get_lvol_stats() + if snode.get_id() in all_node_lvols_stats and all_node_lvols_stats[snode.get_id()]: + node_lvols_stats = all_node_lvols_stats[snode.get_id()] + else: + ret = rpc_client.get_lvol_stats() 
if ret: - sec_node_lvols_stats = {} + node_lvols_stats = {} for st in ret['bdevs']: - sec_node_lvols_stats[st['name']] = st - all_node_lvols_stats[sec_node.get_id()] = sec_node_lvols_stats + node_lvols_stats[st['name']] = st + all_node_lvols_stats[snode.get_id()] = node_lvols_stats + except Exception as e: + logger.error(e) + + if snode.secondary_node_id: + sec_node = db.get_storage_node_by_id(snode.secondary_node_id) + if sec_node and sec_node.status==StorageNode.STATUS_ONLINE: + try: + sec_rpc_client = sec_node.rpc_client(timeout=3, retry=2) + if sec_node.get_id() not in all_node_bdev_names or not all_node_bdev_names[sec_node.get_id()]: + ret = sec_rpc_client.get_bdevs() + if ret: + # node_bdev_names = {} + node_bdev_names = {b['name']: b for b in ret} + all_node_bdev_names[sec_node.get_id()] = node_bdev_names + if sec_node.get_id() not in all_node_lvols_nqns or not all_node_lvols_nqns[sec_node.get_id()]: + ret = sec_rpc_client.subsystem_list() + if ret: + node_lvols_nqns = {} + for sub in ret: + node_lvols_nqns[sub['nqn']] = sub + all_node_lvols_nqns[sec_node.get_id()] = node_lvols_nqns + + if sec_node.get_id() not in all_node_lvols_stats or not all_node_lvols_stats[sec_node.get_id()]: + ret = sec_rpc_client.get_lvol_stats() + if ret: + sec_node_lvols_stats = {} + for st in ret['bdevs']: + sec_node_lvols_stats[st['name']] = st + all_node_lvols_stats[sec_node.get_id()] = sec_node_lvols_stats + except Exception as e: + logger.error(e) for lvol in lvol_list: if lvol.status in [LVol.STATUS_IN_CREATION, LVol.STATUS_IN_DELETION]: diff --git a/simplyblock_core/services/main_distr_event_collector.py b/simplyblock_core/services/main_distr_event_collector.py index 31dffeda0..bd4acd16f 100644 --- a/simplyblock_core/services/main_distr_event_collector.py +++ b/simplyblock_core/services/main_distr_event_collector.py @@ -84,9 +84,9 @@ def process_device_event(event): else: distr_controller.send_dev_status_event(device_obj, NVMeDevice.STATUS_UNAVAILABLE, event_node_obj) 
event_node_obj = db.get_storage_node_by_id(event_node_obj.get_id()) - for dev in event_node_obj.remote_devices: - if dev.get_id() == device_obj.get_id(): - event_node_obj.remote_devices.remove(dev) + for remote_dev in event_node_obj.remote_devices: + if remote_dev.get_id() == device_obj.get_id(): + event_node_obj.remote_devices.remove(remote_dev) event_node_obj.write_to_db() break diff --git a/simplyblock_core/services/snapshot_monitor.py b/simplyblock_core/services/snapshot_monitor.py index c82476e7b..5006eb431 100644 --- a/simplyblock_core/services/snapshot_monitor.py +++ b/simplyblock_core/services/snapshot_monitor.py @@ -5,10 +5,9 @@ from simplyblock_core import constants, db_controller, utils from simplyblock_core.models.cluster import Cluster -from simplyblock_core.controllers import health_controller, snapshot_events +from simplyblock_core.controllers import health_controller, snapshot_events, tasks_controller from simplyblock_core.models.snapshot import SnapShot from simplyblock_core.models.storage_node import StorageNode -from simplyblock_core.rpc_client import RPCClient logger = utils.get_logger(__name__) @@ -76,8 +75,7 @@ def process_snap_delete_finish(snap, leader_node): non_leader = db.get_storage_node_by_id(non_leader_id) if non_leader: - non_leader.lvol_sync_del_queue.append(snap.snap_bdev) - non_leader.write_to_db() + tasks_controller.add_lvol_sync_del_task(non_leader.cluster_id, non_leader.get_id(), snap.snap_bdev) snapshot_events.snapshot_delete(snap) snap.remove(db.kv_store) @@ -96,6 +94,115 @@ def set_snap_offline(snap): sn.write_to_db() +def process_snap_delete(snap, snode): + # check leadership + leader_node = None + if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, + StorageNode.STATUS_DOWN]: + ret = snode.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if not ret: + raise Exception("Failed to get LVol store info") + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + 
leader_node = snode + + if not leader_node and sec_node: + ret = sec_node.rpc_client().bdev_lvol_get_lvstores(sec_node.lvstore) + if not ret: + raise Exception("Failed to get LVol store info") + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + leader_node = sec_node + + if not leader_node: + raise Exception("Failed to get leader node") + + if snap.deletion_status == "" or snap.deletion_status != leader_node.get_id(): + + ret, _ = leader_node.rpc_client().delete_lvol(snap.snap_bdev) + if not ret: + logger.error(f"Failed to delete snap from node: {snode.get_id()}") + return False + snap = db.get_snapshot_by_id(snap.get_id()) + snap.deletion_status = leader_node.get_id() + snap.write_to_db() + + time.sleep(3) + + try: + ret = leader_node.rpc_client().bdev_lvol_get_lvol_delete_status(snap.snap_bdev) + except Exception as e: + logger.error(e) + # timeout detected, check other node + return False + + if ret == 0 or ret == 2: # Lvol may have already been deleted (not found) or delete completed + process_snap_delete_finish(snap, leader_node) + + elif ret == 1: # Async lvol deletion is in progress or queued + logger.info(f"Snap deletion in progress, id: {snap.get_id()}") + + elif ret == 3: # Async deletion is done, but leadership has changed (sync deletion is now blocked) + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error( + "Async deletion is done, but leadership has changed (sync deletion is now blocked)") + + elif ret == 4: # No async delete request exists for this Snap + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("No async delete request exists for this snap") + set_snap_offline(snap) + + elif ret == -1: # Operation not permitted + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Operation not permitted") + process_snap_delete_try_again(snap) + + elif ret == -2: # No such file or directory + 
logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("No such file or directory") + process_snap_delete_finish(snap, leader_node) + + elif ret == -5: # I/O error + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("I/O error") + process_snap_delete_try_again(snap) + + elif ret == -11: # Try again + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Try again") + process_snap_delete_try_again(snap) + + elif ret == -12: # Out of memory + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Out of memory") + process_snap_delete_try_again(snap) + + elif ret == -16: # Device or resource busy + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Device or resource busy") + process_snap_delete_try_again(snap) + + elif ret == -19: # No such device + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("No such device") + set_snap_offline(snap) + + elif ret == -35: # Leadership changed + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Leadership changed") + process_snap_delete_try_again(snap) + + elif ret == -36: # Failed to update lvol for deletion + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Failed to update snapshot for deletion") + process_snap_delete_try_again(snap) + + else: # Failed to update lvol for deletion + logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") + logger.error("Failed to update snapshot for deletion") + + + # get DB controller db = db_controller.DBController() @@ -110,159 +217,46 @@ def set_snap_offline(snap): for snode in db.get_storage_nodes_by_cluster_id(cluster.get_id()): node_bdev_names = [] - node_lvols_nqns = {} sec_node_bdev_names = {} - sec_node_lvols_nqns = {} sec_node = None if snode.status 
in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: - - rpc_client = RPCClient( - snode.mgmt_ip, snode.rpc_port, - snode.rpc_username, snode.rpc_password, timeout=3, retry=2) - node_bdevs = rpc_client.get_bdevs() + rpc_client = snode.rpc_client(timeout=3, retry=2) + try: + node_bdevs = rpc_client.get_bdevs() + except Exception as e: + logger.error(e) + continue if node_bdevs: node_bdev_names = [b['name'] for b in node_bdevs] for bdev in node_bdevs: if "aliases" in bdev and bdev["aliases"]: node_bdev_names.extend(bdev['aliases']) - ret = rpc_client.subsystem_list() - if ret: - for sub in ret: - node_lvols_nqns[sub['nqn']] = sub - if snode.secondary_node_id: sec_node = db.get_storage_node_by_id(snode.secondary_node_id) - if sec_node and sec_node.status==StorageNode.STATUS_ONLINE: - sec_rpc_client = RPCClient( - sec_node.mgmt_ip, sec_node.rpc_port, - sec_node.rpc_username, sec_node.rpc_password, timeout=3, retry=2) - ret = sec_rpc_client.get_bdevs() + if sec_node and sec_node.status in [ + StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + sec_rpc_client = sec_node.rpc_client(timeout=3, retry=2) + try: + ret = sec_rpc_client.get_bdevs() + except Exception as e: + logger.error(e) + continue if ret: for bdev in ret: sec_node_bdev_names[bdev['name']] = bdev - ret = sec_rpc_client.subsystem_list() - if ret: - for sub in ret: - sec_node_lvols_nqns[sub['nqn']] = sub - - if snode.lvstore_status == "ready": - - for snap in db.get_snapshots_by_node_id(snode.get_id()): - if snap.status == SnapShot.STATUS_ONLINE: - - present = health_controller.check_bdev(snap.snap_bdev, bdev_names=node_bdev_names) + for snap in db.get_snapshots_by_node_id(snode.get_id()): + if snap.status == SnapShot.STATUS_ONLINE: + present = health_controller.check_bdev(snap.snap_bdev, bdev_names=node_bdev_names) + if snode.lvstore_status == "ready": set_snapshot_health_check(snap, present) - elif snap.status == SnapShot.STATUS_IN_DELETION: 
- - # check leadership - leader_node = None - if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, - StorageNode.STATUS_DOWN]: - ret = snode.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) - if not ret: - raise Exception("Failed to get LVol store info") - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - leader_node = snode - - if not leader_node and sec_node: - ret = sec_node.rpc_client().bdev_lvol_get_lvstores(sec_node.lvstore) - if not ret: - raise Exception("Failed to get LVol store info") - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - leader_node = sec_node - - if not leader_node: - raise Exception("Failed to get leader node") - - if snap.deletion_status == "" or snap.deletion_status != leader_node.get_id(): - - ret, _ = leader_node.rpc_client().delete_lvol(snap.snap_bdev) - if not ret: - logger.error(f"Failed to delete snap from node: {snode.get_id()}") - continue - snap = db.get_snapshot_by_id(snap.get_id()) - snap.deletion_status = leader_node.get_id() - snap.write_to_db() - - time.sleep(3) - - try: - ret = leader_node.rpc_client().bdev_lvol_get_lvol_delete_status(snap.snap_bdev) - except Exception as e: - logger.error(e) - # timeout detected, check other node - break - - if ret == 0 or ret == 2: # Lvol may have already been deleted (not found) or delete completed - process_snap_delete_finish(snap, leader_node) - - elif ret == 1: # Async lvol deletion is in progress or queued - logger.info(f"Snap deletion in progress, id: {snap.get_id()}") - - elif ret == 3: # Async deletion is done, but leadership has changed (sync deletion is now blocked) - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error( - "Async deletion is done, but leadership has changed (sync deletion is now blocked)") - - elif ret == 4: # No async delete request exists for this Snap - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: 
{ret}") - logger.error("No async delete request exists for this snap") - set_snap_offline(snap) - - elif ret == -1: # Operation not permitted - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Operation not permitted") - process_snap_delete_try_again(snap) - - elif ret == -2: # No such file or directory - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("No such file or directory") - process_snap_delete_finish(snap, leader_node) - - elif ret == -5: # I/O error - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("I/O error") - process_snap_delete_try_again(snap) - - elif ret == -11: # Try again - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Try again") - process_snap_delete_try_again(snap) - - elif ret == -12: # Out of memory - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Out of memory") - process_snap_delete_try_again(snap) - - elif ret == -16: # Device or resource busy - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Device or resource busy") - process_snap_delete_try_again(snap) - - elif ret == -19: # No such device - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("No such device") - set_snap_offline(snap) - - elif ret == -35: # Leadership changed - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Leadership changed") - process_snap_delete_try_again(snap) - - elif ret == -36: # Failed to update lvol for deletion - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Failed to update snapshot for deletion") - process_snap_delete_try_again(snap) - - else: # Failed to update lvol for deletion - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - 
logger.error("Failed to update snapshot for deletion") - + elif snap.status == SnapShot.STATUS_IN_DELETION: + try: + process_snap_delete(snap, snode) + except Exception as e: + logger.error(e) time.sleep(constants.LVOL_MONITOR_INTERVAL_SEC) diff --git a/simplyblock_core/services/storage_node_monitor.py b/simplyblock_core/services/storage_node_monitor.py index 17a7d0369..b3c04d27a 100644 --- a/simplyblock_core/services/storage_node_monitor.py +++ b/simplyblock_core/services/storage_node_monitor.py @@ -3,7 +3,6 @@ import time from datetime import datetime, timezone - from simplyblock_core import constants, db_controller, cluster_ops, storage_node_ops, utils from simplyblock_core.controllers import health_controller, device_controller, tasks_controller, storage_events from simplyblock_core.models.cluster import Cluster @@ -14,7 +13,6 @@ logger = utils.get_logger(__name__) - # get DB controller db = db_controller.DBController() @@ -74,13 +72,15 @@ def get_next_cluster_status(cluster_id): continue online_nodes += 1 # check for jm rep tasks: - ret = node.rpc_client().jc_get_jm_status(node.jm_vuid) - if ret: + try: + ret = node.rpc_client().jc_get_jm_status(node.jm_vuid) for jm in ret: if ret[jm] is False: # jm is not ready (has active replication task) jm_replication_tasks = True logger.warning("Replication task found!") break + except Exception: + logger.warning("Failed to get replication task!") elif node.status == StorageNode.STATUS_REMOVED: pass else: @@ -114,11 +114,12 @@ def get_next_cluster_status(cluster_id): k = cluster.distr_npcs # if number of devices in the cluster unavailable on DIFFERENT nodes > k --> I cannot read and in some cases cannot write (suspended) - if affected_nodes == k and (not cluster.strict_node_anti_affinity or online_nodes >= (n+k)): + if affected_nodes == k and (not cluster.strict_node_anti_affinity or online_nodes >= (n + k)): return Cluster.STATUS_DEGRADED elif jm_replication_tasks: return Cluster.STATUS_DEGRADED - elif (affected_nodes > 
k or online_devices < (n + k) or (online_nodes < (n+k) and cluster.strict_node_anti_affinity)): + elif (affected_nodes > k or online_devices < (n + k) or ( + online_nodes < (n + k) and cluster.strict_node_anti_affinity)): return Cluster.STATUS_SUSPENDED else: return Cluster.STATUS_ACTIVE @@ -132,11 +133,11 @@ def update_cluster_status(cluster_id): for task in db.get_job_tasks(cluster_id): if task.status != JobSchedule.STATUS_DONE and task.function_name in [ JobSchedule.FN_DEV_MIG, JobSchedule.FN_NEW_DEV_MIG, JobSchedule.FN_FAILED_DEV_MIG]: - if task.retry == 0: + if "migration" not in task.function_params: first_iter_task_pending += 1 cluster = db.get_cluster_by_id(cluster_id) - cluster.is_re_balancing = first_iter_task_pending > 0 + cluster.is_re_balancing = first_iter_task_pending > 0 cluster.write_to_db() current_cluster_status = cluster.status @@ -145,7 +146,7 @@ def update_cluster_status(cluster_id): return if current_cluster_status == Cluster.STATUS_DEGRADED and next_current_status == Cluster.STATUS_ACTIVE: - # if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_UNREADY] and cluster_current_status == Cluster.STATUS_ACTIVE: + # if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_UNREADY] and cluster_current_status == Cluster.STATUS_ACTIVE: # cluster_ops.cluster_activate(cluster_id, True) cluster_ops.set_cluster_status(cluster_id, Cluster.STATUS_ACTIVE) return @@ -186,7 +187,6 @@ def update_cluster_status(cluster_id): cluster_ops.set_cluster_status(cluster_id, next_current_status) - def set_node_online(node): if node.status != StorageNode.STATUS_ONLINE: @@ -211,24 +211,56 @@ def set_node_online(node): if online_devices_list: tasks_controller.add_device_mig_task(online_devices_list, node.cluster_id) -def set_node_offline(node, set_devs_offline=False): - if node.status != StorageNode.STATUS_UNREACHABLE: - # set node unavailable - storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_UNREACHABLE) + 
update_cluster_status(cluster_id) + - # if set_devs_offline: - # # set devices unavailable - # for dev in node.nvme_devices: - # if dev.status in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY]: - # device_controller.device_set_unavailable(dev.get_id()) +def set_node_offline(node): + if node.status != StorageNode.STATUS_OFFLINE: + try: + storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_OFFLINE) + for dev in node.nvme_devices: + if dev.status in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, + NVMeDevice.STATUS_CANNOT_ALLOCATE]: + device_controller.device_set_unavailable(dev.get_id()) + update_cluster_status(cluster_id) + # initiate restart + tasks_controller.add_node_to_auto_restart(node) + except Exception as e: + logger.debug("Setting node to OFFLINE state failed") + logger.error(e) + + +def set_node_unreachable(node): + if node.status != StorageNode.STATUS_UNREACHABLE: + try: + storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_UNREACHABLE) + update_cluster_status(cluster_id) + except Exception as e: + logger.debug("Setting node to UNREACHABLE state failed") + logger.error(e) + + +def set_node_schedulable(node): + if node.status != StorageNode.STATUS_SCHEDULABLE: + try: + storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_SCHEDULABLE) + # initiate shutdown + # initiate restart + tasks_controller.add_node_to_auto_restart(node) + for dev in node.nvme_devices: + if dev.status in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, + NVMeDevice.STATUS_CANNOT_ALLOCATE]: + device_controller.device_set_unavailable(dev.get_id()) + update_cluster_status(cluster_id) + except Exception as e: + logger.debug("Setting node to SCHEDULABLE state failed") + logger.error(e) - # # set jm dev offline - # if node.jm_device.status != JMDevice.STATUS_UNAVAILABLE: - # device_controller.set_jm_device_state(node.jm_device.get_id(), JMDevice.STATUS_UNAVAILABLE) def set_node_down(node): if node.status not in 
[StorageNode.STATUS_DOWN, StorageNode.STATUS_SUSPENDED]: storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_DOWN) + update_cluster_status(cluster_id) def node_rpc_timeout_check_and_report(node): @@ -242,179 +274,151 @@ def node_rpc_timeout_check_and_report(node): except Exception as e: logger.debug(e) # RPC timeout detected, send to cluster log - storage_events.snode_rpc_timeout(node, time.time()-start_time) + storage_events.snode_rpc_timeout(node, int(time.time() - start_time)) + return False -logger.info("Starting node monitor") -while True: - clusters = db.get_clusters() - for cluster in clusters: - cluster_id = cluster.get_id() - if cluster.status == Cluster.STATUS_IN_ACTIVATION: - logger.info(f"Cluster status is: {cluster.status}, skipping monitoring") - continue +def node_port_check_fun(snode): + node_port_check = True + if snode.lvstore_status == "ready": + ports = [snode.nvmf_port] + if snode.lvstore_stack_secondary_1: + for n in db.get_primary_storage_nodes_by_secondary_node_id(snode.get_id()): + if n.lvstore_status == "ready": + ports.append(n.lvol_subsys_port) + if not snode.is_secondary_node: + ports.append(snode.lvol_subsys_port) - nodes = db.get_storage_nodes_by_cluster_id(cluster_id) - for snode in nodes: + for port in ports: + try: + ret = health_controller.check_port_on_node(snode, port) + logger.info(f"Check: node port {snode.mgmt_ip}, {port} ... {ret}") + node_port_check &= ret + except Exception: + logger.error("Check node port failed, connection error") - # get fresh node object, something could have changed until the last for loop is reached - snode = db.get_storage_node_by_id(snode.get_id()) + node_data_nic_ping_check = False + for data_nic in snode.data_nics: + if data_nic.ip4_address: + data_ping_check = health_controller._check_node_ping(data_nic.ip4_address) + logger.info(f"Check: ping data nic {data_nic.ip4_address} ... 
{data_ping_check}") + node_data_nic_ping_check |= data_ping_check - if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE, - StorageNode.STATUS_SCHEDULABLE, StorageNode.STATUS_DOWN]: - logger.info(f"Node status is: {snode.status}, skipping") - continue + node_port_check &= node_data_nic_ping_check - if snode.status == StorageNode.STATUS_ONLINE and snode.lvstore_status == "in_creation": - logger.info(f"Node lvstore is in creation: {snode.get_id()}, skipping") - continue + return node_port_check - logger.info(f"Checking node {snode.hostname}") - # 1- check node ping - ping_check = health_controller._check_node_ping(snode.mgmt_ip) - logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}") - if not ping_check: - time.sleep(1) - ping_check = health_controller._check_node_ping(snode.mgmt_ip) - logger.info(f"Check 2: ping mgmt ip {snode.mgmt_ip} ... {ping_check}") +def check_node(snode): + snode = db.get_storage_node_by_id(snode.get_id()) - # 2- check node API - node_api_check = health_controller._check_node_api(snode.mgmt_ip) - logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... {node_api_check}") + if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE, + StorageNode.STATUS_SCHEDULABLE, StorageNode.STATUS_DOWN]: + logger.info(f"Node status is: {snode.status}, skipping") + return False - if snode.status == StorageNode.STATUS_SCHEDULABLE and not ping_check and not node_api_check: - continue + if snode.status == StorageNode.STATUS_ONLINE and snode.lvstore_status == "in_creation": + logger.info(f"Node lvstore is in creation: {snode.get_id()}, skipping") + return False - spdk_process = False - if node_api_check: - # 3- check spdk_process - spdk_process = health_controller._check_spdk_process_up(snode.mgmt_ip, snode.rpc_port) - logger.info(f"Check: spdk process {snode.mgmt_ip}:5000 ... 
{spdk_process}") + logger.info(f"Checking node {snode.hostname}") - # 4- check rpc - node_rpc_check = health_controller._check_node_rpc( - snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password, timeout=5, retry=2) - logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}") - if not node_rpc_check and snode.get_id() not in node_rpc_timeout_threads: - t = threading.Thread(target=node_rpc_timeout_check_and_report, args=(snode,)) - t.start() - node_rpc_timeout_threads[snode.get_id()] = t - - if ping_check and node_api_check and spdk_process and not node_rpc_check: - start_time = time.time() - while time.time() < start_time + 60: - node_rpc_check = health_controller._check_node_rpc( - snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password, timeout=5, retry=2) - logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}") - if node_rpc_check: - break + # 1- check node ping + ping_check = health_controller._check_node_ping(snode.mgmt_ip) + logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}") + if not ping_check: + logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}: FAILED") + set_node_unreachable(snode) + return False - node_port_check = True - - if spdk_process and node_rpc_check and snode.lvstore_status == "ready": - ports = [snode.nvmf_port] - if snode.lvstore_stack_secondary_1: - for n in db.get_primary_storage_nodes_by_secondary_node_id(snode.get_id()): - if n.lvstore_status == "ready": - ports.append(n.lvol_subsys_port) - if not snode.is_secondary_node: - ports.append(snode.lvol_subsys_port) - - for port in ports: - ret = health_controller._check_port_on_node(snode, port) - logger.info(f"Check: node port {snode.mgmt_ip}, {port} ... 
{ret}") - node_port_check &= ret - - node_data_nic_ping_check = False - for data_nic in snode.data_nics: - if data_nic.ip4_address: - data_ping_check = health_controller._check_node_ping(data_nic.ip4_address) - logger.info(f"Check: ping data nic {data_nic.ip4_address} ... {data_ping_check}") - node_data_nic_ping_check |= data_ping_check - - node_port_check &= node_data_nic_ping_check - - cluster = db.get_cluster_by_id(cluster.get_id()) - - # is_node_online = ping_check and spdk_process and node_rpc_check and node_port_check - is_node_online = spdk_process or node_rpc_check - if is_node_online: - - if snode.status == StorageNode.STATUS_UNREACHABLE: - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_UNREADY, - Cluster.STATUS_SUSPENDED, Cluster.STATUS_READONLY]: - # tasks_controller.add_node_to_auto_restart(snode) - set_node_online(snode) - continue - - if not node_port_check: - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: - logger.error("Port check failed") - set_node_down(snode) - continue - - set_node_online(snode) - - # # check JM device - # if snode.jm_device: - # if snode.jm_device.status in [JMDevice.STATUS_ONLINE, JMDevice.STATUS_UNAVAILABLE]: - # ret = health_controller.check_jm_device(snode.jm_device.get_id()) - # if ret: - # logger.info(f"JM bdev is online: {snode.jm_device.get_id()}") - # if snode.jm_device.status != JMDevice.STATUS_ONLINE: - # device_controller.set_jm_device_state(snode.jm_device.get_id(), JMDevice.STATUS_ONLINE) - # else: - # logger.error(f"JM bdev is offline: {snode.jm_device.get_id()}") - # if snode.jm_device.status != JMDevice.STATUS_UNAVAILABLE: - # device_controller.set_jm_device_state(snode.jm_device.get_id(), - # JMDevice.STATUS_UNAVAILABLE) - else: + # 2- check node API + try: + snode_api = SNodeClient(f"{snode.mgmt_ip}:5000", timeout=10, retry=2) + ret, _ = snode_api.is_live() + logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... 
{ret}") + if not ret: + logger.info("Check: node API failed, setting node unreachable") + set_node_unreachable(snode) + return False + except Exception as e: + logger.debug(e) + set_node_unreachable(snode) + return False - if not ping_check and not node_api_check and not spdk_process: - # restart on new node - storage_node_ops.set_node_status(snode.get_id(), StorageNode.STATUS_SCHEDULABLE) - - elif ping_check and node_api_check and (not spdk_process or not node_rpc_check): - # add node to auto restart - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_UNREADY, - Cluster.STATUS_SUSPENDED, Cluster.STATUS_READONLY]: - if not spdk_process and not node_rpc_check: - logger.info("ping is fine, snodeapi is fine, But no spdk process and no rpc check, " - "So that we set device offline") - set_node_offline(snode, set_devs_offline=(not spdk_process and not node_rpc_check)) - try: - ret = snode.rpc_client(timeout=10).get_version() - if not ret: - logger.debug("False RPC response, adding node to auto restart") - tasks_controller.add_node_to_auto_restart(snode) - except Exception as e: - logger.debug("Timeout to get RPC response, skipping restart") - logger.error(e) - - elif not node_port_check: - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: - logger.error("Port check failed") - set_node_down(snode) - - else: - set_node_offline(snode, set_devs_offline=not spdk_process) - - if ping_check and node_api_check and spdk_process and not node_rpc_check: - # restart spdk proxy cont - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_UNREADY, - Cluster.STATUS_SUSPENDED, Cluster.STATUS_READONLY]: - logger.info(f"Restarting spdk_proxy_{snode.rpc_port} on {snode.get_id()}") - snode_api = SNodeClient(f"{snode.mgmt_ip}:5000", timeout=60, retry=1) - ret, err = snode_api.spdk_proxy_restart(snode.rpc_port) - if ret: - logger.info(f"Restarting spdk_proxy on {snode.get_id()} 
successfully") - continue - if err: - logger.error(err) + # 3- check spdk process through node API + try: + snode_api = SNodeClient(f"{snode.mgmt_ip}:5000", timeout=20, retry=2) + is_up, _ = snode_api.spdk_process_is_up( snode.rpc_port, snode.cluster_id) + logger.info(f"Check: spdk process {snode.mgmt_ip}:5000 ... {bool(is_up)}") + if not is_up: + logger.info("Check: node API failed, setting node offline") + set_node_offline(snode) + return False + except Exception as e: + logger.debug(e) + return False + + # 4- check node rpc interface + node_rpc_check = health_controller._check_node_rpc( + snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password, timeout=20, retry=2) + logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}") + + if not node_rpc_check and snode.get_id() not in node_rpc_timeout_threads: + t = threading.Thread(target=node_rpc_timeout_check_and_report, args=(snode,)) + t.start() + node_rpc_timeout_threads[snode.get_id()] = t + + if not node_rpc_check: + logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... 
{node_rpc_check}:FAILED") + set_node_schedulable(snode) + return False + + node_port_check = node_port_check_fun(snode) + + if not node_port_check: + cluster = db.get_cluster_by_id(snode.cluster_id) + if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: + logger.error("Port check failed") + set_node_down(snode) + return True - update_cluster_status(cluster_id) + set_node_online(snode) + + +def loop_for_node(snode): + # global logger + # logger = logging.getLogger() + # logger_handler = logging.StreamHandler(stream=sys.stdout) + # logger_handler.setFormatter(logging.Formatter(f'%(asctime)s: node:{snode.mgmt_ip} %(levelname)s: %(message)s')) + # logger.addHandler(logger_handler) + while True: + check_node(snode) + logger.info(f"Sleeping for {constants.NODE_MONITOR_INTERVAL_SEC} seconds") + time.sleep(constants.NODE_MONITOR_INTERVAL_SEC) + + +logger.info("Starting node monitor") +threads_maps: dict[str, threading.Thread] = {} + +while True: + clusters = db.get_clusters() + for cluster in clusters: + cluster_id = cluster.get_id() + if cluster.status == Cluster.STATUS_IN_ACTIVATION: + logger.info(f"Cluster status is: {cluster.status}, skipping monitoring") + continue + + nodes = db.get_storage_nodes_by_cluster_id(cluster_id) + for node in nodes: + node_id = node.get_id() + if node_id not in threads_maps or threads_maps[node_id].is_alive() is False: + t = threading.Thread(target=loop_for_node, args=(node,)) + t.start() + threads_maps[node_id] = t - logger.info(f"Sleeping for {constants.NODE_MONITOR_INTERVAL_SEC} seconds") + try: + update_cluster_status(cluster_id) + except Exception: + logger.error("Error while updating cluster status") time.sleep(constants.NODE_MONITOR_INTERVAL_SEC) diff --git a/simplyblock_core/services/tasks_runner_failed_migration.py b/simplyblock_core/services/tasks_runner_failed_migration.py index fce4fd8ef..e3baeb7f0 100644 --- a/simplyblock_core/services/tasks_runner_failed_migration.py +++ 
b/simplyblock_core/services/tasks_runner_failed_migration.py @@ -87,8 +87,12 @@ def task_runner(task): qos_high_priority = False if db.get_cluster_by_id(snode.cluster_id).is_qos_set(): qos_high_priority = True - rsp = rpc_client.distr_migration_failure_start( - distr_name, device.cluster_device_order, qos_high_priority, job_size=1024, jobs=constants.MIG_PARALLEL_JOBS) + try: + rsp = rpc_client.distr_migration_failure_start( + distr_name, device.cluster_device_order, qos_high_priority, job_size=constants.MIG_JOB_SIZE, jobs=constants.MIG_PARALLEL_JOBS) + except Exception as e: + logger.error(e) + rsp = False if not rsp: logger.error(f"Failed to start device migration task, storage_ID: {device.cluster_device_order}") task.function_result = "Failed to start device migration task" diff --git a/simplyblock_core/services/tasks_runner_jc_comp.py b/simplyblock_core/services/tasks_runner_jc_comp.py index 676156af3..90933d955 100644 --- a/simplyblock_core/services/tasks_runner_jc_comp.py +++ b/simplyblock_core/services/tasks_runner_jc_comp.py @@ -57,6 +57,7 @@ if node.status != StorageNode.STATUS_ONLINE: msg = f"Node is {node.status}, retry task" logger.info(msg) + task.retry += 1 task.function_result = msg task.status = JobSchedule.STATUS_SUSPENDED task.write_to_db(db.kv_store) @@ -79,6 +80,7 @@ logger.info(msg) task.function_result = msg task.status = JobSchedule.STATUS_SUSPENDED + task.retry += 1 task.write_to_db(db.kv_store) continue @@ -86,7 +88,11 @@ jm_vuid = node.jm_vuid if "jm_vuid" in task.function_params: jm_vuid = task.function_params["jm_vuid"] - ret, err = rpc_client.jc_compression_start(jm_vuid=jm_vuid) + try: + ret, err = rpc_client.jc_compression_start(jm_vuid=jm_vuid) + except Exception as e: + logger.error(e) + continue if ret: task.function_result = f"JC {node.jm_vuid} compression resumed on node" task.status = JobSchedule.STATUS_DONE diff --git a/simplyblock_core/services/tasks_runner_migration.py b/simplyblock_core/services/tasks_runner_migration.py 
index fb085e4aa..c1abf823c 100644 --- a/simplyblock_core/services/tasks_runner_migration.py +++ b/simplyblock_core/services/tasks_runner_migration.py @@ -62,16 +62,6 @@ def task_runner(task): except Exception as e: logger.error(f"Failed to get online since: {e}") - for dev in node.nvme_devices: - if dev.status not in [NVMeDevice.STATUS_ONLINE, - NVMeDevice.STATUS_FAILED_AND_MIGRATED, - NVMeDevice.STATUS_CANNOT_ALLOCATE]: - task.function_result = f"Some dev status is {dev.status }, retrying" - task.status = JobSchedule.STATUS_SUSPENDED - task.retry += 1 - task.write_to_db(db.kv_store) - return False - task.status = JobSchedule.STATUS_RUNNING task.function_result = "" task.write_to_db(db.kv_store) @@ -93,8 +83,12 @@ def task_runner(task): qos_high_priority = False if db.get_cluster_by_id(snode.cluster_id).is_qos_set(): qos_high_priority = True - rsp = rpc_client.distr_migration_expansion_start(distr_name, qos_high_priority, job_size=1024, - jobs=constants.MIG_PARALLEL_JOBS) + try: + rsp = rpc_client.distr_migration_expansion_start(distr_name, qos_high_priority, job_size=constants.MIG_JOB_SIZE, + jobs=constants.MIG_PARALLEL_JOBS) + except Exception as e: + logger.error(e) + rsp = False if not rsp: logger.error(f"Failed to start device migration task, storage_ID: {device.cluster_device_order}") task.function_result = "Failed to start device migration task, retry later" @@ -219,9 +213,12 @@ def _set_master_task_status(master_task, status): continue rpc_client = RPCClient( node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=5, retry=2) - ret, err = rpc_client.jc_compression_start(jm_vuid=node.jm_vuid) - if err and "code" in err and err["code"] != -2: - logger.info("Failed to resume JC compression adding task...") - tasks_controller.add_jc_comp_resume_task(task.cluster_id, task.node_id, node.jm_vuid) + try: + ret, err = rpc_client.jc_compression_start(jm_vuid=node.jm_vuid) + if err and "code" in err and err["code"] != -2: + logger.info("Failed to 
resume JC compression adding task...") + tasks_controller.add_jc_comp_resume_task(task.cluster_id, task.node_id, node.jm_vuid) + except Exception as e: + logger.error(e) time.sleep(3) diff --git a/simplyblock_core/services/tasks_runner_new_dev_migration.py b/simplyblock_core/services/tasks_runner_new_dev_migration.py index f62a7f210..db4143eec 100644 --- a/simplyblock_core/services/tasks_runner_new_dev_migration.py +++ b/simplyblock_core/services/tasks_runner_new_dev_migration.py @@ -98,8 +98,12 @@ def task_runner(task): qos_high_priority = False if db.get_cluster_by_id(snode.cluster_id).is_qos_set(): qos_high_priority = True - rsp = rpc_client.distr_migration_expansion_start(distr_name, qos_high_priority, job_size=1024, - jobs=constants.MIG_PARALLEL_JOBS) + try: + rsp = rpc_client.distr_migration_expansion_start( + distr_name, qos_high_priority, job_size=constants.MIG_JOB_SIZE,jobs=constants.MIG_PARALLEL_JOBS) + except Exception as e: + logger.error(f"Failed to start migration : {e}") + rsp = False if not rsp: logger.error(f"Failed to start device migration task, storage_ID: {device.cluster_device_order}") task.function_result = "Failed to start device migration task" diff --git a/simplyblock_core/services/tasks_runner_node_add.py b/simplyblock_core/services/tasks_runner_node_add.py index daeba918e..819e611d7 100644 --- a/simplyblock_core/services/tasks_runner_node_add.py +++ b/simplyblock_core/services/tasks_runner_node_add.py @@ -2,7 +2,7 @@ import time -from simplyblock_core import db_controller, storage_node_ops, utils +from simplyblock_core import db_controller, storage_node_ops, utils, constants from simplyblock_core.models.job_schedule import JobSchedule from simplyblock_core.models.cluster import Cluster @@ -13,46 +13,67 @@ db = db_controller.DBController() -logger.info("Starting Tasks runner...") -while True: +def process_task(task): + if task.canceled: + task.function_result = "canceled" + task.status = JobSchedule.STATUS_DONE + 
task.write_to_db(db.kv_store) + return False + + if task.retry >= task.max_retry: + task.function_result = "max retry reached" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + return True + + if db.get_cluster_by_id(cl.get_id()).status == Cluster.STATUS_IN_ACTIVATION: + task.function_result = "Cluster is in_activation, waiting" + task.status = JobSchedule.STATUS_NEW + task.write_to_db(db.kv_store) + return False + + if task.status != JobSchedule.STATUS_RUNNING: + task.status = JobSchedule.STATUS_RUNNING + task.write_to_db(db.kv_store) + + try: + res = storage_node_ops.add_node(**task.function_params) + msg = f"Node add result: {res}" + logger.info(msg) + task.function_result = msg + if res: + task.status = JobSchedule.STATUS_DONE + else: + task.retry += 1 + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return True + except Exception as e: + logger.error(e) + return False + + +logger.info("Starting Tasks runner node add...") +while True: clusters = db.get_clusters() if not clusters: logger.error("No clusters found!") else: for cl in clusters: - if cl.status == Cluster.STATUS_IN_ACTIVATION: - continue - tasks = db.get_job_tasks(cl.get_id(), reverse=False) for task in tasks: - + delay_seconds = constants.TASK_EXEC_INTERVAL_SEC if task.function_name == JobSchedule.FN_NODE_ADD: - if task.status != JobSchedule.STATUS_DONE: - + while task.status != JobSchedule.STATUS_DONE: # get new task object because it could be changed from cancel task task = db.get_task_by_id(task.uuid) - - if task.canceled: - task.function_result = "canceled" - task.status = JobSchedule.STATUS_DONE - task.write_to_db(db.kv_store) - continue - - if db.get_cluster_by_id(cl.get_id()).status == Cluster.STATUS_IN_ACTIVATION: - task.function_result = "Cluster is in_activation, waiting" - task.status = JobSchedule.STATUS_NEW - task.write_to_db(db.kv_store) - continue - - if task.status != JobSchedule.STATUS_RUNNING: - task.status = 
JobSchedule.STATUS_RUNNING - task.write_to_db(db.kv_store) - - res = storage_node_ops.add_node(**task.function_params) - logger.info(f"Node add result: {res}") - task.function_result = str(res) - task.status = JobSchedule.STATUS_DONE - task.write_to_db(db.kv_store) - - time.sleep(5) + res = process_task(task) + if res: + if task.status == JobSchedule.STATUS_DONE: + break + else: + delay_seconds *= 2 + time.sleep(delay_seconds) + + time.sleep(constants.TASK_EXEC_INTERVAL_SEC) diff --git a/simplyblock_core/services/tasks_runner_port_allow.py b/simplyblock_core/services/tasks_runner_port_allow.py index a39de42ab..5ca9b7fdd 100644 --- a/simplyblock_core/services/tasks_runner_port_allow.py +++ b/simplyblock_core/services/tasks_runner_port_allow.py @@ -3,13 +3,12 @@ from simplyblock_core import db_controller, utils, storage_node_ops, distr_controller -from simplyblock_core.controllers import tcp_ports_events, health_controller +from simplyblock_core.controllers import tcp_ports_events, health_controller, tasks_controller from simplyblock_core.fw_api_client import FirewallClient from simplyblock_core.models.job_schedule import JobSchedule from simplyblock_core.models.cluster import Cluster -from simplyblock_core.models.nvme_device import NVMeDevice +from simplyblock_core.models.nvme_device import NVMeDevice, RemoteDevice from simplyblock_core.models.storage_node import StorageNode -from simplyblock_core.snode_client import SNodeClient logger = utils.get_logger(__name__) @@ -17,9 +16,226 @@ db = db_controller.DBController() +def exec_port_allow_task(task): + # get new task object because it could be changed from cancel task + task = db.get_task_by_id(task.uuid) + + if task.canceled: + task.function_result = "canceled" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + return + + node = db.get_storage_node_by_id(task.node_id) + + if not node: + task.function_result = "node not found" + task.status = JobSchedule.STATUS_DONE + 
task.write_to_db(db.kv_store) + return + + if node.status not in [StorageNode.STATUS_DOWN, StorageNode.STATUS_ONLINE]: + msg = f"Node is {node.status}, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + # check node ping + ping_check = health_controller._check_node_ping(node.mgmt_ip) + logger.info(f"Check: ping mgmt ip {node.mgmt_ip} ... {ping_check}") + if not ping_check: + time.sleep(1) + ping_check = health_controller._check_node_ping(node.mgmt_ip) + logger.info(f"Check 2: ping mgmt ip {node.mgmt_ip} ... {ping_check}") + + if not ping_check: + msg = "Node ping is false, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + # check node ping + logger.info("connect to remote devices") + nodes = db.get_storage_nodes_by_cluster_id(node.cluster_id) + # connect to remote devs + try: + node_bdevs = node.rpc_client().get_bdevs() + logger.debug(node_bdevs) + if node_bdevs: + node_bdev_names = {} + for b in node_bdevs: + node_bdev_names[b['name']] = b + for al in b['aliases']: + node_bdev_names[al] = b + else: + node_bdev_names = {} + remote_devices = [] + for nd in nodes: + if nd.get_id() == node.get_id() or nd.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN]: + continue + logger.info(f"Connecting to node {nd.get_id()}") + for index, dev in enumerate(nd.nvme_devices): + + if dev.status not in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, + NVMeDevice.STATUS_CANNOT_ALLOCATE]: + logger.debug(f"Device is not online: {dev.get_id()}, status: {dev.status}") + continue + + if not dev.alceml_bdev: + raise ValueError(f"device alceml bdev not found!, {dev.get_id()}") + + remote_device = RemoteDevice() + remote_device.uuid = dev.uuid + remote_device.alceml_name = dev.alceml_name + remote_device.node_id = dev.node_id + remote_device.size = dev.size + 
remote_device.nvmf_multipath = dev.nvmf_multipath + remote_device.status = NVMeDevice.STATUS_ONLINE + remote_device.remote_bdev = storage_node_ops.connect_device( + f"remote_{dev.alceml_bdev}", dev, node, + bdev_names=list(node_bdev_names), reattach=False) + + remote_devices.append(remote_device) + if not remote_devices: + msg = "Node unable to connect to remote devs, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + else: + node = db.get_storage_node_by_id(task.node_id) + node.remote_devices = remote_devices + node.write_to_db() + + logger.info("connect to remote JM devices") + remote_jm_devices = storage_node_ops._connect_to_remote_jm_devs(node) + if not remote_jm_devices or len(remote_jm_devices) < 2: + msg = "Node unable to connect to remote JMs, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + else: + node = db.get_storage_node_by_id(task.node_id) + node.remote_jm_devices = remote_jm_devices + node.write_to_db() + + + except Exception as e: + logger.error(e) + msg = "Error when connect to remote devs, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + logger.info("Sending device status event") + for db_dev in node.nvme_devices: + distr_controller.send_dev_status_event(db_dev, db_dev.status, node) + + logger.info("Finished sending device status and now waiting 5s for JMs to connect") + time.sleep(5) + + sec_node = db.get_storage_node_by_id(node.secondary_node_id) + snode = db.get_storage_node_by_id(node.get_id()) + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: + try: + ret = sec_node.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if ret: + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + # is_sec_node_leader = 
True + # check jc_compression status + jc_compression_is_active = sec_node.rpc_client().jc_compression_get_status(snode.jm_vuid) + retries = 10 + while jc_compression_is_active: + if retries <= 0: + logger.warning("Timeout waiting for JC compression task to finish") + break + retries -= 1 + logger.info( + f"JC compression task found on node: {sec_node.get_id()}, retrying in 60 seconds") + time.sleep(60) + jc_compression_is_active = sec_node.rpc_client().jc_compression_get_status( + snode.jm_vuid) + except Exception as e: + logger.error(e) + return + + if node.lvstore_status == "ready": + lvstore_check = health_controller._check_node_lvstore(node.lvstore_stack, node, auto_fix=True) + if not lvstore_check: + msg = "Node LVolStore check fail, retry later" + logger.warning(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + if node.secondary_node_id: + primary_hublvol_check = health_controller._check_node_hublvol(node) + if not primary_hublvol_check: + msg = "Node hublvol check fail, retry later" + logger.warning(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + sec_node = db.get_storage_node_by_id(node.secondary_node_id) + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: + secondary_hublvol_check = health_controller._check_sec_node_hublvol(sec_node, auto_fix=True) + if not secondary_hublvol_check: + msg = "Secondary node hublvol check fail, retry later" + logger.warning(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + if task.status != JobSchedule.STATUS_RUNNING: + task.status = JobSchedule.STATUS_RUNNING + task.write_to_db(db.kv_store) + + try: + # wait for lvol sync delete + lvol_sync_del_found = tasks_controller.get_lvol_sync_del_task(task.cluster_id, task.node_id) + while lvol_sync_del_found: + logger.info("Lvol sync delete task 
logger.info("Starting Tasks runner...")
while True:
    clusters = db.get_clusters()
    if not clusters:
        logger.error("No clusters found!")
    else:
        for cl in clusters:
            # Clusters that are still activating are not ready to process tasks.
            if cl.status == Cluster.STATUS_IN_ACTIVATION:
                continue
            for task in db.get_job_tasks(cl.get_id(), reverse=False):
                # This runner only owns port-allow tasks; everything else is
                # handled by its dedicated runner process.
                if task.function_name != JobSchedule.FN_PORT_ALLOW:
                    continue
                # Finished tasks stay untouched; pending/suspended ones are retried.
                if task.status != JobSchedule.STATUS_DONE:
                    exec_port_allow_task(task)
    time.sleep(5)
db_controller.DBController() + + +logger.info("Starting Tasks runner...") +while True: + + clusters = db.get_clusters() + if not clusters: + logger.error("No clusters found!") + else: + for cl in clusters: + if cl.status == Cluster.STATUS_IN_ACTIVATION: + continue + + tasks = db.get_job_tasks(cl.get_id(), reverse=False) + for task in tasks: + + if task.function_name == JobSchedule.FN_LVOL_SYNC_DEL: + if task.status != JobSchedule.STATUS_DONE: + + # get new task object because it could be changed from cancel task + task = db.get_task_by_id(task.uuid) + + if task.canceled: + task.function_result = "canceled" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + continue + + node = db.get_storage_node_by_id(task.node_id) + + if not node: + task.function_result = "node not found" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + continue + + if node.status not in [StorageNode.STATUS_DOWN, StorageNode.STATUS_ONLINE]: + msg = f"Node is {node.status}, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + continue + + if task.status != JobSchedule.STATUS_RUNNING: + task.status = JobSchedule.STATUS_RUNNING + task.write_to_db(db.kv_store) + + lvol_bdev_name = task.function_params["lvol_bdev_name"] + + logger.info(f"Sync delete bdev: {lvol_bdev_name} from node: {node.get_id()}") + ret, err = node.rpc_client().delete_lvol(lvol_bdev_name, del_async=True) + if not ret: + if "code" in err and err["code"] == -19: + logger.error(f"Sync delete completed with error: {err}") + else: + logger.error( + f"Failed to sync delete bdev: {lvol_bdev_name} from node: {node.get_id()}") + + task.function_result = f"bdev {lvol_bdev_name} deleted" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + + time.sleep(3) diff --git a/simplyblock_core/snode_client.py b/simplyblock_core/snode_client.py index 2e8504b08..f51742c68 100644 --- 
a/simplyblock_core/snode_client.py +++ b/simplyblock_core/snode_client.py @@ -40,8 +40,7 @@ def _request(self, method, path, payload=None): response = self.session.request(method, self.url+path, data=data, timeout=self.timeout, params=params) except Exception as e: - logger.error("Request failed: %s", e) - raise e + raise SNodeClientException(str(e)) logger.debug("Response: status_code: %s, content: %s", response.status_code, response.content) @@ -69,11 +68,15 @@ def _request(self, method, path, payload=None): if ret_code == 422: raise SNodeClientException(f"Request validation failed: '{response.text}'") - logger.error("Unknown http status: %s", ret_code) - return None, None + raise SNodeClientException(f"Unknown http status: {ret_code}") def is_live(self): - return self._request("GET", "/check") + try: + return self._request("GET", "check") + except SNodeClientException: + logger.warning("Failed to call snode/check, trying snode/info") + return self.info() + def info(self): return self._request("GET", "info") @@ -81,7 +84,7 @@ def info(self): def spdk_process_start(self, l_cores, spdk_mem, spdk_image=None, spdk_debug=None, cluster_ip=None, fdb_connection=None, namespace=None, server_ip=None, rpc_port=None, rpc_username=None, rpc_password=None, multi_threading_enabled=False, timeout=0, ssd_pcie=None, - total_mem=None, system_mem=None, cluster_mode=None): + total_mem=None, system_mem=None, cluster_mode=None, cluster_id=None): params = { "cluster_ip": cluster_ip, "server_ip": server_ip, @@ -113,6 +116,8 @@ def spdk_process_start(self, l_cores, spdk_mem, spdk_image=None, spdk_debug=None params["system_mem"] = system_mem if cluster_mode: params["cluster_mode"] = cluster_mode + if cluster_id: + params["cluster_id"] = cluster_id return self._request("POST", "spdk_process_start", params) def join_swarm(self, cluster_ip, join_token, db_connection, cluster_id): @@ -124,8 +129,8 @@ def join_swarm(self, cluster_ip, join_token, db_connection, cluster_id): # "db_connection": 
db_connection} # return self._request("POST", "join_swarm", params) - def spdk_process_kill(self, rpc_port): - return self._request("GET", "spdk_process_kill", {"rpc_port": rpc_port}) + def spdk_process_kill(self, rpc_port, cluster_id=None): + return self._request("GET", "spdk_process_kill", {"rpc_port": rpc_port, "cluster_id": cluster_id}) def leave_swarm(self): return True @@ -152,8 +157,8 @@ def bind_device_to_spdk(self, device_pci): params = {"device_pci": device_pci} return self._request("POST", "bind_device_to_spdk", params) - def spdk_process_is_up(self, rpc_port): - params = {"rpc_port": rpc_port} + def spdk_process_is_up(self, rpc_port, cluster_id): + params = {"rpc_port": rpc_port, "cluster_id": cluster_id} return self._request("GET", "spdk_process_is_up", params) def get_file_content(self, file_name): diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index 3d32dd17a..23e057777 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -27,10 +27,11 @@ from simplyblock_core.models.iface import IFace from simplyblock_core.models.job_schedule import JobSchedule from simplyblock_core.models.lvol_model import LVol -from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice +from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice, RemoteDevice, RemoteJMDevice from simplyblock_core.models.snapshot import SnapShot from simplyblock_core.models.storage_node import StorageNode from simplyblock_core.models.cluster import Cluster +from simplyblock_core.prom_client import PromClient from simplyblock_core.rpc_client import RPCClient, RPCException from simplyblock_core.snode_client import SNodeClient, SNodeClientException from simplyblock_web import node_utils @@ -100,9 +101,6 @@ def connect_device(name: str, device: NVMeDevice, node: StorageNode, bdev_names: if device.nvmf_multipath: rpc_client.bdev_nvme_set_multipath_policy(bdev_name, "active_active") - # wait 5 seconds 
after controller attach - time.sleep(5) - if not bdev_name: msg = "Bdev name not returned from controller attach" logger.error(msg) @@ -224,9 +222,9 @@ def _create_jm_stack_on_raid(rpc_client, jm_nvme_bdevs, snode, after_restart): return False for iface in snode.data_nics: - logger.info(f"adding {iface.trtype} listener for %s on IP %s" % (subsystem_nqn, iface.ip4_address)) - ret = rpc_client.listeners_create(subsystem_nqn, iface.trtype, iface.ip4_address, snode.nvmf_port) - ip_list.append(iface.ip4_address) + logger.info(f"adding {iface.trtype} listener for %s on IP %s" % (subsystem_nqn, iface.ip4_address)) + ret = rpc_client.listeners_create(subsystem_nqn, iface.trtype, iface.ip4_address, snode.nvmf_port) + ip_list.append(iface.ip4_address) if len(ip_list) > 1: IP = ",".join(ip_list) @@ -413,8 +411,8 @@ def _create_storage_device_stack(rpc_client, nvme, snode, after_restart): return nvme -def _create_device_partitions(rpc_client, nvme, snode, num_partitions_per_dev, jm_percent, partition_size=0): - nbd_device = rpc_client.nbd_start_disk(nvme.nvme_bdev) +def _create_device_partitions(rpc_client, nvme, snode, num_partitions_per_dev, jm_percent, partition_size, nbd_index): + nbd_device = rpc_client.nbd_start_disk(nvme.nvme_bdev, f"/dev/nbd{nbd_index}") time.sleep(3) if not nbd_device: logger.error("Failed to start nbd dev") @@ -447,79 +445,84 @@ def _create_device_partitions(rpc_client, nvme, snode, num_partitions_per_dev, j def _prepare_cluster_devices_partitions(snode, devices): db_controller = DBController() - rpc_client = RPCClient( - snode.mgmt_ip, snode.rpc_port, - snode.rpc_username, snode.rpc_password) - new_devices = [] - jm_devices = [] - dev_order = get_next_cluster_device_order(db_controller, snode.cluster_id) - bdevs_names = [d['name'] for d in rpc_client.get_bdevs()] + devices_to_partition = [] + thread_list = [] for index, nvme in enumerate(devices): if nvme.status == "not_found": continue - if nvme.status not in [NVMeDevice.STATUS_ONLINE, 
NVMeDevice.STATUS_NEW]: logger.debug(f"Device is skipped: {nvme.get_id()}, status: {nvme.status}") new_devices.append(nvme) continue - if nvme.is_partition: - dev_part = f"{nvme.nvme_bdev[:-2]}p1" - if dev_part in bdevs_names: - if dev_part not in jm_devices: - jm_devices.append(dev_part) - - new_device = _create_storage_device_stack(rpc_client, nvme, snode, after_restart=False) - if not new_device: - logger.error("failed to create dev stack") - return False - new_devices.append(new_device) - if new_device.status == NVMeDevice.STATUS_ONLINE: - device_events.device_create(new_device) - + t = threading.Thread(target=_create_storage_device_stack, args=(snode.rpc_client(), nvme, snode, False,)) + thread_list.append(t) + new_devices.append(nvme) + t.start() else: - # look for partitions - partitioned_devices = _search_for_partitions(rpc_client, nvme) - logger.debug("partitioned_devices") - logger.debug(partitioned_devices) - if len(partitioned_devices) == (1 + snode.num_partitions_per_dev): - logger.info("Partitioned devices found") - else: + devices_to_partition.append(nvme) + partitioned_devices = _search_for_partitions(snode.rpc_client(), nvme) + if len(partitioned_devices) != (1 + snode.num_partitions_per_dev): logger.info(f"Creating partitions for {nvme.nvme_bdev}") - _create_device_partitions(rpc_client, nvme, snode, snode.num_partitions_per_dev, snode.jm_percent, - snode.partition_size) - partitioned_devices = _search_for_partitions(rpc_client, nvme) - if len(partitioned_devices) == (1 + snode.num_partitions_per_dev): - logger.info("Device partitions created") - else: - logger.error("Failed to create partitions") - return False + t = threading.Thread( + target=_create_device_partitions, + args=(snode.rpc_client(), nvme, snode, snode.num_partitions_per_dev, + snode.jm_percent, snode.partition_size, index+1,)) + thread_list.append(t) + t.start() - jm_devices.append(partitioned_devices.pop(0).nvme_bdev) + for thread in thread_list: + thread.join() + thread_list = [] 
+ for nvme in devices_to_partition: + partitioned_devices = _search_for_partitions(snode.rpc_client(), nvme) + if len(partitioned_devices) == (1 + snode.num_partitions_per_dev): + logger.info("Device partitions created") + # remove 1st partition for jm + partitioned_devices.pop(0) for dev in partitioned_devices: - ret = _create_storage_device_stack(rpc_client, dev, snode, after_restart=False) - if not ret: - logger.error("failed to create dev stack") - return False - if dev.status == NVMeDevice.STATUS_ONLINE: - if dev.cluster_device_order < 0: - dev.cluster_device_order = dev_order - dev_order += 1 - device_events.device_create(dev) + t = threading.Thread(target=_create_storage_device_stack, + args=(snode.rpc_client(), dev, snode, False,)) + thread_list.append(t) new_devices.append(dev) + t.start() + else: + logger.error("Failed to create partitions") + return False - snode.nvme_devices = new_devices + for thread in thread_list: + thread.join() + + # assign device order + dev_order = get_next_cluster_device_order(db_controller, snode.cluster_id) + for nvme in new_devices: + if nvme.status == NVMeDevice.STATUS_ONLINE: + if nvme.cluster_device_order < 0: + nvme.cluster_device_order = dev_order + dev_order += 1 + device_events.device_create(nvme) + + # create jm device + jm_devices = [] + bdevs_names = [d['name'] for d in snode.rpc_client().get_bdevs()] + for nvme in new_devices: + if nvme.status == NVMeDevice.STATUS_ONLINE: + dev_part = f"{nvme.nvme_bdev[:-2]}p1" + if dev_part in bdevs_names: + if dev_part not in jm_devices: + jm_devices.append(dev_part) if jm_devices: - jm_device = _create_jm_stack_on_raid(rpc_client, jm_devices, snode, after_restart=False) + jm_device = _create_jm_stack_on_raid(snode.rpc_client(), jm_devices, snode, after_restart=False) if not jm_device: logger.error("Failed to create JM device") return False snode.jm_device = jm_device + snode.nvme_devices = new_devices return True @@ -701,6 +704,8 @@ def _connect_to_remote_devs( 
allowed_node_statuses.append(StorageNode.STATUS_RESTARTING) allowed_dev_statuses.append(NVMeDevice.STATUS_UNAVAILABLE) + devices_to_connect = [] + connect_threads = [] nodes = db_controller.get_storage_nodes_by_cluster_id(this_node.cluster_id) # connect to remote devs for node_index, node in enumerate(nodes): @@ -715,12 +720,36 @@ def _connect_to_remote_devs( if not dev.alceml_bdev: raise ValueError(f"device alceml bdev not found!, {dev.get_id()}") + devices_to_connect.append(dev) + t = threading.Thread( + target=connect_device, + args=(f"remote_{dev.alceml_bdev}", dev, this_node, node_bdev_names, reattach,)) + connect_threads.append(t) + t.start() - dev.remote_bdev = connect_device( - f"remote_{dev.alceml_bdev}", dev, this_node, - bdev_names=node_bdev_names, reattach=reattach, - ) - remote_devices.append(dev) + for t in connect_threads: + t.join() + + node_bdevs = rpc_client.get_bdevs() + if node_bdevs: + node_bdev_names = [b['name'] for b in node_bdevs] + + for dev in devices_to_connect: + remote_bdev = RemoteDevice() + remote_bdev.uuid = dev.uuid + remote_bdev.alceml_name = dev.alceml_name + remote_bdev.node_id = dev.node_id + remote_bdev.size = dev.size + remote_bdev.status = NVMeDevice.STATUS_ONLINE + remote_bdev.nvmf_multipath = dev.nvmf_multipath + for bdev in node_bdev_names: + if bdev.startswith(f"remote_{dev.alceml_bdev}"): + remote_bdev.remote_bdev = bdev + break + if not remote_bdev.remote_bdev: + logger.error(f"Failed to connect to remote device {dev.alceml_name}") + continue + remote_devices.append(remote_bdev) return remote_devices @@ -759,6 +788,10 @@ def _connect_to_remote_jm_devs(this_node, jm_ids=None): if jm_dev and jm_dev not in remote_devices: remote_devices.append(jm_dev) + logger.debug(f"remote_devices: {remote_devices}") + allowed_node_statuses = [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN, StorageNode.STATUS_RESTARTING] + allowed_dev_statuses = [NVMeDevice.STATUS_ONLINE] + new_devs = [] for jm_dev in remote_devices: if not 
jm_dev.jm_bdev: @@ -775,14 +808,30 @@ def _connect_to_remote_jm_devs(this_node, jm_ids=None): if not org_dev or org_dev in new_devs or org_dev_node and org_dev_node.get_id() == this_node.get_id(): continue + if org_dev_node is not None and org_dev_node.status not in allowed_node_statuses: + logger.warning(f"Skipping node:{org_dev_node.get_id()} with status: {org_dev_node.status}") + continue + + if org_dev is not None and org_dev.status not in allowed_dev_statuses: + logger.warning(f"Skipping device:{org_dev.get_id()} with status: {org_dev.status}") + continue + + remote_device = RemoteJMDevice() + remote_device.uuid = org_dev.uuid + remote_device.alceml_name = org_dev.alceml_name + remote_device.node_id = org_dev.node_id + remote_device.size = org_dev.size + remote_device.jm_bdev = org_dev.jm_bdev + remote_device.status = NVMeDevice.STATUS_ONLINE + remote_device.nvmf_multipath = org_dev.nvmf_multipath try: - org_dev.remote_bdev = connect_device( - f"remote_{org_dev.jm_bdev}", org_dev, this_node, - bdev_names=node_bdev_names, reattach=True, + remote_device.remote_bdev = connect_device( + f"remote_{org_dev.jm_bdev}", org_dev, this_node, + bdev_names=node_bdev_names, reattach=True, ) except RuntimeError: logger.error(f'Failed to connect to {org_dev.get_id()}') - new_devs.append(org_dev) + new_devs.append(remote_device) return new_devs @@ -998,7 +1047,7 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, namespace, mgmt_ip, rpc_port, rpc_user, rpc_pass, multi_threading_enabled=constants.SPDK_PROXY_MULTI_THREADING_ENABLED, timeout=constants.SPDK_PROXY_TIMEOUT, - ssd_pcie=ssd_pcie, total_mem=total_mem, system_mem=minimum_sys_memory, cluster_mode=cluster.mode) + ssd_pcie=ssd_pcie, total_mem=total_mem, system_mem=minimum_sys_memory, cluster_mode=cluster.mode, cluster_id=cluster_id) time.sleep(5) except Exception as e: @@ -1021,12 +1070,12 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, logger.debug(f"Data nics ports are: {names}") for nic 
in names: device = node_info['network_interface'][nic] - base_ifc_cfg={ - 'uuid': str(uuid.uuid4()), - 'if_name': nic, - 'ip4_address': device['ip'], - 'status': device['status'], - 'net_type': device['net_type'],} + base_ifc_cfg = { + 'uuid': str(uuid.uuid4()), + 'if_name': nic, + 'ip4_address': device['ip'], + 'status': device['status'], + 'net_type': device['net_type'], } if fabric_rdma and snode_api.ifc_is_roce(nic): cfg = base_ifc_cfg.copy() cfg['trtype'] = "RDMA" @@ -1262,8 +1311,8 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, logger.info("Setting Alcemls QOS weights") ret = rpc_client.alceml_set_qos_weights(qos_controller.get_qos_weights_list(cluster_id)) if not ret: - logger.error("Failed to set Alcemls QOS") - return False + logger.error("Failed to set Alcemls QOS") + return False logger.info("Connecting to remote devices") remote_devices = _connect_to_remote_devs(snode) @@ -1454,7 +1503,7 @@ def remove_storage_node(node_id, force_remove=False, force_migrate=False): if health_controller._check_node_api(snode.mgmt_ip): logger.info("Stopping SPDK container") snode_api = SNodeClient(snode.api_endpoint, timeout=20) - snode_api.spdk_process_kill(snode.rpc_port) + snode_api.spdk_process_kill(snode.rpc_port, snode.cluster_id) snode_api.leave_swarm() pci_address = [] for dev in snode.nvme_devices: @@ -1481,8 +1530,6 @@ def restart_storage_node( spdk_image=None, set_spdk_debug=None, small_bufsize=0, large_bufsize=0, force=False, node_ip=None, reattach_volume=False, clear_data=False, new_ssd_pcie=[], force_lvol_recreate=False): - db_controller = DBController() - kv_store = db_controller.kv_store db_controller = DBController() logger.info("Restarting storage node") @@ -1676,7 +1723,7 @@ def restart_storage_node( snode.l_cores, snode.spdk_mem, snode.spdk_image, spdk_debug, cluster_ip, fdb_connection, snode.namespace, snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password, 
multi_threading_enabled=constants.SPDK_PROXY_MULTI_THREADING_ENABLED, timeout=constants.SPDK_PROXY_TIMEOUT, - ssd_pcie=snode.ssd_pcie, total_mem=total_mem, system_mem=minimum_sys_memory, cluster_mode=cluster.mode) + ssd_pcie=snode.ssd_pcie, total_mem=total_mem, system_mem=minimum_sys_memory, cluster_mode=cluster.mode, cluster_id=snode.cluster_id) except Exception as e: logger.error(e) @@ -1900,23 +1947,6 @@ def restart_storage_node( db_dev.health_check = True device_events.device_restarted(db_dev) snode.write_to_db(db_controller.kv_store) - # - # # make other nodes connect to the new devices - # logger.info("Make other nodes connect to the node devices") - # snodes = db_controller.get_storage_nodes_by_cluster_id(snode.cluster_id) - # for node in snodes: - # if node.get_id() == snode.get_id() or node.status != StorageNode.STATUS_ONLINE: - # continue - # node.remote_devices = _connect_to_remote_devs(node, force_connect_restarting_nodes=True) - # node.write_to_db(kv_store) - # - # logger.info(f"Sending device status event") - # snode = db_controller.get_storage_node_by_id(snode.get_id()) - # for db_dev in snode.nvme_devices: - # distr_controller.send_dev_status_event(db_dev, db_dev.status) - # - # if snode.jm_device and snode.jm_device.status in [JMDevice.STATUS_UNAVAILABLE, JMDevice.STATUS_ONLINE]: - # device_controller.set_jm_device_state(snode.jm_device.get_id(), JMDevice.STATUS_ONLINE) cluster = db_controller.get_cluster_by_id(snode.cluster_id) if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: @@ -1932,7 +1962,7 @@ def restart_storage_node( except RuntimeError: logger.error('Failed to connect to remote devices') return False - node.write_to_db(kv_store) + node.write_to_db() logger.info("Sending device status event") snode = db_controller.get_storage_node_by_id(snode.get_id()) @@ -1979,8 +2009,7 @@ def restart_storage_node( except RuntimeError: logger.error('Failed to connect to remote devices') return False - 
node.write_to_db(kv_store) - + node.write_to_db() logger.info("Sending device status event") snode = db_controller.get_storage_node_by_id(snode.get_id()) @@ -2137,28 +2166,13 @@ def list_storage_devices(node_id, is_json): "Health": snode.jm_device.health_check }) - for jm_id in snode.jm_ids: - try: - jm_device = db_controller.get_jm_device_by_id(jm_id) - except KeyError: - continue - - jm_devices.append({ - "UUID": jm_device.uuid, - "Name": jm_device.device_name, - "Size": utils.humanbytes(jm_device.size), - "Status": jm_device.status, - "IO Err": jm_device.io_error, - "Health": jm_device.health_check - }) - - for device in snode.remote_devices: - logger.debug(device) + for remote_device in snode.remote_devices: + logger.debug(remote_device) logger.debug("*" * 20) - name = device.alceml_name - status = device.status - if device.remote_bdev: - name = device.remote_bdev + name = remote_device.alceml_name + status = remote_device.status + if remote_device.remote_bdev: + name = remote_device.remote_bdev try: org_dev = db_controller.get_storage_device_by_id(device.get_id()) status = org_dev.status @@ -2166,22 +2180,22 @@ def list_storage_devices(node_id, is_json): pass remote_devices.append({ - "UUID": device.uuid, + "UUID": remote_device.uuid, "Name": name, - "Size": utils.humanbytes(device.size), - "Node ID": device.node_id, + "Size": utils.humanbytes(remote_device.size), + "Node ID": remote_device.node_id, "Status": status, }) - for device in snode.remote_jm_devices: - logger.debug(device) + for remote_jm_device in snode.remote_jm_devices: + logger.debug(remote_jm_device) logger.debug("*" * 20) remote_devices.append({ - "UUID": device.uuid, - "Name": device.remote_bdev, - "Size": utils.humanbytes(device.size), - "Node ID": device.node_id, - "Status": device.status, + "UUID": remote_jm_device.uuid, + "Name": remote_jm_device.remote_bdev, + "Size": utils.humanbytes(remote_jm_device.size), + "Node ID": remote_jm_device.node_id, + "Status": remote_jm_device.status, }) 
data: dict[str, List[Any]] = { @@ -2250,7 +2264,7 @@ def shutdown_storage_node(node_id, force=False): logger.info("Stopping SPDK") try: - SNodeClient(snode.api_endpoint, timeout=10, retry=10).spdk_process_kill(snode.rpc_port) + SNodeClient(snode.api_endpoint, timeout=10, retry=10).spdk_process_kill(snode.rpc_port, snode.cluster_id) except SNodeClientException: logger.error('Failed to kill SPDK') return False @@ -2358,34 +2372,32 @@ def suspend_storage_node(node_id, force=False): if snode.lvstore_stack_secondary_1: nodes = db_controller.get_primary_storage_nodes_by_secondary_node_id(node_id) if nodes: - for node in nodes: + for node in nodes: try: fw_api.firewall_set_port( node.hublvol.nvmf_port, port_type, "block", snode.rpc_port, is_reject=True) fw_api.firewall_set_port( node.lvol_subsys_port, port_type, "block", snode.rpc_port, is_reject=True) + time.sleep(0.5) + rpc_client.bdev_lvol_set_leader(node.lvstore, leader=False) + rpc_client.bdev_distrib_force_to_non_leader(node.jm_vuid) except Exception as e: logger.error(e) return False - time.sleep(0.5) - rpc_client.bdev_lvol_set_leader(node.lvstore, leader=False) - rpc_client.bdev_distrib_force_to_non_leader(node.jm_vuid) try: fw_api.firewall_set_port( snode.hublvol.nvmf_port, port_type, "block", snode.rpc_port, is_reject=True) fw_api.firewall_set_port( snode.lvol_subsys_port, port_type, "block", snode.rpc_port, is_reject=True) + time.sleep(0.5) + rpc_client.bdev_lvol_set_leader(snode.lvstore, leader=False) + rpc_client.bdev_distrib_force_to_non_leader(snode.jm_vuid) + time.sleep(1) except Exception as e: logger.error(e) return False - time.sleep(0.5) - rpc_client.bdev_lvol_set_leader(snode.lvstore, leader=False) - rpc_client.bdev_distrib_force_to_non_leader(snode.jm_vuid) - time.sleep(1) - - logger.info("Done") return True @@ -2429,7 +2441,7 @@ def resume_storage_node(node_id): return False if snode.enable_ha_jm: snode.remote_jm_devices = _connect_to_remote_jm_devs(snode) - snode.write_to_db(db_controller.kv_store) 
+ snode.write_to_db() fw_api = FirewallClient(snode, timeout=20, retry=1) port_type = "tcp" @@ -2437,7 +2449,7 @@ def resume_storage_node(node_id): port_type = "udp" nodes = db_controller.get_primary_storage_nodes_by_secondary_node_id(node_id) if nodes: - for node in nodes: + for node in nodes: try: fw_api.firewall_set_port( node.lvol_subsys_port, port_type, "allow", snode.rpc_port) @@ -2465,20 +2477,11 @@ def resume_storage_node(node_id): def get_node_capacity(node_id, history, records_count=20, parse_sizes=True): db_controller = DBController() try: - this_node = db_controller.get_storage_node_by_id(node_id) + node = db_controller.get_storage_node_by_id(node_id) except KeyError: logger.error("Storage node Not found") return - if history: - records_number = utils.parse_history_param(history) - if not records_number: - logger.error(f"Error parsing history string: {history}") - return False - else: - records_number = 20 - - records = db_controller.get_node_capacity(this_node, records_number) cap_stats_keys = [ "date", "size_total", @@ -2488,6 +2491,8 @@ def get_node_capacity(node_id, history, records_count=20, parse_sizes=True): "size_util", "size_prov_util", ] + prom_client = PromClient(node.cluster_id) + records = prom_client.get_node_metrics(node_id, cap_stats_keys, history) new_records = utils.process_records(records, records_count, keys=cap_stats_keys) if not parse_sizes: @@ -2514,17 +2519,6 @@ def get_node_iostats_history(node_id, history, records_count=20, parse_sizes=Tru except KeyError: logger.error("node not found") return False - - if history: - records_number = utils.parse_history_param(history) - if not records_number: - logger.error(f"Error parsing history string: {history}") - return False - else: - records_number = 20 - - records = db_controller.get_node_stats(node, records_number) - io_stats_keys = [ "date", "read_bytes", @@ -2562,6 +2556,8 @@ def get_node_iostats_history(node_id, history, records_count=20, parse_sizes=Tru "write_latency_ticks", ] ) 
+ prom_client = PromClient(node.cluster_id) + records = prom_client.get_node_metrics(node_id, io_stats_keys, history) # combine records new_records = utils.process_records(records, records_count, keys=io_stats_keys) @@ -3214,7 +3210,7 @@ def recreate_lvstore(snode, force=False): def _kill_app(): storage_events.snode_restart_failed(snode) snode_api = SNodeClient(snode.api_endpoint, timeout=5, retry=5) - snode_api.spdk_process_kill(snode.rpc_port) + snode_api.spdk_process_kill(snode.rpc_port, snode.cluster_id) set_node_status(snode.get_id(), StorageNode.STATUS_OFFLINE) # If LVol Store recovery failed then stop spdk process @@ -3388,7 +3384,7 @@ def get_sorted_ha_jms(current_node): continue mgmt_ips.append(jm_dev_to_mgmt_ip[jm_id]) out.append(jm_id) - return out[:constants.HA_JM_COUNT-1] + return out[:current_node.ha_jm_count - 1] def get_node_jm_names(current_node, remote_node=None): @@ -3419,7 +3415,7 @@ def get_node_jm_names(current_node, remote_node=None): if jm_dev.get_id() == jm_id: jm_list.append(jm_dev.remote_bdev) break - return jm_list[:constants.HA_JM_COUNT] + return jm_list[:current_node.ha_jm_count] def get_secondary_nodes(current_node): @@ -3604,6 +3600,15 @@ def create_lvstore(snode, ndcs, npcs, distr_bs, distr_chunk_bs, page_size_in_blo def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None): + def _create_distr(snode, name, params): + try: + rpc_client.bdev_distrib_create(**params) + except Exception: + logger.error("Failed to create bdev distrib") + ret = distr_controller.send_cluster_map_to_distr(snode, name) + if not ret: + logger.error("Failed to send cluster map") + rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) db_controller = DBController() cluster = db_controller.get_cluster_by_id(snode.cluster_id) @@ -3620,11 +3625,11 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None): else: node_bdev_names = [] + thread_list = [] for bdev in stack: type = bdev['type'] name = 
bdev['name'] params = bdev['params'] - if name in node_bdev_names: continue @@ -3640,23 +3645,21 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None): snode.distrib_cpu_index = (snode.distrib_cpu_index + 1) % len(snode.distrib_cpu_cores) params['full_page_unmap'] = cluster.full_page_unmap - ret = rpc_client.bdev_distrib_create(**params) - if ret: - ret = distr_controller.send_cluster_map_to_distr(snode, name) - if not ret: - return False, "Failed to send cluster map" - # time.sleep(1) + t = threading.Thread(target=_create_distr, args=(snode, name, params,)) + thread_list.append(t) + t.start() + ret = True elif type == "bdev_lvstore" and lvstore_stack and not primary_node: - ret = rpc_client.create_lvstore(**params) - # if ret and snode.jm_vuid > 0: - # rpc_client.bdev_lvol_set_lvs_ops(snode.lvstore, snode.jm_vuid, snode.lvol_subsys_port) + ret = rpc_client.create_lvstore(**params) elif type == "bdev_ptnonexcl": ret = rpc_client.bdev_PT_NoExcl_create(**params) elif type == "bdev_raid": - + if thread_list: + for t in thread_list: + t.join() distribs_list = bdev["distribs_list"] strip_size_kb = params["strip_size_kb"] ret = rpc_client.bdev_raid_create(name, distribs_list, strip_size_kb=strip_size_kb) @@ -3674,6 +3677,9 @@ def _create_bdev_stack(snode, lvstore_stack=None, primary_node=None): _remove_bdev_stack(created_bdevs[::-1], rpc_client) return False, f"Failed to create BDev: {name}" + if thread_list: + for t in thread_list: + t.join() return True, None diff --git a/simplyblock_core/utils/__init__.py b/simplyblock_core/utils/__init__.py index 941414708..4ac48a8e3 100644 --- a/simplyblock_core/utils/__init__.py +++ b/simplyblock_core/utils/__init__.py @@ -725,6 +725,12 @@ def convert_size(size: Union[int, str], unit: str, round_up: bool = False) -> in raw = size / (base ** exponent) return math.ceil(raw) if round_up else int(raw) +def first_six_chars(s: str) -> str: + """ + Returns the first six characters of a given string. 
+ If the string is shorter than six characters, returns the entire string. + """ + return s[:6] def nearest_upper_power_of_2(n): # Check if n is already a power of 2 @@ -2031,17 +2037,47 @@ def patch_prometheus_configmap(username: str, password: str): load_kube_config_with_fallback() v1 = client.CoreV1Api() - cm = v1.read_namespaced_config_map(name="sbcli-simplyblock-prometheus-config", namespace=constants.K8S_NAMESPACE) - prometheus_yml = cm.data.get("prometheus.yml", "") + try: + cm = v1.read_namespaced_config_map( + name="sbcli-simplyblock-prometheus-config", + namespace=constants.K8S_NAMESPACE + ) + except client.exceptions.ApiException as e: + logger.error(f"Failed to read ConfigMap: {e}") + return False - prometheus_yml = re.sub(r"username:*", f"username: '{username}'", prometheus_yml) - prometheus_yml = re.sub(r"password:*", f"password: '{password}'", prometheus_yml) + try: + prometheus_yml = cm.data.get("prometheus.yml", "") + if not prometheus_yml: + logger.error("prometheus.yml key not found in ConfigMap.") + return False + + try: + prometheus_yml = re.sub(r"username:.*", f"username: '{username}'", prometheus_yml) + prometheus_yml = re.sub(r"password:.*", f"password: '{password}'", prometheus_yml) + except re.error as e: + logger.error(f"Regex error while patching Prometheus YAML: {e}") + return False - patch_body = { - "data": { - "prometheus.yml": prometheus_yml + patch_body = { + "data": { + "prometheus.yml": prometheus_yml + } } - } - v1.patch_namespaced_config_map(name="sbcli-simplyblock-prometheus-config", namespace=constants.K8S_NAMESPACE, body=patch_body) - logger.info("Patched sbcli-simplyblock-prometheus-config ConfigMap with new credentials.") + v1.patch_namespaced_config_map( + name="sbcli-simplyblock-prometheus-config", + namespace=constants.K8S_NAMESPACE, + body=patch_body + ) + + logger.info("Patched sbcli-simplyblock-prometheus-config ConfigMap with new credentials.") + return True + + except client.exceptions.ApiException as e: + 
logger.error(f"Failed to patch ConfigMap: {e}") + return False + + except Exception as e: + logger.error(f"Unexpected error while patching ConfigMap: {e}") + return False diff --git a/simplyblock_web/api/internal/storage_node/docker.py b/simplyblock_web/api/internal/storage_node/docker.py index 8e18fc276..d1ee4f9f0 100644 --- a/simplyblock_web/api/internal/storage_node/docker.py +++ b/simplyblock_web/api/internal/storage_node/docker.py @@ -142,6 +142,7 @@ class SPDKParams(BaseModel): spdk_image: Optional[str] = Field(constants.SIMPLY_BLOCK_SPDK_ULTRA_IMAGE) cluster_ip: Optional[str] = Field(default=None, pattern=utils.IP_PATTERN) cluster_mode: str + cluster_id: str @api.post('/spdk_process_start', responses={ diff --git a/simplyblock_web/api/internal/storage_node/kubernetes.py b/simplyblock_web/api/internal/storage_node/kubernetes.py index be3193138..d5e98eb1d 100644 --- a/simplyblock_web/api/internal/storage_node/kubernetes.py +++ b/simplyblock_web/api/internal/storage_node/kubernetes.py @@ -268,6 +268,7 @@ class SPDKParams(BaseModel): spdk_image: str = Field(constants.SIMPLY_BLOCK_SPDK_ULTRA_IMAGE) cluster_ip: str = Field(pattern=utils.IP_PATTERN) cluster_mode: str + cluster_id: str @api.post('/spdk_process_start', responses={ @@ -286,9 +287,10 @@ def spdk_process_start(body: SPDKParams): total_mem_mib = core_utils.convert_size(core_utils.parse_size(body.total_mem), 'MB') if body.total_mem else "" - if _is_pod_up(body.rpc_port) or _is_pod_present(body.rpc_port): + first_six_cluster_id = core_utils.first_six_chars(body.cluster_id) + if _is_pod_up(body.rpc_port, first_six_cluster_id) or _is_pod_present(body.rpc_port, first_six_cluster_id): logger.info("SPDK pod found, removing...") - query = utils.RPCPortParams(rpc_port=body.rpc_port) + query = utils.RPCPortParams(rpc_port=body.rpc_port, cluster_id=body.cluster_id) spdk_process_kill(query) node_prepration_job_name = "snode-spdk-job-" @@ -351,6 +353,7 @@ def spdk_process_start(body: SPDKParams): 
'SIMPLYBLOCK_DOCKER_IMAGE': constants.SIMPLY_BLOCK_DOCKER_IMAGE, 'GRAYLOG_SERVER_IP': body.cluster_ip, 'MODE': body.cluster_mode, + 'CLUSTER_ID': first_six_cluster_id, 'SSD_PCIE': ssd_pcie_params, 'PCI_ALLOWED': ssd_pcie_list, 'TOTAL_HP': total_mem_mib @@ -420,9 +423,35 @@ def spdk_process_start(body: SPDKParams): logger.info(f"Job deleted: '{core_resp.metadata.name}' in namespace '{namespace}") elif core_isolate and openshift: + batch_v1 = core_utils.get_k8s_batch_client() + try: + batch_v1.read_namespaced_job( + name=node_prepration_core_name, + namespace=namespace + ) + logger.info(f"Existing Job '{node_prepration_core_name}' found — deleting it first...") + + batch_v1.delete_namespaced_job( + name=node_prepration_core_name, + namespace=namespace, + body=V1DeleteOptions( + propagation_policy='Foreground', + grace_period_seconds=0 + ) + ) + + node_utils_k8s.wait_for_job_deletion(node_prepration_core_name, namespace) + + logger.info(f"Old Job '{node_prepration_core_name}' fully deleted.") + + except ApiException as e: + if e.status == 404: + logger.info(f"No pre-existing Job '{node_prepration_core_name}' found. 
Proceeding.") + else: + raise + core_template = env.get_template('oc_storage_core_isolation.yaml.j2') core_yaml = yaml.safe_load(core_template.render(values)) - batch_v1 = core_utils.get_k8s_batch_client() core_resp = batch_v1.create_namespaced_job(namespace=namespace, body=core_yaml) msg = f"Job created: '{core_resp.metadata.name}' in namespace '{namespace}" logger.info(msg) @@ -463,7 +492,11 @@ def spdk_process_kill(query: utils.RPCPortParams): k8s_core_v1 = core_utils.get_k8s_core_client() try: namespace = node_utils_k8s.get_namespace() - pod_name = f"snode-spdk-pod-{query.rpc_port}" + if not query.cluster_id: + return utils.get_response(False, "param required: cluster_id") + + first_six_cluster_id = core_utils.first_six_chars(query.cluster_id) + pod_name = f"snode-spdk-pod-{query.rpc_port}-{first_six_cluster_id}" resp = k8s_core_v1.delete_namespaced_pod(pod_name, namespace) retries = 10 while retries > 0: @@ -486,9 +519,9 @@ def spdk_process_kill(query: utils.RPCPortParams): return utils.get_response(True) -def _is_pod_up(rpc_port): +def _is_pod_up(rpc_port, cluster_id): k8s_core_v1 = core_utils.get_k8s_core_client() - pod_name = f"snode-spdk-pod-{rpc_port}" + pod_name = f"snode-spdk-pod-{rpc_port}-{cluster_id}" try: resp = k8s_core_v1.list_namespaced_pod(node_utils_k8s.get_namespace()) for pod in resp.items: @@ -502,9 +535,9 @@ def _is_pod_up(rpc_port): return False return False -def _is_pod_present(rpc_port): +def _is_pod_present(rpc_port, cluster_id): k8s_core_v1 = core_utils.get_k8s_core_client() - pod_name = f"snode-spdk-pod-{rpc_port}" + pod_name = f"snode-spdk-pod-{rpc_port}-{cluster_id}" try: resp = k8s_core_v1.list_namespaced_pod(node_utils_k8s.get_namespace()) for pod in resp.items: @@ -525,7 +558,11 @@ def _is_pod_present(rpc_port): })}}}, }) def spdk_process_is_up(query: utils.RPCPortParams): - if _is_pod_up(query.rpc_port): + if not query.cluster_id: + return utils.get_response(False, "param required: cluster_id") + + first_six_cluster_id = 
core_utils.first_six_chars(query.cluster_id) + if _is_pod_up(query.rpc_port, first_six_cluster_id): return utils.get_response(True) else: return utils.get_response(False, "SPDK container is not running") diff --git a/simplyblock_web/api/v1/pool.py b/simplyblock_web/api/v1/pool.py index a24a9e9b7..3b4fe5f72 100644 --- a/simplyblock_web/api/v1/pool.py +++ b/simplyblock_web/api/v1/pool.py @@ -184,21 +184,10 @@ def pool_iostats(uuid, history): except KeyError: return utils.get_response_error(f"Pool not found: {uuid}", 404) - if history: - records_number = core_utils.parse_history_param(history) - if not records_number: - logger.error(f"Error parsing history string: {history}") - return False - else: - records_number = 20 - - out = db.get_pool_stats(pool, records_number) - records_count = 20 - new_records = core_utils.process_records(out, records_count) - + data = pool_controller.get_io_stats(uuid, history) ret = { "object_data": pool.get_clean_dict(), - "stats": new_records or [] + "stats": data or [] } return utils.get_response(ret) @@ -207,21 +196,13 @@ def pool_iostats(uuid, history): @bp.route('/pool/iostats-all-lvols/', methods=['GET']) def lvol_iostats(pool_uuid): try: - db.get_pool_by_id(pool_uuid) + pool = db.get_pool_by_id(pool_uuid) except KeyError: return utils.get_response_error(f"Pool not found: {pool_uuid}", 404) - ret = [] - for lvol in db.get_lvols_by_pool_id(pool_uuid): - - records_list = db.get_lvol_stats(lvol, limit=1) - - if records_list: - data = records_list[0].get_clean_dict() - else: - data = {} - ret.append({ - "object_data": lvol.get_clean_dict(), - "stats": data - }) + data = pool_controller.get_capacity(pool_uuid) + ret = { + "object_data": pool.get_clean_dict(), + "stats": data or [] + } return utils.get_response(ret) diff --git a/simplyblock_web/api/v2/pool.py b/simplyblock_web/api/v2/pool.py index c779f70ca..d34ce0b2a 100644 --- a/simplyblock_web/api/v2/pool.py +++ b/simplyblock_web/api/v2/pool.py @@ -122,5 +122,5 @@ def update(cluster: 
Cluster, pool: StoragePool, parameters: UpdatableStoragePool @instance_api.get('/iostats', name='clusters:storage-pools:iostats') def iostats(cluster: Cluster, pool: StoragePool, limit: int = 20): - records = db.get_pool_stats(pool, limit) - return core_utils.process_records(records, 20) + data = pool_controller.get_io_stats(pool.get_id(), history="") + return core_utils.process_records(data, 20) diff --git a/simplyblock_web/node_utils_k8s.py b/simplyblock_web/node_utils_k8s.py index 4626a89c9..b1440744d 100644 --- a/simplyblock_web/node_utils_k8s.py +++ b/simplyblock_web/node_utils_k8s.py @@ -5,6 +5,7 @@ import time from simplyblock_core.utils import get_k8s_batch_client +from kubernetes.client import ApiException node_name = os.environ.get("HOSTNAME") @@ -23,7 +24,7 @@ def get_namespace(): return out return default_namespace -def wait_for_job_completion(job_name, namespace, timeout=60): +def wait_for_job_completion(job_name, namespace, timeout=180): batch_v1 = get_k8s_batch_client() for _ in range(timeout): job = batch_v1.read_namespaced_job(job_name, namespace) @@ -33,3 +34,19 @@ def wait_for_job_completion(job_name, namespace, timeout=60): raise RuntimeError(f"Job '{job_name}' failed") time.sleep(3) raise TimeoutError(f"Timeout waiting for Job '{job_name}' to complete") + +def wait_for_job_deletion(job_name, namespace, timeout=60): + batch_v1 = get_k8s_batch_client() + + for _ in range(timeout): + try: + batch_v1.read_namespaced_job(job_name, namespace) + except ApiException as e: + if e.status == 404: + return True + else: + raise + + time.sleep(2) + + raise TimeoutError(f"Timeout waiting for Job '{job_name}' to be deleted") diff --git a/simplyblock_web/templates/oc_storage_core_isolation.yaml.j2 b/simplyblock_web/templates/oc_storage_core_isolation.yaml.j2 index 734d9c59e..74f66721d 100644 --- a/simplyblock_web/templates/oc_storage_core_isolation.yaml.j2 +++ b/simplyblock_web/templates/oc_storage_core_isolation.yaml.j2 @@ -34,9 +34,18 @@ spec: - | set -e + 
MARKER="/var/simplyblock/.cpu_isolation_applied" + echo "--- Installing jq ---" apk add --no-cache jq + echo "--- Checking if node was already configured ---" + + if [[ -f "$MARKER" ]]; then + echo "[INFO] Node already configured. Skipping sleep and exiting..." + exit 0 + fi + echo "--- Reading isolated cores from config ---" CONFIG_FILE="/var/simplyblock/sn_config_file" @@ -105,4 +114,8 @@ spec: echo "[INFO] Init setup and CPU isolation complete." - echo "--- Init setup complete ---" + echo "[INFO] Marking node as configured." + touch "$MARKER" + + echo "[INFO] Node is rebooting. Sleeping indefinitely to stop pipeline..." + sleep infinity diff --git a/simplyblock_web/templates/storage_deploy_spdk.yaml.j2 b/simplyblock_web/templates/storage_deploy_spdk.yaml.j2 index f10478c75..e49aca2e2 100644 --- a/simplyblock_web/templates/storage_deploy_spdk.yaml.j2 +++ b/simplyblock_web/templates/storage_deploy_spdk.yaml.j2 @@ -1,7 +1,7 @@ apiVersion: v1 kind: Pod metadata: - name: snode-spdk-pod-{{ RPC_PORT }} + name: snode-spdk-pod-{{ RPC_PORT }}-{{ CLUSTER_ID }} namespace: {{ NAMESPACE }} labels: app: spdk-app-{{ RPC_PORT }} diff --git a/simplyblock_web/utils.py b/simplyblock_web/utils.py index b0d1795df..a610cd177 100644 --- a/simplyblock_web/utils.py +++ b/simplyblock_web/utils.py @@ -149,6 +149,7 @@ def error_handler(exception: Exception): class RPCPortParams(BaseModel): rpc_port: int = Field(constants.RPC_HTTP_PROXY_PORT, ge=0, le=65536) + cluster_id: Optional[str] class DeviceParams(BaseModel):