diff --git a/src/chk/chk_common.c b/src/chk/chk_common.c index cb5317fafe6..2fe0f75cc8a 100644 --- a/src/chk/chk_common.c +++ b/src/chk/chk_common.c @@ -19,15 +19,16 @@ #include "chk_internal.h" struct chk_pool_bundle { - d_list_t *cpb_head; - uuid_t cpb_uuid; - uint32_t *cpb_shard_nr; - d_rank_t cpb_rank; - struct chk_instance *cpb_ins; + d_list_t *cpb_head; + uuid_t cpb_uuid; + uint32_t *cpb_shard_nr; + d_rank_t cpb_rank; + bool cpd_has_dending; + struct chk_instance *cpb_ins; /* Pointer to the pool bookmark. */ - struct chk_bookmark *cpb_bk; - void *cpb_data; - chk_pool_free_data_t cpb_free_cb; + struct chk_bookmark *cpb_bk; + void *cpb_data; + chk_pool_free_data_t cpb_free_cb; }; static int @@ -84,6 +85,9 @@ chk_pool_alloc(struct btr_instance *tins, d_iov_t *key_iov, d_iov_t *val_iov, memcpy(&cpr->cpr_bk, cpb->cpb_bk, sizeof(cpr->cpr_bk)); cpr->cpr_ins = cpb->cpb_ins; + if (cpb->cpd_has_dending) + cpr->cpr_has_pending = 1; + rec->rec_off = umem_ptr2off(&tins->ti_umm, cpr); d_list_add_tail(&cpr->cpr_link, cpb->cpb_head); @@ -162,6 +166,9 @@ chk_pool_update(struct btr_instance *tins, struct btr_record *rec, cps->cps_data = cpb->cpb_data; cps->cps_free_cb = cpb->cpb_free_cb; + if (cpb->cpd_has_dending) + cpr->cpr_has_pending = 1; + d_list_add_tail(&cps->cps_link, &cpr->cpr_shard_list); cpr->cpr_shard_nr++; if (cpb->cpb_shard_nr != NULL) @@ -185,7 +192,6 @@ btr_ops_t chk_pool_ops = { struct chk_pending_bundle { struct chk_instance *cpb_ins; d_list_t *cpb_pool_head; - d_list_t *cpb_rank_head; uuid_t cpb_uuid; d_rank_t cpb_rank; uint32_t cpb_class; @@ -247,11 +253,6 @@ chk_pending_alloc(struct btr_instance *tins, d_iov_t *key_iov, d_iov_t *val_iov, d_list_add_tail(&cpr->cpr_ins_link, &cpb->cpb_ins->ci_pending_list); - if (cpb->cpb_rank_head != NULL) - d_list_add_tail(&cpr->cpr_rank_link, cpb->cpb_rank_head); - else - D_INIT_LIST_HEAD(&cpr->cpr_rank_link); - rec->rec_off = umem_ptr2off(&tins->ti_umm, cpr); d_list_add_tail(&cpr->cpr_pool_link, cpb->cpb_pool_head); @@ 
-279,7 +280,6 @@ chk_pending_free(struct btr_instance *tins, struct btr_record *rec, void *args) rec->rec_off = UMOFF_NULL; d_list_del_init(&cpr->cpr_pool_link); - d_list_del_init(&cpr->cpr_rank_link); d_list_del_init(&cpr->cpr_ins_link); if (val_iov != NULL) { @@ -617,8 +617,8 @@ chk_pool_start_one(struct chk_instance *ins, uuid_t uuid, uint64_t gen) } cbk.cb_gen = gen; - rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, uuid, - dss_self_rank(), &cbk, ins, NULL, NULL, NULL, NULL); + rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, uuid, dss_self_rank(), false, + &cbk, ins, NULL, NULL, NULL, NULL); out: return rc; @@ -702,8 +702,8 @@ chk_pools_load_list(struct chk_instance *ins, uint64_t gen, uint32_t flags, * persistently sometime later. */ cbk.cb_gen = gen; - rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, pools[i], - myrank, &cbk, ins, NULL, NULL, NULL, NULL); + rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, pools[i], myrank, + false, &cbk, ins, NULL, NULL, NULL, NULL); if (rc != 0) break; @@ -765,8 +765,8 @@ chk_pools_load_from_db(struct sys_db *db, char *table, d_iov_t *key, void *args, * persistently sometime later. 
*/ cbk.cb_gen = ctpa->ctpa_gen; - rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, uuid, - dss_self_rank(), &cbk, ins, NULL, NULL, NULL, NULL); + rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, uuid, dss_self_rank(), false, + &cbk, ins, NULL, NULL, NULL, NULL); if (rc == 0 && ctpa->ctpa_phase > cbk.cb_phase) ctpa->ctpa_phase = cbk.cb_phase; @@ -873,10 +873,9 @@ chk_pool_handle_notify(struct chk_instance *ins, struct chk_iv *iv) } int -chk_pool_add_shard(daos_handle_t hdl, d_list_t *head, uuid_t uuid, d_rank_t rank, - struct chk_bookmark *bk, struct chk_instance *ins, - uint32_t *shard_nr, void *data, chk_pool_free_data_t free_cb, - struct chk_pool_rec **cpr) +chk_pool_add_shard(daos_handle_t hdl, d_list_t *head, uuid_t uuid, d_rank_t rank, bool has_pending, + struct chk_bookmark *bk, struct chk_instance *ins, uint32_t *shard_nr, + void *data, chk_pool_free_data_t free_cb, struct chk_pool_rec **cpr) { struct chk_pool_bundle rbund; d_iov_t kiov; @@ -887,11 +886,12 @@ chk_pool_add_shard(daos_handle_t hdl, d_list_t *head, uuid_t uuid, d_rank_t rank rbund.cpb_head = head; rbund.cpb_shard_nr = shard_nr; uuid_copy(rbund.cpb_uuid, uuid); - rbund.cpb_rank = rank; - rbund.cpb_bk = bk; - rbund.cpb_ins = ins; - rbund.cpb_data = data; - rbund.cpb_free_cb = free_cb; + rbund.cpb_rank = rank; + rbund.cpd_has_dending = has_pending; + rbund.cpb_bk = bk; + rbund.cpb_ins = ins; + rbund.cpb_data = data; + rbund.cpb_free_cb = free_cb; d_iov_set(&riov, &rbund, sizeof(rbund)); d_iov_set(&kiov, uuid, sizeof(uuid_t)); @@ -931,30 +931,9 @@ chk_pool_shard_cleanup(struct chk_instance *ins) } } -int -chk_pending_lookup(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr) -{ - d_iov_t kiov; - d_iov_t riov; - int rc; - - d_iov_set(&riov, NULL, 0); - d_iov_set(&kiov, &seq, sizeof(seq)); - - ABT_rwlock_rdlock(ins->ci_abt_lock); - rc = dbtree_lookup(ins->ci_pending_hdl, &kiov, &riov); - ABT_rwlock_unlock(ins->ci_abt_lock); - if (rc == 0) - *cpr = (struct 
chk_pending_rec *)riov.iov_buf; - else - *cpr = NULL; - - return rc; -} - -int -chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid, - uint64_t seq, uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options, +static int +chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, uuid_t uuid, uint64_t seq, + uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options, struct chk_pending_rec **cpr) { struct chk_pending_bundle rbund; @@ -967,7 +946,6 @@ chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_he uuid_copy(rbund.cpb_uuid, uuid); rbund.cpb_pool_head = pool_head; - rbund.cpb_rank_head = rank_head; rbund.cpb_ins = ins; rbund.cpb_seq = seq; rbund.cpb_rank = rank; @@ -1060,6 +1038,163 @@ chk_pending_wakeup(struct chk_instance *ins, struct chk_pending_rec *cpr) } int +chk_report(struct chk_instance *ins, struct chk_report_unit *cru, uint64_t *seq, int *decision) +{ + struct chk_pending_rec *cpr = NULL; + struct chk_pool_rec *pool = NULL; + char uuid_str[DAOS_UUID_STR_SIZE]; + d_iov_t kiov; + d_iov_t riov; + int rc; + + CHK_IS_READY(ins); + + if (cru->cru_result == 0 && ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) + cru->cru_result = CHK__CHECK_RESULT__DRY_RUN; + + if (*seq == 0) { +new_seq: + *seq = chk_report_seq_gen(ins); + } + + D_INFO("Report on %u (%s) with seq " DF_X64 " class %u, action %u, %s, result %d\n", + cru->cru_rank, ins->ci_is_leader ? 
"leader" : "engine", *seq, cru->cru_cla, + cru->cru_act, cru->cru_msg, cru->cru_result); + + if (cru->cru_act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) { + if (cru->cru_pool == NULL) + D_GOTO(log, rc = -DER_INVAL); + + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, cru->cru_pool, sizeof(uuid_t)); + rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); + if (rc != 0) + goto log; + + pool = (struct chk_pool_rec *)riov.iov_buf; + + rc = chk_pending_add(ins, &pool->cpr_pending_list, *cru->cru_pool, *seq, + cru->cru_rank, cru->cru_cla, cru->cru_option_nr, + cru->cru_options, &cpr); + if (unlikely(rc == -DER_AGAIN)) + goto new_seq; + + if (rc != 0) + goto log; + } + + rc = chk_report_upcall(cru->cru_gen, *seq, cru->cru_cla, cru->cru_act, cru->cru_result, + cru->cru_rank, cru->cru_target, cru->cru_pool, cru->cru_pool_label, + cru->cru_cont, cru->cru_cont_label, cru->cru_obj, cru->cru_dkey, + cru->cru_akey, cru->cru_msg, cru->cru_option_nr, cru->cru_options, + cru->cru_detail_nr, cru->cru_details); + /* Check cpr->cpr_action for the case of "dmg check repair" by race. */ + if (rc == 0 && pool != NULL && + likely(cpr->cpr_action == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)) { + pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_PENDING; + chk_uuid_unparse(ins, *cru->cru_pool, uuid_str); + rc = chk_bk_update_pool(&pool->cpr_bk, uuid_str); + } + +log: + if (rc != 0) { + D_ERROR("Failed to handle report from rank %u (%s) with seq " DF_X64 ", class %u, " + "action %u, handle_rc %d, report_rc %d\n", + cru->cru_rank, ins->ci_is_leader ? 
"leader" : "engine", *seq, cru->cru_cla, + cru->cru_act, cru->cru_result, rc); + goto out; + } + + if (decision == NULL || cpr == NULL) + goto out; + + D_ASSERT(cpr->cpr_busy); + + D_INFO("Need interaction for class %u with seq " DF_X64 "\n", cru->cru_cla, *seq); + + ABT_mutex_lock(cpr->cpr_mutex); + +again: + if (cpr->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) { + *decision = cpr->cpr_action; + ABT_mutex_unlock(cpr->cpr_mutex); + goto out; + } + + if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) { + rc = 1; + ABT_mutex_unlock(cpr->cpr_mutex); + goto out; + } + + ABT_cond_wait(cpr->cpr_cond, cpr->cpr_mutex); + + goto again; + +out: + if ((rc != 0 || decision != NULL) && cpr != NULL) + chk_pending_destroy(ins, cpr); + + if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && + d_list_empty(&pool->cpr_pending_list)) { + pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; + chk_uuid_unparse(ins, *cru->cru_pool, uuid_str); + chk_bk_update_pool(&pool->cpr_bk, uuid_str); + } + + return rc; +} + +int +chk_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act) +{ + struct chk_pending_rec *cpr = NULL; + int rc; + + CHK_IS_READY(ins); + + rc = chk_pending_del(ins, seq, &cpr); + if (rc == 0) { + /* The cpr will be destroyed by the waiter via chk_report(). */ + D_ASSERT(cpr->cpr_busy); + + ABT_mutex_lock(cpr->cpr_mutex); + /* + * It is the control plane's duty to guarantee that the decision is a valid + * action from the report options. Otherwise, related inconsistency will be + * ignored. 
+ */ + cpr->cpr_action = act; + ABT_cond_broadcast(cpr->cpr_cond); + ABT_mutex_unlock(cpr->cpr_mutex); + } + + return rc; +} + +int +chk_act(uint64_t seq, uint32_t act) +{ + int rc = -DER_INVAL; + + if (likely(act != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)) { + if (chk_report_seq_leader(seq)) + rc = chk_leader_act(seq, act); + else + rc = chk_engine_act(seq, act); + } + + D_CDEBUG(rc != 0 && rc != -DER_NONEXIST && rc != -DER_NO_HDL, DLOG_ERR, DLOG_INFO, + "CHK repair on rank %u, act %u, seq " DF_X64 ": " DF_RC "\n", dss_self_rank(), act, + seq, DP_RC(rc)); + + if (rc == -DER_NONEXIST || rc == -DER_NO_HDL) + rc = 0; + + return rc; +} + +static int chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_property *prop) { int changed; @@ -1080,6 +1215,52 @@ chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_p return changed; } +int +chk_set_policy(struct chk_instance *ins, uint64_t gen, uint32_t policy_nr, + struct chk_policy *policies) +{ + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_property *prop = &ins->ci_prop; + struct chk_pending_rec *pending; + struct chk_pending_rec *tmp; + int rc; + + CHK_IS_READY(ins); + + /* Do nothing if no check instance is running. 
*/ + if ((ins->ci_is_leader && cbk->cb_magic != CHK_BK_MAGIC_LEADER) || + (!ins->ci_is_leader && cbk->cb_magic != CHK_BK_MAGIC_ENGINE) || + cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING || cbk->cb_gen != gen) + D_GOTO(out, rc = -DER_NOTAPPLICABLE); + + rc = chk_policy_refresh(policy_nr, policies, prop); + if (rc <= 0) + goto out; + + if (ins->ci_is_leader) { + rc = chk_set_policy_remote(ins->ci_ranks, cbk->cb_gen, policy_nr, policies); + if (rc != 0) + goto out; + } + + rc = chk_prop_update(prop, NULL); + if (rc != 0) + goto out; + + d_list_for_each_entry_safe(pending, tmp, &ins->ci_pending_list, cpr_ins_link) { + if (chk_is_valid_action(pending, ins->ci_prop.cp_policies[pending->cpr_class])) { + d_list_del(&pending->cpr_ins_link); + d_list_add_tail(&pending->cpr_ins_link, &ins->ci_interaction_filter_list); + } + } + +out: + DL_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, rc, "set policy on rank %d (%s) with gen " DF_U64, + dss_self_rank(), ins->ci_is_leader ? "leader" : "engine", gen); + + return rc == -DER_NOTAPPLICABLE ? 
0 : rc; +} + int chk_prop_prepare(d_rank_t leader, uint32_t flags, uint32_t policy_nr, struct chk_policy *policies, d_rank_list_t *ranks, struct chk_property *prop) diff --git a/src/chk/chk_engine.c b/src/chk/chk_engine.c index f4c0be91d58..b730faa055c 100644 --- a/src/chk/chk_engine.c +++ b/src/chk/chk_engine.c @@ -87,8 +87,6 @@ enum chk_pm_status { CPS_TGT_DOWN, }; -static int chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision); - static int chk_cont_hkey_size(void) { @@ -408,7 +406,7 @@ chk_engine_pm_orphan_ult(void *args) cru.cru_details = details; cru.cru_result = result; - rc = chk_engine_report(&cru, &seq, &decision); + rc = chk_report(ins, &cru, &seq, &decision); D_CDEBUG(result != 0 || rc != 0, DLOG_ERR, DLOG_INFO, DF_ENGINE @@ -634,7 +632,7 @@ chk_engine_pm_dangling_ult(void *args) cru.cru_details = details; cru.cru_result = result; - rc = chk_engine_report(&cru, &seq, &decision); + rc = chk_report(ins, &cru, &seq, &decision); D_CDEBUG(result != 0 || rc != 0, DLOG_ERR, DLOG_INFO, DF_ENGINE" detects dangling %s entry in pool map for pool " @@ -794,7 +792,7 @@ chk_engine_handle_unknown_ult(void *args) cru.cru_msg = msg; cru.cru_result = 0; - rc = chk_engine_report(&cru, &seq, NULL); + rc = chk_report(ins, &cru, &seq, NULL); if (ccr != NULL) D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, @@ -1102,7 +1100,7 @@ chk_engine_bad_pool_label(struct chk_pool_rec *cpr, struct ds_pool_svc *svc) cru.cru_msg = msg; cru.cru_result = result; - rc = chk_engine_report(&cru, &seq, NULL); + rc = chk_report(ins, &cru, &seq, NULL); D_CDEBUG(result != 0 || rc != 0, DLOG_ERR, DLOG_INFO, DF_ENGINE" detects corrupted label %s (MS) vs %s (PS) for pool " @@ -1332,7 +1330,7 @@ chk_engine_cont_orphan_ult(void *args) cru.cru_details = details; cru.cru_result = result; - rc = chk_engine_report(&cru, &seq, &decision); + rc = chk_report(ins, &cru, &seq, &decision); D_CDEBUG(result != 0 || rc != 0, DLOG_ERR, DLOG_INFO, DF_ENGINE" detects orphan container " @@ -1684,7 
+1682,7 @@ chk_engine_cont_set_label_ult(void *args) cru.cru_details = details; cru.cru_result = result; - rc = chk_engine_report(&cru, &seq, &decision); + rc = chk_report(ins, &cru, &seq, &decision); D_CDEBUG(result != 0 || rc != 0, DLOG_ERR, DLOG_INFO, DF_ENGINE" detects inconsistent container label for "DF_UUIDF"/"DF_UUIDF @@ -2180,15 +2178,17 @@ chk_engine_pool_ult(void *args) static void chk_engine_sched(void *args) { - struct chk_instance *ins = args; - struct chk_bookmark *cbk = &ins->ci_bk; - struct chk_pool_rec *cpr; - uint32_t ins_phase; - uint32_t ins_status; - uint32_t pool_status; - d_rank_t myrank = dss_self_rank(); - int done = 0; - int rc = 0; + struct chk_instance *ins = args; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_pending_rec *pending; + struct chk_pool_rec *cpr; + uint32_t ins_phase; + uint32_t ins_status; + uint32_t pool_status; + uint32_t act; + d_rank_t myrank = dss_self_rank(); + int done = 0; + int rc = 0; D_INFO(DF_ENGINE" scheduler on rank %u entry at phase %u\n", DP_ENGINE(ins), myrank, cbk->cb_phase); @@ -2200,6 +2200,17 @@ chk_engine_sched(void *args) if (ins->ci_sched_exiting) D_GOTO(out, rc = 0); + if (!d_list_empty(&ins->ci_interaction_filter_list)) { + pending = d_list_pop_entry(&ins->ci_interaction_filter_list, + struct chk_pending_rec, cpr_ins_link); + act = ins->ci_prop.cp_policies[pending->cpr_class]; + if (pending->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT || + !chk_is_valid_action(pending, act)) + d_list_add_tail(&pending->cpr_ins_link, &ins->ci_pending_list); + else + chk_engine_act(pending->cpr_seq, act); + } + ins_phase = chk_pools_find_slowest(ins, &done); /* @@ -2935,62 +2946,10 @@ chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version) return rc; } -static int -chk_engine_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act) -{ - struct chk_pending_rec *cpr = NULL; - int rc; - - rc = chk_pending_del(ins, seq, &cpr); - if (rc == 0) { - /* The cpr will be 
destroyed by the waiter via chk_engine_report(). */ - D_ASSERT(cpr->cpr_busy); - - ABT_mutex_lock(cpr->cpr_mutex); - /* - * It is the control plane's duty to guarantee that the decision is a valid - * action from the report options. Otherwise, related inconsistency will be - * ignored. - */ - cpr->cpr_action = act; - ABT_cond_broadcast(cpr->cpr_cond); - ABT_mutex_unlock(cpr->cpr_mutex); - } - - D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, - DF_ENGINE" on rank %u takes action for seq "DF_X64" with action %u: %d\n", - DP_ENGINE(ins), dss_self_rank(), seq, act, rc); - - return rc; -} - int -chk_engine_act(uint64_t gen, uint64_t seq, uint32_t act) +chk_engine_act(uint64_t seq, uint32_t act) { - struct chk_instance *ins = chk_engine; - int rc; - - CHK_IS_READY(ins); - - if (ins->ci_bk.cb_gen != gen) - D_GOTO(out, rc = -DER_NOTAPPLICABLE); - - /* The admin may input the wrong option, not acceptable. */ - if (unlikely(act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)) { - D_ERROR("%u is not acceptable for interaction decision.\n", act); - D_GOTO(out, rc = -DER_INVAL); - } - - rc = chk_engine_act_internal(ins, seq, act); - if (rc == -DER_NONEXIST || rc == -DER_NO_HDL) - rc = 0; - -out: - D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, - DF_ENGINE " on rank %u takes act %u, seq " DF_X64 ", gen " DF_X64 ": " DF_RC "\n", - DP_ENGINE(ins), dss_self_rank(), act, seq, gen, DP_RC(rc)); - - return rc; + return chk_act_internal(chk_engine, seq, act); } static int @@ -3199,7 +3158,7 @@ chk_engine_pool_start(uint64_t gen, uuid_t uuid, uint32_t phase, uint32_t flags) } rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, uuid, dss_self_rank(), - &new, ins, NULL, NULL, NULL, &cpr); + false, &new, ins, NULL, NULL, NULL, &cpr); if (rc != 0) goto out; } else { @@ -3386,135 +3345,7 @@ chk_engine_pool_mbs(uint64_t gen, uuid_t uuid, uint32_t phase, const char *label int chk_engine_set_policy(uint64_t gen, uint32_t policy_nr, struct chk_policy *policies) { - struct chk_instance *ins = chk_engine; - 
struct chk_bookmark *cbk = &ins->ci_bk; - struct chk_property *prop = &ins->ci_prop; - int rc = 0; - - CHK_IS_READY(ins); - - /* Do nothing if no (engine) check instance is running. */ - if (cbk->cb_magic != CHK_BK_MAGIC_ENGINE || cbk->cb_gen != gen || - cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) - D_GOTO(out, rc = -DER_NOTAPPLICABLE); - - rc = chk_policy_refresh(policy_nr, policies, prop); - if (rc > 0) - rc = chk_prop_update(prop, NULL); - -out: - D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, DF_ENGINE " set policy: " DF_RC "\n", DP_ENGINE(ins), - DP_RC(rc)); - - return rc == -DER_NOTAPPLICABLE ? 0 : rc; -} - -/* - * \return Positive value if interaction is interrupted, such as check stop. - * Zero on success. - * Negative value if error. - */ -static int -chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) -{ - struct chk_instance *ins = chk_engine; - struct chk_pending_rec *cpr = NULL; - struct chk_pool_rec *pool = NULL; - d_iov_t kiov; - d_iov_t riov; - int rc; - - D_ASSERT(cru->cru_pool != NULL); - - if (*seq == 0) { - -new_seq: - *seq = chk_report_seq_gen(ins); - } - - if (cru->cru_act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) { - d_iov_set(&riov, NULL, 0); - d_iov_set(&kiov, cru->cru_pool, sizeof(uuid_t)); - rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); - if (rc != 0) - goto log; - - pool = (struct chk_pool_rec *)riov.iov_buf; - - rc = chk_pending_add(ins, &pool->cpr_pending_list, NULL, *cru->cru_pool, *seq, - cru->cru_rank, cru->cru_cla, cru->cru_option_nr, - cru->cru_options, &cpr); - if (unlikely(rc == -DER_AGAIN)) - goto new_seq; - - if (rc != 0) - goto log; - } - - rc = chk_report_remote(ins->ci_prop.cp_leader, ins->ci_bk.cb_gen, cru->cru_cla, - cru->cru_act, cru->cru_result, cru->cru_rank, cru->cru_target, - cru->cru_pool, cru->cru_pool_label, cru->cru_cont, - cru->cru_cont_label, cru->cru_obj, cru->cru_dkey, - cru->cru_akey, cru->cru_msg, cru->cru_option_nr, cru->cru_options, - cru->cru_detail_nr, 
cru->cru_details, *seq); - if (unlikely(rc == -DER_AGAIN)) { - D_ASSERT(cru->cru_act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT); - D_ASSERT(cpr != NULL); - - chk_pending_destroy(ins, cpr); - cpr = NULL; - - goto new_seq; - } - - /* Check cpr->cpr_action for the case of "dmg check repair" by race. */ - if (rc == 0 && pool != NULL && - likely(cpr->cpr_action == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)) - pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_PENDING; - -log: - DL_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, rc, - DF_ENGINE " on rank %u report with class %u, action %u, seq " DF_X64 ", %s, " - "handle_rc %d, report_rc %d", - DP_ENGINE(ins), cru->cru_rank, cru->cru_cla, cru->cru_act, *seq, cru->cru_msg, - cru->cru_result, rc); - - if (rc != 0 || cpr == NULL) - goto out; - - D_ASSERT(cpr->cpr_busy); - - D_INFO(DF_ENGINE" on rank %u need interaction for class %u\n", - DP_ENGINE(ins), cru->cru_rank, cru->cru_cla); - - ABT_mutex_lock(cpr->cpr_mutex); - -again: - if (cpr->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) { - *decision = cpr->cpr_action; - ABT_mutex_unlock(cpr->cpr_mutex); - goto out; - } - - if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) { - rc = 1; - ABT_mutex_unlock(cpr->cpr_mutex); - goto out; - } - - ABT_cond_wait(cpr->cpr_cond, cpr->cpr_mutex); - - goto again; - -out: - if (cpr != NULL) - chk_pending_destroy(ins, cpr); - - if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && - d_list_empty(&pool->cpr_pending_list)) - pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; - - return rc; + return chk_set_policy(chk_engine, gen, policy_nr, policies); } int diff --git a/src/chk/chk_internal.h b/src/chk/chk_internal.h index debb3db3127..a6caa726539 100644 --- a/src/chk/chk_internal.h +++ b/src/chk/chk_internal.h @@ -45,7 +45,7 @@ X(CHK_MARK, \ 0, &CQF_chk_mark, ds_chk_mark_hdlr, &chk_mark_co_ops), \ X(CHK_ACT, \ - 0, &CQF_chk_act, ds_chk_act_hdlr, 
&chk_act_co_ops), \ + 0, &CQF_chk_act, ds_chk_act_hdlr, NULL), \ X(CHK_CONT_LIST, \ 0, &CQF_chk_cont_list, ds_chk_cont_list_hdlr, &chk_cont_list_co_ops), \ X(CHK_POOL_START, \ @@ -156,7 +156,7 @@ CRT_RPC_DECLARE(chk_query, DAOS_ISEQ_CHK_QUERY, DAOS_OSEQ_CHK_QUERY); CRT_RPC_DECLARE(chk_mark, DAOS_ISEQ_CHK_MARK, DAOS_OSEQ_CHK_MARK); /* - * CHK_ACT: + * CHK_ACT: obsolete - DAOS-18674. * From check leader to check engine to execute the admin specified repair action for former * reported inconsistency under interaction mode. */ @@ -228,7 +228,7 @@ CRT_RPC_DECLARE(chk_pool_start, DAOS_ISEQ_CHK_POOL_START, DAOS_OSEQ_CHK_POOL_STA CRT_RPC_DECLARE(chk_pool_mbs, DAOS_ISEQ_CHK_POOL_MBS, DAOS_OSEQ_CHK_POOL_MBS); /* - * CHK_REPORT: + * CHK_REPORT: obsolete - DAOS-18674. * From check engine to check leader to report the inconsistency and related repair action * and result. It can require to interact with the admin to make decision for how to handle * the inconsistency. @@ -318,15 +318,11 @@ CRT_RPC_DECLARE(chk_set_policy, DAOS_ISEQ_CHK_SET_POLICY, DAOS_OSEQ_CHK_SET_POLI #define CHK_LEADER_RANK (uint32_t)(-1) /* - * Keep the lowest 20-bits of DAOS engine rank in the check report sequence. - * If the count of DAOS engines exceeds 2 ^ 20, then different check engines - * may generate the same sequence for different check reports. Such conflict - * is not fatal for non-interaction report. As for interaction report, check - * leader will detect such report sequqnce conflict and ask related engine(s) - * to generate new sequence(s). + * The highest bit of the CHK report sequence is reserved; the next 23 bits hold the DAOS engine + * rank, and the remaining low 40 bits hold the detailed sequence number. 
*/ -#define CHK_REPORT_RANK_BIT 40 -#define CHK_REPORT_SEQ_MASK ((1ULL << CHK_REPORT_RANK_BIT) - 1) +#define CHK_REPORT_SEQ_BIT 40 +#define CHK_REPORT_SEQ_MASK ((1ULL << CHK_REPORT_SEQ_BIT) - 1) #define CHK_BTREE_ORDER 16 @@ -612,21 +608,12 @@ struct chk_pool_rec { /* The list of active ULTs that are handling some inconsistency and maybe blocked. */ d_list_t cpr_ult_list; uint32_t cpr_shard_nr; - uint32_t cpr_started:1, - cpr_start_post:1, - cpr_stop:1, - cpr_done:1, - cpr_skip:1, - cpr_immutable:1, - cpr_dangling:1, - cpr_for_orphan:1, - cpr_notified_exit:1, - cpr_destroyed:1, - cpr_healthy:1, - cpr_delay_label:1, - cpr_exist_on_ms:1, - cpr_not_export_ps:1, - cpr_map_refreshed:1; + + uint32_t cpr_started : 1, cpr_start_post : 1, cpr_stop : 1, cpr_done : 1, cpr_skip : 1, + cpr_immutable : 1, cpr_dangling : 1, cpr_for_orphan : 1, cpr_notified_exit : 1, + cpr_destroyed : 1, cpr_healthy : 1, cpr_delay_label : 1, cpr_exist_on_ms : 1, + cpr_not_export_ps : 1, cpr_map_refreshed : 1, cpr_has_pending : 1; + int cpr_advice; int cpr_refs; uuid_t cpr_uuid; @@ -645,8 +632,6 @@ struct chk_pool_rec { struct chk_pending_rec { /* Link into chk_pool_rec::cpr_pending_list. */ d_list_t cpr_pool_link; - /* Link into chk_rank_rec::crr_pending_list. */ - d_list_t cpr_rank_link; /* Link into chk_instance::ci_pending_list or chk_instance::ci_interaction_filter_list. */ d_list_t cpr_ins_link; uuid_t cpr_uuid; @@ -654,7 +639,7 @@ struct chk_pending_rec { d_rank_t cpr_rank; uint32_t cpr_class; uint32_t cpr_action; - uint32_t cpr_busy : 1, cpr_exiting : 1, cpr_on_leader : 1; + uint32_t cpr_busy : 1, cpr_exiting : 1; uint32_t cpr_option_nr; /* Currently, support at most three options, can be extended if necessary in future. 
*/ uint32_t cpr_options[CHK_INTERACT_OPTION_MAX]; @@ -699,13 +684,12 @@ struct chk_dead_rank { extern struct crt_proto_format chk_proto_fmt; -extern struct crt_corpc_ops chk_start_co_ops; -extern struct crt_corpc_ops chk_stop_co_ops; -extern struct crt_corpc_ops chk_query_co_ops; -extern struct crt_corpc_ops chk_mark_co_ops; -extern struct crt_corpc_ops chk_act_co_ops; -extern struct crt_corpc_ops chk_cont_list_co_ops; -extern struct crt_corpc_ops chk_pool_start_co_ops; +extern struct crt_corpc_ops chk_start_co_ops; +extern struct crt_corpc_ops chk_stop_co_ops; +extern struct crt_corpc_ops chk_query_co_ops; +extern struct crt_corpc_ops chk_mark_co_ops; +extern struct crt_corpc_ops chk_cont_list_co_ops; +extern struct crt_corpc_ops chk_pool_start_co_ops; extern struct crt_corpc_ops chk_set_policy_co_ops; extern btr_ops_t chk_pool_ops; @@ -746,23 +730,22 @@ int chk_pools_update_bk(struct chk_instance *ins, uint32_t phase); int chk_pool_handle_notify(struct chk_instance *ins, struct chk_iv *iv); int chk_pool_add_shard(daos_handle_t hdl, d_list_t *head, uuid_t uuid, d_rank_t rank, - struct chk_bookmark *bk, struct chk_instance *ins, + bool has_pending, struct chk_bookmark *bk, struct chk_instance *ins, uint32_t *shard_nr, void *data, chk_pool_free_data_t free_cb, struct chk_pool_rec **cpr); void chk_pool_shard_cleanup(struct chk_instance *ins); -int chk_pending_lookup(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr); - -int chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid, - uint64_t seq, uint32_t rank, uint32_t cla, uint32_t option_nr, - uint32_t *options, struct chk_pending_rec **cpr); - int chk_pending_del(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr); int chk_pending_wakeup(struct chk_instance *ins, struct chk_pending_rec *cpr); -int chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_property *prop); +int chk_act_internal(struct chk_instance *ins, 
uint64_t seq, uint32_t act); + +int chk_report(struct chk_instance *ins, struct chk_report_unit *cru, uint64_t *seq, int *decision); + +int chk_set_policy(struct chk_instance *ins, uint64_t gen, uint32_t policy_nr, + struct chk_policy *policies); int chk_prop_prepare(d_rank_t leader, uint32_t flags, uint32_t policy_nr, struct chk_policy *policies, d_rank_list_t *ranks, struct chk_property *prop); @@ -793,7 +776,7 @@ int chk_engine_query(uint64_t gen, int pool_nr, uuid_t pools[], uint32_t *ins_st int chk_engine_mark_rank_dead(uint64_t gen, d_rank_t rank, uint32_t version); -int chk_engine_act(uint64_t gen, uint64_t seq, uint32_t act); +int chk_engine_act(uint64_t seq, uint32_t act); int chk_engine_cont_list(uint64_t gen, uuid_t pool_uuid, uuid_t **conts, uint32_t *count); @@ -838,7 +821,7 @@ bool chk_is_on_leader(uint64_t gen, d_rank_t leader, bool known_leader); struct ds_iv_ns *chk_leader_get_iv_ns(void); -int chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision); +int chk_leader_act(uint64_t seq, uint32_t act); int chk_leader_notify(struct chk_iv *iv); @@ -868,9 +851,6 @@ int chk_query_remote(d_rank_list_t *rank_list, uint64_t gen, int pool_nr, uuid_t int chk_mark_remote(d_rank_list_t *rank_list, uint64_t gen, d_rank_t rank, uint32_t version); -int chk_act_remote(d_rank_list_t *rank_list, uint64_t gen, uint64_t seq, uint32_t cla, - uint32_t act, d_rank_t rank); - int chk_cont_list_remote(struct ds_pool *pool, uint64_t gen, chk_co_rpc_cb_t list_cb, void *args); int chk_pool_start_remote(d_rank_list_t *rank_list, uint64_t gen, uuid_t uuid, uint32_t phase, @@ -880,12 +860,6 @@ int chk_pool_mbs_remote(d_rank_t rank, uint32_t phase, uint64_t gen, uuid_t uuid uint64_t seq, uint32_t flags, uint32_t mbs_nr, struct chk_pool_mbs *mbs_array, int *svc_rc, struct rsvc_hint *svc_hint); -int chk_report_remote(d_rank_t leader, uint64_t gen, uint32_t cla, uint32_t act, int result, - d_rank_t rank, uint32_t target, uuid_t *pool, char *pool_label, - uuid_t 
*cont, char *cont_label, daos_unit_oid_t *obj, daos_key_t *dkey, - daos_key_t *akey, char *msg, uint32_t option_nr, uint32_t *options, - uint32_t detail_nr, d_sg_list_t *details, uint64_t seq); - int chk_rejoin_remote(d_rank_t leader, uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, uint32_t *ns_ver, uint32_t *pool_nr, uuid_t **pools, d_rank_list_t **ranks); @@ -1000,7 +974,6 @@ static inline void chk_pending_destroy(struct chk_instance *ins, struct chk_pending_rec *cpr) { if (d_list_empty(&cpr->cpr_pool_link)) { - D_ASSERT(d_list_empty(&cpr->cpr_rank_link)); D_ASSERT(d_list_empty(&cpr->cpr_ins_link)); if (cpr->cpr_cond != ABT_COND_NULL) @@ -1269,7 +1242,7 @@ chk_report_seq_init(struct chk_instance *ins) else myrank = dss_self_rank(); - ins->ci_seq = (myrank << CHK_REPORT_RANK_BIT) | (d_hlc_get() >> (64 - CHK_REPORT_RANK_BIT)); + ins->ci_seq = (myrank << CHK_REPORT_SEQ_BIT) | (d_hlc_get() & CHK_REPORT_SEQ_MASK); /* Clear the highest bit. */ ins->ci_seq &= ~(1ULL << 63); @@ -1287,6 +1260,13 @@ chk_report_seq_gen(struct chk_instance *ins) return ins->ci_seq; } +static inline bool +chk_report_seq_leader(uint64_t seq) +{ + return (seq & ~CHK_REPORT_SEQ_MASK) == + (((uint64_t)CHK_LEADER_RANK << CHK_REPORT_SEQ_BIT) & ~(1ULL << 63)); +} + static inline void chk_uuid_unparse(struct chk_instance *ins, const uuid_t uuid, char *uuid_str) { diff --git a/src/chk/chk_leader.c b/src/chk/chk_leader.c index 18be52d0ac0..c3ec4c3fc8f 100644 --- a/src/chk/chk_leader.c +++ b/src/chk/chk_leader.c @@ -39,12 +39,10 @@ struct chk_query_args { struct chk_rank_rec { /* Link into chk_instance::ci_rank_list. */ - d_list_t crr_link; - /* The list of chk_pending_rec. 
*/ - d_list_t crr_pending_list; - d_rank_t crr_rank; - uint32_t crr_phase; - struct chk_instance *crr_ins; + d_list_t crr_link; + d_rank_t crr_rank; + uint32_t crr_phase; + struct chk_instance *crr_ins; }; struct chk_rank_bundle { @@ -81,7 +79,6 @@ chk_rank_alloc(struct btr_instance *tins, d_iov_t *key_iov, d_iov_t *val_iov, if (crr == NULL) D_GOTO(out, rc = -DER_NOMEM); - D_INIT_LIST_HEAD(&crr->crr_pending_list); crr->crr_rank = crb->crb_rank; crr->crr_phase = crb->crb_phase; crr->crr_ins = crb->crb_ins; @@ -103,16 +100,10 @@ chk_rank_free(struct btr_instance *tins, struct btr_record *rec, void *args) rec->rec_off = UMOFF_NULL; d_list_del_init(&crr->crr_link); - if (val_iov != NULL) { + if (val_iov != NULL) d_iov_set(val_iov, crr, sizeof(*crr)); - } else { - /* - * This only happens when destroy the rank tree. At that time, - * the pending records tree has already been destroyed. - */ - D_ASSERT(d_list_empty(&crr->crr_pending_list)); + else D_FREE(crr); - } return 0; } @@ -179,34 +170,16 @@ chk_leader_get_iv_ns(void) static int chk_rank_del(struct chk_instance *ins, d_rank_t rank) { - struct chk_rank_rec *crr; - struct chk_pending_rec *cpr; - d_iov_t riov; - d_iov_t kiov; - int rc; - int rc1; + d_iov_t riov; + d_iov_t kiov; + int rc; d_iov_set(&riov, NULL, 0); d_iov_set(&kiov, &rank, sizeof(rank)); rc = dbtree_delete(ins->ci_rank_hdl, BTR_PROBE_EQ, &kiov, &riov); - if (rc != 0) - D_GOTO(out, rc = ((rc == -DER_NONEXIST || rc == -DER_NO_HDL) ? 0 : rc)); - - crr = (struct chk_rank_rec *)riov.iov_buf; - if (d_list_empty(&crr->crr_pending_list)) - goto out; - - /* Cleanup all pending records belong to this rank. 
*/ - ABT_rwlock_wrlock(ins->ci_abt_lock); - while ((cpr = d_list_pop_entry(&crr->crr_pending_list, struct chk_pending_rec, - cpr_rank_link)) != NULL) { - rc1 = chk_pending_wakeup(ins, cpr); - if (rc1 != 0 && rc == 0) - rc = rc1; - } - ABT_rwlock_unlock(ins->ci_abt_lock); + if (rc == -DER_NONEXIST || rc == -DER_NO_HDL) + rc = 0; -out: return rc; } @@ -416,7 +389,7 @@ chk_leader_fail_pool(struct chk_pool_rec *cpr, int result) cru.cru_msg = "Some engine failed to report information for pool.\n"; cru.cru_result = result; - rc = chk_leader_report(&cru, &seq, NULL); + rc = chk_report(ins, &cru, &seq, NULL); D_WARN(DF_LEADER" some engine failed to report information for pool " DF_UUIDF", action %u, seq "DF_X64", remote_rc %d, report_rc %d\n", @@ -651,7 +624,7 @@ chk_leader_dangling_pool(struct chk_pool_rec *cpr) cru.cru_details = details; cru.cru_result = result; - rc = chk_leader_report(&cru, &seq, &decision); + rc = chk_report(ins, &cru, &seq, &decision); D_CDEBUG(result != 0 || rc < 0, DLOG_ERR, DLOG_INFO, DF_LEADER" detects dangling pool "DF_UUIDF", action %u (%s), seq " @@ -851,7 +824,7 @@ chk_leader_orphan_pool(struct chk_pool_rec *cpr) cru.cru_details = details; cru.cru_result = result; - rc = chk_leader_report(&cru, &seq, &decision); + rc = chk_report(ins, &cru, &seq, &decision); D_CDEBUG(result != 0 || rc < 0, DLOG_ERR, DLOG_INFO, DF_LEADER" detects orphan pool "DF_UUIDF", action %u (%s), seq " @@ -1155,7 +1128,7 @@ chk_leader_no_quorum_pool(struct chk_pool_rec *cpr) cru.cru_details = details; cru.cru_result = result; - rc = chk_leader_report(&cru, &seq, &decision); + rc = chk_report(ins, &cru, &seq, &decision); D_CDEBUG(result != 0 || rc < 0, DLOG_ERR, DLOG_INFO, DF_LEADER" detects corrupted pool "DF_UUIDF", action %u (%s), seq " @@ -1567,7 +1540,7 @@ chk_leader_handle_pool_label(struct chk_pool_rec *cpr, struct ds_pool_clue *clue cru.cru_details = details; cru.cru_result = result; - rc = chk_leader_report(&cru, &seq, &decision); + rc = chk_report(ins, &cru, 
&seq, &decision); D_CDEBUG(result != 0 || rc < 0, DLOG_ERR, DLOG_INFO, DF_LEADER" detects corrupted label for pool "DF_UUIDF", action %u (%s), seq " @@ -1777,7 +1750,7 @@ chk_leader_handle_pools_list(struct chk_instance *ins) } rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, - clp[i].clp_uuid, CHK_LEADER_RANK, + clp[i].clp_uuid, CHK_LEADER_RANK, false, NULL /* bookmark */, ins, NULL /* shard_nr */, NULL /* data */, NULL, &cpr); if (rc != 0) { @@ -2830,7 +2803,7 @@ chk_leader_start_cb(struct chk_co_rpc_cb_args *cb_args) goto out; rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, clue->pc_uuid, - clue->pc_rank, NULL, ins, NULL, clue, + clue->pc_rank, false, NULL, ins, NULL, clue, chk_leader_free_clue, NULL); if (rc != 0) { chk_leader_free_clue(clue); @@ -3211,9 +3184,10 @@ chk_leader_query_cb(struct chk_co_rpc_cb_args *cb_args) if (rc != 0) goto out; - rc = chk_pool_add_shard(cqa->cqa_hdl, &cqa->cqa_list, shard->cqps_uuid, - shard->cqps_rank, NULL, cqa->cqa_ins, &cqa->cqa_count, - shard, chk_leader_free_shard, NULL); + rc = chk_pool_add_shard( + cqa->cqa_hdl, &cqa->cqa_list, shard->cqps_uuid, shard->cqps_rank, + shard->cqps_status == CHK__CHECK_POOL_STATUS__CPS_PENDING, NULL, cqa->cqa_ins, + &cqa->cqa_count, shard, chk_leader_free_shard, NULL); if (rc != 0) { chk_leader_free_shard(shard); goto out; @@ -3382,9 +3356,10 @@ chk_leader_query(int pool_nr, uuid_t pools[], chk_query_head_cb_t head_cb, shard->cqps_phase = cpr->cpr_bk.cb_phase; shard->cqps_rank = CHK_LEADER_RANK; - rc = chk_pool_add_shard(cqa->cqa_hdl, &cqa->cqa_list, cpr->cpr_uuid, - CHK_LEADER_RANK, NULL, ins, &cqa->cqa_count, - shard, chk_leader_free_shard, NULL); + rc = chk_pool_add_shard( + cqa->cqa_hdl, &cqa->cqa_list, cpr->cpr_uuid, CHK_LEADER_RANK, + shard->cqps_status == CHK__CHECK_POOL_STATUS__CPS_PENDING, NULL, ins, + &cqa->cqa_count, shard, chk_leader_free_shard, NULL); if (rc != 0) goto out; } @@ -3426,13 +3401,19 @@ chk_leader_query(int pool_nr, uuid_t pools[], 
chk_query_head_cb_t head_cb, * are still in running status. We summarize the status for the query * result to avoid confusing. It is just temporary solution, and will * be moved to control plane in the future - DAOS-13989. + * + * After supporting CHK leader switch, CHK engine can directly report + * to control plane instead of via CHK leader, then CHK leader may not + * know CHK interaction. Let's handle that when query - DAOS-18674. */ - if (cps->cps_rank != CHK_LEADER_RANK) { + if (cpr->cpr_has_pending == 0) shard->cqps_status = chk_pool_merge_status(shard->cqps_status, status); - if (shard->cqps_phase < phase) - shard->cqps_phase = phase; - } + else + shard->cqps_status = CHK__CHECK_POOL_STATUS__CPS_PENDING; + + if (shard->cqps_phase < phase) + shard->cqps_phase = phase; rc = pool_cb(shard, idx++, buf); if (rc != 0) @@ -3460,263 +3441,16 @@ chk_leader_prop(chk_prop_cb_t prop_cb, void *buf) return prop_cb(buf, prop->cp_policies, CHK_POLICY_MAX - 1, prop->cp_flags); } -static int -chk_leader_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act) -{ - struct chk_pending_rec *pending = NULL; - struct chk_pool_rec *pool = NULL; - d_iov_t kiov; - d_iov_t riov; - int rc; - - rc = chk_pending_lookup(ins, seq, &pending); - if (rc != 0) - goto out; - - if (pending->cpr_on_leader) { - ABT_mutex_lock(pending->cpr_mutex); - /* - * It is the control plane's duty to guarantee that the decision is a valid - * action from the report options. Otherwise, related inconsistency will be ignored. 
- */ - pending->cpr_action = act; - ABT_cond_broadcast(pending->cpr_cond); - ABT_mutex_unlock(pending->cpr_mutex); - chk_pending_del(ins, seq, &pending); - } else { - d_iov_set(&riov, NULL, 0); - d_iov_set(&kiov, pending->cpr_uuid, sizeof(uuid_t)); - rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); - if (rc == 0) - pool = (struct chk_pool_rec *)riov.iov_buf; - - rc = chk_act_remote(ins->ci_ranks, ins->ci_bk.cb_gen, seq, pending->cpr_class, act, - pending->cpr_rank); - if (rc == 0) { - chk_pending_destroy(ins, pending); - - if (pool != NULL && - pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && - d_list_empty(&pool->cpr_pending_list)) - pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; - } - } - -out: - D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, - DF_LEADER" takes action for report with seq "DF_X64", action %u: "DF_RC"\n", - DP_LEADER(ins), seq, act, DP_RC(rc)); - - return rc; -} - int chk_leader_act(uint64_t seq, uint32_t act) { - struct chk_instance *ins = chk_leader; - struct chk_bookmark *cbk = &ins->ci_bk; - int rc; - - CHK_IS_READY(ins); - - if (cbk->cb_magic != CHK_BK_MAGIC_LEADER) - D_GOTO(out, rc = -DER_NOTLEADER); - - /* Tell control plane that no check instance is running via "-DER_NOTAPPLICABLE". */ - if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) - D_GOTO(out, rc = -DER_NOTAPPLICABLE); - - /* The admin may input the wrong option, not acceptable. 
*/ - if (unlikely(act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)) { - D_ERROR("%u is not acceptable for interaction decision.\n", act); - D_GOTO(out, rc = -DER_INVAL); - } - - rc = chk_leader_act_internal(ins, seq, act); - -out: - D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, - DF_LEADER " takes action for report with seq " DF_X64 ", action %u: %d\n", - DP_LEADER(ins), seq, act, rc); - - return rc; + return chk_act_internal(chk_leader, seq, act); } int chk_leader_set_policy(uint32_t policy_nr, struct chk_policy *policies) { - struct chk_instance *ins = chk_leader; - struct chk_bookmark *cbk = &ins->ci_bk; - struct chk_property *prop = &ins->ci_prop; - struct chk_pending_rec *pending; - struct chk_pending_rec *tmp; - int rc; - - CHK_IS_READY(ins); - - /* Do nothing if no (leader) check instance is running. */ - if (cbk->cb_magic != CHK_BK_MAGIC_LEADER || - cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) - D_GOTO(out, rc = -DER_NOTAPPLICABLE); - - rc = chk_policy_refresh(policy_nr, policies, prop); - if (rc <= 0) - goto out; - - rc = chk_set_policy_remote(ins->ci_ranks, cbk->cb_gen, policy_nr, policies); - if (rc != 0) - goto out; - - rc = chk_prop_update(prop, NULL); - if (rc != 0) - goto out; - - d_list_for_each_entry_safe(pending, tmp, &ins->ci_pending_list, cpr_ins_link) { - if (chk_is_valid_action(pending, ins->ci_prop.cp_policies[pending->cpr_class])) { - d_list_del(&pending->cpr_ins_link); - d_list_add_tail(&pending->cpr_ins_link, &ins->ci_interaction_filter_list); - } - } - -out: - D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, DF_LEADER " set policy: " DF_RC "\n", DP_LEADER(ins), - DP_RC(rc)); - - return rc == -DER_NOTAPPLICABLE ? 0 : rc; -} - -/* - * \return Positive value if interaction is interrupted, such as check stop. - * Zero on success. - * Negative value if error. 
- */ -int -chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) -{ - struct chk_instance *ins = chk_leader; - struct chk_bookmark *cbk = &ins->ci_bk; - struct chk_pending_rec *cpr = NULL; - struct chk_pool_rec *pool = NULL; - struct chk_rank_rec *crr = NULL; - d_iov_t kiov; - d_iov_t riov; - int rc; - - CHK_IS_READY(ins); - - if (cbk->cb_magic != CHK_BK_MAGIC_LEADER) - D_GOTO(out, rc = -DER_NOTLEADER); - - /* Tell check engine that check leader is not running via "-DER_NOTAPPLICABLE". */ - if (cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING) - D_GOTO(out, rc = -DER_NOTAPPLICABLE); - - if (cru->cru_result == 0 && ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_DRYRUN) - cru->cru_result = CHK__CHECK_RESULT__DRY_RUN; - - if (*seq == 0) { - -new_seq: - *seq = chk_report_seq_gen(ins); - } - - D_INFO(DF_LEADER " handle %s report from rank %u with seq " DF_X64 " class %u, action %u, " - "%s, result %d\n", - DP_LEADER(ins), decision != NULL ? "local" : "remote", cru->cru_rank, *seq, - cru->cru_cla, cru->cru_act, cru->cru_msg, cru->cru_result); - - if (cru->cru_act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) { - if (cru->cru_pool == NULL) - D_GOTO(log, rc = -DER_INVAL); - - d_iov_set(&riov, NULL, 0); - d_iov_set(&kiov, cru->cru_pool, sizeof(uuid_t)); - rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); - if (rc != 0) - goto log; - - pool = (struct chk_pool_rec *)riov.iov_buf; - - if (decision == NULL) { - d_iov_set(&riov, NULL, 0); - d_iov_set(&kiov, &cru->cru_rank, sizeof(cru->cru_rank)); - rc = dbtree_lookup(ins->ci_rank_hdl, &kiov, &riov); - if (rc != 0) - goto log; - - crr = (struct chk_rank_rec *)riov.iov_buf; - } - - rc = chk_pending_add(ins, &pool->cpr_pending_list, - crr != NULL ? 
&crr->crr_pending_list : NULL, *cru->cru_pool, - *seq, cru->cru_rank, cru->cru_cla, cru->cru_option_nr, - cru->cru_options, &cpr); - if (decision != NULL) { - if (unlikely(rc == -DER_AGAIN)) - goto new_seq; - - cpr->cpr_on_leader = 1; - } - - if (rc != 0) - goto log; - } - - rc = chk_report_upcall(cru->cru_gen, *seq, cru->cru_cla, cru->cru_act, cru->cru_result, - cru->cru_rank, cru->cru_target, cru->cru_pool, cru->cru_pool_label, - cru->cru_cont, cru->cru_cont_label, cru->cru_obj, cru->cru_dkey, - cru->cru_akey, cru->cru_msg, cru->cru_option_nr, cru->cru_options, - cru->cru_detail_nr, cru->cru_details); - /* Check cpr->cpr_action for the case of "dmg check repair" by race. */ - if (rc == 0 && pool != NULL && - likely(cpr->cpr_action == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)) - pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_PENDING; - -log: - if (rc != 0) { - D_ERROR(DF_LEADER" failed to handle %s report from rank %u with seq " - DF_X64", class %u, action %u, handle_rc %d, report_rc %d\n", - DP_LEADER(ins), decision != NULL ? 
"local" : "remote", cru->cru_rank, *seq, - cru->cru_cla, cru->cru_act, cru->cru_result, rc); - goto out; - } - - if (decision == NULL || cpr == NULL) - goto out; - - D_ASSERT(cpr->cpr_busy); - - D_INFO(DF_LEADER" need interaction for class %u with seq "DF_X64"\n", - DP_LEADER(ins), cru->cru_cla, *seq); - - ABT_mutex_lock(cpr->cpr_mutex); - -again: - if (cpr->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) { - *decision = cpr->cpr_action; - ABT_mutex_unlock(cpr->cpr_mutex); - goto out; - } - - if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) { - rc = 1; - ABT_mutex_unlock(cpr->cpr_mutex); - goto out; - } - - ABT_cond_wait(cpr->cpr_cond, cpr->cpr_mutex); - - goto again; - -out: - if ((rc != 0 || decision != NULL) && cpr != NULL) - chk_pending_destroy(ins, cpr); - - if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && - d_list_empty(&pool->cpr_pending_list)) - pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; - - return rc; + return chk_set_policy(chk_leader, chk_leader->ci_bk.cb_gen, policy_nr, policies); } int diff --git a/src/chk/chk_rpc.c b/src/chk/chk_rpc.c index e250936dfc2..47a574f60bf 100644 --- a/src/chk/chk_rpc.c +++ b/src/chk/chk_rpc.c @@ -324,24 +324,6 @@ chk_mark_aggregator(crt_rpc_t *source, crt_rpc_t *result, void *priv) return 0; } -static int -chk_act_aggregator(crt_rpc_t *source, crt_rpc_t *result, void *priv) -{ - struct chk_act_in *in_source = crt_req_get(source); - struct chk_act_out *out_source = crt_reply_get(source); - struct chk_act_out *out_result = crt_reply_get(result); - - if (out_source->cao_status != 0) { - D_ERROR("Failed to check act with gen "DF_X64": "DF_RC"\n", - in_source->cai_gen, DP_RC(out_source->cao_status)); - - if (out_result->cao_status == 0) - out_result->cao_status = out_source->cao_status; - } - - return 0; -} - static int chk_cont_list_aggregator(crt_rpc_t *source, crt_rpc_t *result, void *priv) { @@ -466,11 +448,6 @@ struct 
crt_corpc_ops chk_mark_co_ops = { .co_pre_forward = NULL, }; -struct crt_corpc_ops chk_act_co_ops = { - .co_aggregate = chk_act_aggregator, - .co_pre_forward = NULL, -}; - struct crt_corpc_ops chk_cont_list_co_ops = { .co_aggregate = chk_cont_list_aggregator, .co_pre_forward = NULL, @@ -762,44 +739,6 @@ chk_mark_remote(d_rank_list_t *rank_list, uint64_t gen, d_rank_t rank, uint32_t return rc; } -int -chk_act_remote(d_rank_list_t *rank_list, uint64_t gen, uint64_t seq, uint32_t cla, uint32_t act, - d_rank_t rank) -{ - crt_rpc_t *req = NULL; - struct chk_act_in *cai; - struct chk_act_out *cao; - int rc; - - rc = chk_sg_rpc_prepare(rank, CHK_ACT, &req); - if (rc != 0) - goto out; - - cai = crt_req_get(req); - cai->cai_gen = gen; - cai->cai_seq = seq; - cai->cai_cla = cla; - cai->cai_act = act; - cai->cai_flags = 0; - - rc = dss_rpc_send(req); - if (rc != 0) - goto out; - - cao = crt_reply_get(req); - rc = cao->cao_status; - -out: - if (req != NULL) - crt_req_decref(req); - - D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, - "Rank %u take action for DAOS check with gen "DF_X64", seq "DF_X64": "DF_RC"\n", - rank, gen, seq, DP_RC(rc)); - - return rc; -} - int chk_cont_list_remote(struct ds_pool *pool, uint64_t gen, chk_co_rpc_cb_t list_cb, void *args) { @@ -939,84 +878,6 @@ chk_pool_mbs_remote(d_rank_t rank, uint32_t phase, uint64_t gen, uuid_t uuid, ch return rc; } -int chk_report_remote(d_rank_t leader, uint64_t gen, uint32_t cla, uint32_t act, int result, - d_rank_t rank, uint32_t target, uuid_t *pool, char *pool_label, uuid_t *cont, - char *cont_label, daos_unit_oid_t *obj, daos_key_t *dkey, daos_key_t *akey, - char *msg, uint32_t option_nr, uint32_t *options, uint32_t detail_nr, - d_sg_list_t *details, uint64_t seq) -{ - crt_rpc_t *req = NULL; - struct chk_report_in *cri; - struct chk_report_out *cro; - int rc; - - rc = chk_sg_rpc_prepare(leader, CHK_REPORT, &req); - if (rc != 0) - goto out; - - cri = crt_req_get(req); - cri->cri_gen = gen; - cri->cri_ics_class = cla; - 
cri->cri_ics_action = act; - cri->cri_ics_result = result; - cri->cri_rank = rank; - cri->cri_target = target; - cri->cri_seq = seq; - - if (pool != NULL) - uuid_copy(cri->cri_pool, *pool); - else - memset(cri->cri_pool, 0, sizeof(uuid_t)); - - cri->cri_pool_label = pool_label; - - if (cont != NULL) - uuid_copy(cri->cri_cont, *cont); - else - memset(cri->cri_cont, 0, sizeof(uuid_t)); - - cri->cri_cont_label = cont_label; - - if (obj != NULL) - cri->cri_obj = *obj; - else - memset(&cri->cri_obj, 0, sizeof(cri->cri_obj)); - - if (dkey != NULL) - cri->cri_dkey = *dkey; - else - memset(&cri->cri_dkey, 0, sizeof(cri->cri_dkey)); - - if (akey != NULL) - cri->cri_akey = *akey; - else - memset(&cri->cri_akey, 0, sizeof(cri->cri_akey)); - - cri->cri_msg = msg; - cri->cri_options.ca_count = option_nr; - cri->cri_options.ca_arrays = options; - cri->cri_details.ca_count = detail_nr; - cri->cri_details.ca_arrays = details; - - rc = dss_rpc_send(req); - if (rc != 0) - goto out; - - cro = crt_reply_get(req); - rc = cro->cro_status; - -out: - if (req != NULL) - crt_req_decref(req); - - D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, - "Rank %u report DAOS check to leader %u, gen "DF_X64", class %u, action %u, " - "result %d, "DF_UUIDF"/"DF_UUIDF", seq "DF_X64": "DF_RC"\n", rank, leader, - gen, cla, act, result, DP_UUID(pool), DP_UUID(cont), seq, DP_RC(rc)); - - return rc; -} - int chk_rejoin_remote(d_rank_t leader, uint64_t gen, d_rank_t rank, uuid_t iv_uuid, uint32_t *flags, uint32_t *ns_ver, uint32_t *pool_nr, uuid_t **pools, d_rank_list_t **ranks) diff --git a/src/chk/chk_srv.c b/src/chk/chk_srv.c index d50e3b59657..fba39295045 100644 --- a/src/chk/chk_srv.c +++ b/src/chk/chk_srv.c @@ -141,11 +141,8 @@ ds_chk_mark_hdlr(crt_rpc_t *rpc) static void ds_chk_act_hdlr(crt_rpc_t *rpc) { - struct chk_act_in *cai = crt_req_get(rpc); - struct chk_act_out *cao = crt_reply_get(rpc); - int rc; - - rc = chk_engine_act(cai->cai_gen, cai->cai_seq, cai->cai_act); + struct chk_act_out *cao = 
crt_reply_get(rpc); + int rc = -DER_NOTSUPPORTED; cao->cao_status = rc; rc = crt_reply_send(rpc); @@ -214,31 +211,8 @@ ds_chk_pool_mbs_hdlr(crt_rpc_t *rpc) static void ds_chk_report_hdlr(crt_rpc_t *rpc) { - struct chk_report_in *cri = crt_req_get(rpc); - struct chk_report_out *cro = crt_reply_get(rpc); - struct chk_report_unit cru; - int rc; - - cru.cru_gen = cri->cri_gen; - cru.cru_cla = cri->cri_ics_class; - cru.cru_act = cri->cri_ics_action; - cru.cru_target = cri->cri_target; - cru.cru_rank = cri->cri_rank; - cru.cru_option_nr = cri->cri_options.ca_count; - cru.cru_detail_nr = cri->cri_details.ca_count; - cru.cru_pool = &cri->cri_pool; - cru.cru_pool_label = cri->cri_pool_label; - cru.cru_cont = &cri->cri_cont; - cru.cru_cont_label = cri->cri_cont_label; - cru.cru_obj = &cri->cri_obj; - cru.cru_dkey = &cri->cri_dkey; - cru.cru_akey = &cri->cri_akey; - cru.cru_msg = cri->cri_msg; - cru.cru_options = cri->cri_options.ca_arrays; - cru.cru_details = cri->cri_details.ca_arrays; - cru.cru_result = cri->cri_ics_result; - - rc = chk_leader_report(&cru, &cri->cri_seq, NULL); + struct chk_report_out *cro = crt_reply_get(rpc); + int rc = -DER_NOTSUPPORTED; cro->cro_status = rc; rc = crt_reply_send(rpc); diff --git a/src/common/tests_dmg_helpers.c b/src/common/tests_dmg_helpers.c index c77bc880820..28888a55ee6 100644 --- a/src/common/tests_dmg_helpers.c +++ b/src/common/tests_dmg_helpers.c @@ -2007,12 +2007,24 @@ parse_check_query_pool(struct json_object *obj, uuid_t uuid, struct daos_check_i return rc; } +#define CHK_LEADER_RANK (uint32_t)(-1) +#define CHK_REPORT_SEQ_BIT 40 +#define CHK_REPORT_SEQ_MASK ((1ULL << CHK_REPORT_SEQ_BIT) - 1) + static int -parse_check_query_report(struct json_object *obj, struct daos_check_report_info *dcri) +parse_check_query_report(struct json_object *parent, struct daos_check_info *dci, uint32_t idx) { - struct json_object *tmp; - int rc; - int i; + struct daos_check_report_info *dcri = &dci->dci_reports[idx]; + struct json_object *obj; + 
struct json_object *tmp; + int rc; + int i; + + obj = json_object_array_get_idx(parent, idx); + if (unlikely(obj == NULL)) { + D_ERROR("Unable to extract report from check query result\n"); + return -DER_INVAL; + } rc = parse_dmg_uuid(obj, "pool_uuid", dcri->dcri_uuid); if (rc != 0) @@ -2042,6 +2054,17 @@ parse_check_query_report(struct json_object *obj, struct daos_check_report_info dcri->dcri_act = json_object_get_int(tmp); + if (!json_object_object_get_ex(obj, "rank", &tmp)) { + /* If JSON output does not contain "rank", then unparse it from dcri_seq. */ + if ((dcri->dcri_seq & ~CHK_REPORT_SEQ_MASK) == + (((uint64_t)CHK_LEADER_RANK << CHK_REPORT_SEQ_BIT) & ~(1ULL << 63))) + dcri->dcri_rank = dci->dci_leader; + else + dcri->dcri_rank = dcri->dcri_seq >> CHK_REPORT_SEQ_BIT; + } else { + dcri->dcri_rank = json_object_get_int(tmp); + } + if (!json_object_object_get_ex(obj, "result", &tmp)) dcri->dcri_result = 0; else @@ -2078,6 +2101,12 @@ parse_check_query_info(struct json_object *query_output, uint32_t pool_nr, uuid_ if (rc != 0) return rc; + if (!json_object_object_get_ex(query_output, "leader", &obj)) { + D_ERROR("Unable to extract leader from check query result\n"); + return -DER_INVAL; + } + + dci->dci_leader = json_object_get_int(obj); dci->dci_pool_nr = 0; if (pool_nr <= 0) @@ -2126,8 +2155,7 @@ parse_check_query_info(struct json_object *query_output, uint32_t pool_nr, uuid_ } for (i = 0; i < dci->dci_report_nr; i++) { - rc = parse_check_query_report(json_object_array_get_idx(obj, i), - &dci->dci_reports[i]); + rc = parse_check_query_report(obj, dci, i); if (rc != 0) return rc; } diff --git a/src/include/daos/tests_lib.h b/src/include/daos/tests_lib.h index 9bb15883b8c..461ee39ee05 100644 --- a/src/include/daos/tests_lib.h +++ b/src/include/daos/tests_lib.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2015-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -199,22 +199,24 @@ struct daos_check_pool_info { }; struct daos_check_report_info { - uuid_t dcri_uuid; - uint64_t dcri_seq; - uint32_t dcri_class; - uint32_t dcri_act; - int dcri_result; - int dcri_option_nr; - int dcri_options[4]; + uuid_t dcri_uuid; + uint64_t dcri_seq; + uint32_t dcri_class; + uint32_t dcri_act; + int dcri_rank; + int dcri_result; + int dcri_option_nr; + int dcri_options[3]; }; struct daos_check_info { - char *dci_status; - char *dci_phase; - int dci_pool_nr; - int dci_report_nr; - struct daos_check_pool_info *dci_pools; - struct daos_check_report_info *dci_reports; + char *dci_status; + char *dci_phase; + int dci_leader; + int dci_pool_nr; + int dci_report_nr; + struct daos_check_pool_info *dci_pools; + struct daos_check_report_info *dci_reports; }; /** Initialize an SGL with a variable number of IOVs and set the IOV buffers diff --git a/src/include/daos_srv/daos_chk.h b/src/include/daos_srv/daos_chk.h index 9c363c86c9c..e29681bce18 100644 --- a/src/include/daos_srv/daos_chk.h +++ b/src/include/daos_srv/daos_chk.h @@ -86,7 +86,7 @@ int chk_leader_query(int pool_nr, uuid_t pools[], chk_query_head_cb_t head_cb, int chk_leader_prop(chk_prop_cb_t prop_cb, void *buf); int - chk_leader_act(uint64_t seq, uint32_t act); +chk_act(uint64_t seq, uint32_t act); int chk_leader_set_policy(uint32_t policy_nr, struct chk_policy *policies); diff --git a/src/mgmt/srv_chk.c b/src/mgmt/srv_chk.c index 3dd937d9bff..4ffc316facf 100644 --- a/src/mgmt/srv_chk.c +++ b/src/mgmt/srv_chk.c @@ -119,7 +119,7 @@ ds_mgmt_check_prop(chk_prop_cb_t prop_cb, void *buf) int ds_mgmt_check_act(uint64_t seq, uint32_t act) { - return chk_leader_act(seq, act); + return chk_act(seq, act); } int diff --git a/src/tests/ftest/recovery/cat_recov_core.yaml b/src/tests/ftest/recovery/cat_recov_core.yaml 
index 73a5a583283..1e892259188 100644 --- a/src/tests/ftest/recovery/cat_recov_core.yaml +++ b/src/tests/ftest/recovery/cat_recov_core.yaml @@ -59,3 +59,5 @@ daos_tests: test_daos_cat_recov_core: DAOS_Cat_Recov_Core daos_test: test_daos_cat_recov_core: F + args: + test_daos_cat_recov_core: -s3 diff --git a/src/tests/suite/daos_cr.c b/src/tests/suite/daos_cr.c index 9f41c77c96f..75a677c6ef1 100644 --- a/src/tests/suite/daos_cr.c +++ b/src/tests/suite/daos_cr.c @@ -269,38 +269,43 @@ cr_rank_reint(uint32_t rank, bool start) return rc; } +static struct daos_check_report_info * +cr_locate_dcri(struct daos_check_info *dci, struct daos_check_report_info *base, uuid_t uuid); + static inline int -cr_rank_exclude(test_arg_t *arg, struct test_pool *pool, int *rank, bool wait) +cr_rank_exclude(test_arg_t *arg, struct daos_check_info *dci, struct test_pool *pool, int *rank, + bool wait) { - int count; - int rc; - int i; - int j; + struct daos_check_report_info *dcri; + int rc; + int i; + int j; D_ASSERT(pool->svc != NULL); /* - * The check leader (elected by control plane, usually on rank 0) and - * PS leader maybe on different ranks, do not exclude such two ranks. + * Do not exclude PS leader. Since we do not know which one is the leader, then the one to + * be excluded will be not any pool service replica. On the other hand, do not exclude the + * rank that needs to interact with user, this rank maybe (or maybe not) one of above pool + * service replicas. 
*/ - count = pool->svc->rl_nr + 2; - if (!test_runable(arg, count)) { - print_message("Need enough targets (%u/%u vs %d) for test, skip\n", - arg->srv_nnodes, arg->srv_ntgts, count); - return 1; - } - for (i = 1, *rank = -1; i < count && *rank < 0; i++) { + dcri = cr_locate_dcri(dci, NULL, pool->pool_uuid); + + for (i = 0, *rank = -1; i < arg->srv_nnodes && *rank < 0; i++) { for (j = 0; j < pool->svc->rl_nr; j++) { if (pool->svc->rl_ranks[j] == i) break; } - if (j >= pool->svc->rl_nr) + if (j >= pool->svc->rl_nr && i != dci->dci_leader && i != dcri->dcri_rank) *rank = i; } - D_ASSERT(*rank >= 0); + if (unlikely(*rank < 0)) { + print_message("Not enough ranks (%u) for test, skip\n", arg->srv_nnodes); + return 1; + } rc = cr_debug_set_params(arg, DAOS_CHK_ENGINE_DEATH | DAOS_FAIL_ALWAYS); if (rc != 0) @@ -2896,7 +2901,7 @@ cr_engine_death(void **state) rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); assert_rc_equal(rc, 0); - rc = cr_rank_exclude(arg, &pool, &rank, true); + rc = cr_rank_exclude(arg, &dci, &pool, &rank, true); if (rc > 0) goto cleanup; assert_rc_equal(rc, 0); @@ -3000,7 +3005,7 @@ cr_engine_rejoin_succ(void **state) rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); assert_rc_equal(rc, 0); - rc = cr_rank_exclude(arg, &pool, &rank, false); + rc = cr_rank_exclude(arg, &dci, &pool, &rank, false); if (rc > 0) goto cleanup; assert_rc_equal(rc, 0); @@ -3116,7 +3121,7 @@ cr_engine_rejoin_fail(void **state) rc = cr_pool_verify(&dci, pool.pool_uuid, TCPS_PENDING, 1, &class, &action, NULL); assert_rc_equal(rc, 0); - rc = cr_rank_exclude(arg, &pool, &rank, true); + rc = cr_rank_exclude(arg, &dci, &pool, &rank, true); if (rc > 0) goto cleanup; assert_rc_equal(rc, 0); @@ -4071,7 +4076,7 @@ cr_scan_cont_parallel(void **state) FAULT_INJECTION_REQUIRED(); - print_message("CR30: scan multiple containers in parallel\n"); + print_message("CR32: scan multiple containers in parallel\n"); for (i = 0; i < 2; 
i++) { rc = cr_pool_create(state, &pools[i], true, TCC_NONE); @@ -4097,7 +4102,7 @@ cr_scan_cont_parallel(void **state) actions[i] = TCA_INTERACT; } - for (i = 0; i < 2; i++, once = false) { + for (i = 0, once = false; i < 2; i++) { again: cr_pool_wait(1, &pools[i].pool_uuid, &dcis[i]);