Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
291 changes: 236 additions & 55 deletions src/chk/chk_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,16 @@
#include "chk_internal.h"

/*
 * Argument bundle handed to the pool tree callbacks through the value iov
 * (chk_pool_add_shard() fills one and passes it with d_iov_set(&riov, &rbund, ...)).
 *
 * Field usage visible in this file:
 * - cpb_head:        list that a newly allocated pool record is appended to.
 * - cpb_uuid:        pool UUID, copied from the caller's argument.
 * - cpb_shard_nr:    optional (may be NULL) counter pointer checked on shard update.
 * - cpb_rank:        rank that the shard being added belongs to.
 * - cpd_has_dending: when true, the pool record gets cpr_has_pending set on both
 *                    the alloc and the update paths.
 *                    NOTE(review): the name looks like a typo for "cpb_has_pending";
 *                    renaming needs a coordinated change of every user.
 * - cpb_ins:         owning check instance, stored into the record's cpr_ins.
 * - cpb_bk:          bookmark whose content is copied into the record's cpr_bk.
 * - cpb_data / cpb_free_cb: opaque per-shard data and its free callback, stored
 *                    into the shard's cps_data / cps_free_cb.
 */
struct chk_pool_bundle {
d_list_t *cpb_head;
uuid_t cpb_uuid;
uint32_t *cpb_shard_nr;
d_rank_t cpb_rank;
struct chk_instance *cpb_ins;
d_list_t *cpb_head;
uuid_t cpb_uuid;
uint32_t *cpb_shard_nr;
d_rank_t cpb_rank;
/* Propagated to cpr_has_pending of the pool record. */
bool cpd_has_dending;
struct chk_instance *cpb_ins;
/* Pointer to the pool bookmark. */
struct chk_bookmark *cpb_bk;
void *cpb_data;
chk_pool_free_data_t cpb_free_cb;
struct chk_bookmark *cpb_bk;
void *cpb_data;
chk_pool_free_data_t cpb_free_cb;
};

static int
Expand Down Expand Up @@ -84,6 +85,9 @@ chk_pool_alloc(struct btr_instance *tins, d_iov_t *key_iov, d_iov_t *val_iov,
memcpy(&cpr->cpr_bk, cpb->cpb_bk, sizeof(cpr->cpr_bk));
cpr->cpr_ins = cpb->cpb_ins;

if (cpb->cpd_has_dending)
cpr->cpr_has_pending = 1;

rec->rec_off = umem_ptr2off(&tins->ti_umm, cpr);
d_list_add_tail(&cpr->cpr_link, cpb->cpb_head);

Expand Down Expand Up @@ -162,6 +166,9 @@ chk_pool_update(struct btr_instance *tins, struct btr_record *rec,
cps->cps_data = cpb->cpb_data;
cps->cps_free_cb = cpb->cpb_free_cb;

if (cpb->cpd_has_dending)
cpr->cpr_has_pending = 1;

d_list_add_tail(&cps->cps_link, &cpr->cpr_shard_list);
cpr->cpr_shard_nr++;
if (cpb->cpb_shard_nr != NULL)
Expand All @@ -185,7 +192,6 @@ btr_ops_t chk_pool_ops = {
struct chk_pending_bundle {
struct chk_instance *cpb_ins;
d_list_t *cpb_pool_head;
d_list_t *cpb_rank_head;
uuid_t cpb_uuid;
d_rank_t cpb_rank;
uint32_t cpb_class;
Expand Down Expand Up @@ -247,11 +253,6 @@ chk_pending_alloc(struct btr_instance *tins, d_iov_t *key_iov, d_iov_t *val_iov,

d_list_add_tail(&cpr->cpr_ins_link, &cpb->cpb_ins->ci_pending_list);

if (cpb->cpb_rank_head != NULL)
d_list_add_tail(&cpr->cpr_rank_link, cpb->cpb_rank_head);
else
D_INIT_LIST_HEAD(&cpr->cpr_rank_link);

rec->rec_off = umem_ptr2off(&tins->ti_umm, cpr);
d_list_add_tail(&cpr->cpr_pool_link, cpb->cpb_pool_head);

Expand Down Expand Up @@ -279,7 +280,6 @@ chk_pending_free(struct btr_instance *tins, struct btr_record *rec, void *args)

rec->rec_off = UMOFF_NULL;
d_list_del_init(&cpr->cpr_pool_link);
d_list_del_init(&cpr->cpr_rank_link);
d_list_del_init(&cpr->cpr_ins_link);

if (val_iov != NULL) {
Expand Down Expand Up @@ -617,8 +617,8 @@ chk_pool_start_one(struct chk_instance *ins, uuid_t uuid, uint64_t gen)
}

cbk.cb_gen = gen;
rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, uuid,
dss_self_rank(), &cbk, ins, NULL, NULL, NULL, NULL);
rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, uuid, dss_self_rank(), false,
&cbk, ins, NULL, NULL, NULL, NULL);

out:
return rc;
Expand Down Expand Up @@ -702,8 +702,8 @@ chk_pools_load_list(struct chk_instance *ins, uint64_t gen, uint32_t flags,
* persistently sometime later.
*/
cbk.cb_gen = gen;
rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, pools[i],
myrank, &cbk, ins, NULL, NULL, NULL, NULL);
rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, pools[i], myrank,
false, &cbk, ins, NULL, NULL, NULL, NULL);
if (rc != 0)
break;

Expand Down Expand Up @@ -765,8 +765,8 @@ chk_pools_load_from_db(struct sys_db *db, char *table, d_iov_t *key, void *args,
* persistently sometime later.
*/
cbk.cb_gen = ctpa->ctpa_gen;
rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, uuid,
dss_self_rank(), &cbk, ins, NULL, NULL, NULL, NULL);
rc = chk_pool_add_shard(ins->ci_pool_hdl, &ins->ci_pool_list, uuid, dss_self_rank(), false,
&cbk, ins, NULL, NULL, NULL, NULL);
if (rc == 0 && ctpa->ctpa_phase > cbk.cb_phase)
ctpa->ctpa_phase = cbk.cb_phase;

Expand Down Expand Up @@ -873,10 +873,9 @@ chk_pool_handle_notify(struct chk_instance *ins, struct chk_iv *iv)
}

int
chk_pool_add_shard(daos_handle_t hdl, d_list_t *head, uuid_t uuid, d_rank_t rank,
struct chk_bookmark *bk, struct chk_instance *ins,
uint32_t *shard_nr, void *data, chk_pool_free_data_t free_cb,
struct chk_pool_rec **cpr)
chk_pool_add_shard(daos_handle_t hdl, d_list_t *head, uuid_t uuid, d_rank_t rank, bool has_pending,
struct chk_bookmark *bk, struct chk_instance *ins, uint32_t *shard_nr,
void *data, chk_pool_free_data_t free_cb, struct chk_pool_rec **cpr)
{
struct chk_pool_bundle rbund;
d_iov_t kiov;
Expand All @@ -887,11 +886,12 @@ chk_pool_add_shard(daos_handle_t hdl, d_list_t *head, uuid_t uuid, d_rank_t rank
rbund.cpb_head = head;
rbund.cpb_shard_nr = shard_nr;
uuid_copy(rbund.cpb_uuid, uuid);
rbund.cpb_rank = rank;
rbund.cpb_bk = bk;
rbund.cpb_ins = ins;
rbund.cpb_data = data;
rbund.cpb_free_cb = free_cb;
rbund.cpb_rank = rank;
rbund.cpd_has_dending = has_pending;
rbund.cpb_bk = bk;
rbund.cpb_ins = ins;
rbund.cpb_data = data;
rbund.cpb_free_cb = free_cb;

d_iov_set(&riov, &rbund, sizeof(rbund));
d_iov_set(&kiov, uuid, sizeof(uuid_t));
Expand Down Expand Up @@ -931,30 +931,9 @@ chk_pool_shard_cleanup(struct chk_instance *ins)
}
}

/*
 * Find the pending record registered under the report sequence \a seq.
 *
 * The lookup in the instance's pending tree is done while holding
 * ci_abt_lock for read; the lock is dropped before returning.
 *
 * \param ins	[IN]	Check instance that owns the pending tree.
 * \param seq	[IN]	Report sequence used as the tree key.
 * \param cpr	[OUT]	The record found, or NULL if the lookup failed.
 *
 * \return	The dbtree_lookup() result (zero on success).
 */
int
chk_pending_lookup(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr)
{
	d_iov_t	key;
	d_iov_t	val;
	int	rc;

	d_iov_set(&val, NULL, 0);
	d_iov_set(&key, &seq, sizeof(seq));

	ABT_rwlock_rdlock(ins->ci_abt_lock);
	rc = dbtree_lookup(ins->ci_pending_hdl, &key, &val);
	ABT_rwlock_unlock(ins->ci_abt_lock);

	*cpr = (rc == 0) ? (struct chk_pending_rec *)val.iov_buf : NULL;

	return rc;
}

int
chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid,
uint64_t seq, uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options,
static int
chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, uuid_t uuid, uint64_t seq,
uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options,
struct chk_pending_rec **cpr)
{
struct chk_pending_bundle rbund;
Expand All @@ -967,7 +946,6 @@ chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_he

uuid_copy(rbund.cpb_uuid, uuid);
rbund.cpb_pool_head = pool_head;
rbund.cpb_rank_head = rank_head;
rbund.cpb_ins = ins;
rbund.cpb_seq = seq;
rbund.cpb_rank = rank;
Expand Down Expand Up @@ -1060,6 +1038,163 @@ chk_pending_wakeup(struct chk_instance *ins, struct chk_pending_rec *cpr)
}

/*
 * Report one inconsistency, described by \a cru, through chk_report_upcall().
 *
 * When the requested action is CIA_INTERACT, a pending record is registered on the
 * owning pool's pending list before the upcall, and the pool bookmark is switched to
 * CPS_PENDING after a successful upcall. If \a decision is non-NULL, the caller is then
 * blocked until the pending record's action is changed away from CIA_INTERACT.
 *
 * \param ins		[IN]	Check instance.
 * \param cru		[IN,OUT] Report unit. cru_result is rewritten to
 *				CHK__CHECK_RESULT__DRY_RUN when it is zero and the
 *				instance has CF_DRYRUN set.
 * \param seq		[IN,OUT] Report sequence. Generated here when *seq is zero on
 *				entry, and regenerated when chk_pending_add() returns
 *				-DER_AGAIN.
 * \param decision	[OUT]	Optional. When non-NULL and a pending record was created,
 *				receives the action chosen for the interaction.
 *
 * \return	Zero on success; 1 when the wait was given up because the scheduler is not
 *		running / is exiting or the pending record is marked exiting; -DER_INVAL for
 *		an interaction request without pool; otherwise the failure of a called helper.
 *
 * NOTE(review): CHK_IS_READY() is a macro defined elsewhere; presumably it bails out
 * when the instance is not ready - confirm against its definition.
 */
int
chk_report(struct chk_instance *ins, struct chk_report_unit *cru, uint64_t *seq, int *decision)
{
struct chk_pending_rec *cpr = NULL;
struct chk_pool_rec *pool = NULL;
char uuid_str[DAOS_UUID_STR_SIZE];
d_iov_t kiov;
d_iov_t riov;
int rc;

CHK_IS_READY(ins);

/* '&' binds tighter than '&&': a zero result becomes DRY_RUN only in dry-run mode. */
if (cru->cru_result == 0 && ins->ci_prop.cp_flags & CHK__CHECK_FLAG__CF_DRYRUN)
cru->cru_result = CHK__CHECK_RESULT__DRY_RUN;

/*
 * A zero *seq asks for a fresh sequence. The new_seq label is also the retry
 * target when chk_pending_add() below returns -DER_AGAIN.
 */
if (*seq == 0) {
new_seq:
*seq = chk_report_seq_gen(ins);
}

D_INFO("Report on %u (%s) with seq " DF_X64 " class %u, action %u, %s, result %d\n",
cru->cru_rank, ins->ci_is_leader ? "leader" : "engine", *seq, cru->cru_cla,
cru->cru_act, cru->cru_msg, cru->cru_result);

if (cru->cru_act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) {
/* The pending record is linked to a pool record, so the pool is mandatory. */
if (cru->cru_pool == NULL)
D_GOTO(log, rc = -DER_INVAL);

/* Locate the pool record by UUID in the instance's pool tree. */
d_iov_set(&riov, NULL, 0);
d_iov_set(&kiov, cru->cru_pool, sizeof(uuid_t));
rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov);
if (rc != 0)
goto log;

pool = (struct chk_pool_rec *)riov.iov_buf;

rc = chk_pending_add(ins, &pool->cpr_pending_list, *cru->cru_pool, *seq,
cru->cru_rank, cru->cru_cla, cru->cru_option_nr,
cru->cru_options, &cpr);
/* Retry with a newly generated sequence. */
if (unlikely(rc == -DER_AGAIN))
goto new_seq;

if (rc != 0)
goto log;
}

rc = chk_report_upcall(cru->cru_gen, *seq, cru->cru_cla, cru->cru_act, cru->cru_result,
cru->cru_rank, cru->cru_target, cru->cru_pool, cru->cru_pool_label,
cru->cru_cont, cru->cru_cont_label, cru->cru_obj, cru->cru_dkey,
cru->cru_akey, cru->cru_msg, cru->cru_option_nr, cru->cru_options,
cru->cru_detail_nr, cru->cru_details);
/* Check cpr->cpr_action for the case of "dmg check repair" by race. */
/* pool is only set on the interaction path above, where cpr has been created too. */
if (rc == 0 && pool != NULL &&
likely(cpr->cpr_action == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)) {
pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_PENDING;
chk_uuid_unparse(ins, *cru->cru_pool, uuid_str);
rc = chk_bk_update_pool(&pool->cpr_bk, uuid_str);
}

log:
if (rc != 0) {
D_ERROR("Failed to handle report from rank %u (%s) with seq " DF_X64 ", class %u, "
"action %u, handle_rc %d, report_rc %d\n",
cru->cru_rank, ins->ci_is_leader ? "leader" : "engine", *seq, cru->cru_cla,
cru->cru_act, cru->cru_result, rc);
goto out;
}

/*
 * Nothing to wait for: either the caller does not want the decision, or no pending
 * record exists. With rc == 0 and decision == NULL the cleanup under "out" leaves
 * the pending record in place.
 */
if (decision == NULL || cpr == NULL)
goto out;

D_ASSERT(cpr->cpr_busy);

D_INFO("Need interaction for class %u with seq " DF_X64 "\n", cru->cru_cla, *seq);

ABT_mutex_lock(cpr->cpr_mutex);

/*
 * Wait under cpr_mutex until cpr_action is no longer CIA_INTERACT (chk_act_internal()
 * sets it and broadcasts on cpr_cond), re-checking the state after every wakeup.
 */
again:
if (cpr->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) {
*decision = cpr->cpr_action;
ABT_mutex_unlock(cpr->cpr_mutex);
goto out;
}

/* Give up waiting, with rc = 1, once the scheduler or the record is going away. */
if (!ins->ci_sched_running || ins->ci_sched_exiting || cpr->cpr_exiting) {
rc = 1;
ABT_mutex_unlock(cpr->cpr_mutex);
goto out;
}

ABT_cond_wait(cpr->cpr_cond, cpr->cpr_mutex);

goto again;

out:
/* Destroy the pending record on failure, or once the caller has waited on it. */
if ((rc != 0 || decision != NULL) && cpr != NULL)
chk_pending_destroy(ins, cpr);

/*
 * If the pool was left in CPS_PENDING but has no pending record anymore, move it
 * back to CPS_CHECKING. The chk_bk_update_pool() result is not checked here.
 */
if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING &&
d_list_empty(&pool->cpr_pending_list)) {
pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING;
chk_uuid_unparse(ins, *cru->cru_pool, uuid_str);
chk_bk_update_pool(&pool->cpr_bk, uuid_str);
}

return rc;
}

/*
 * Deliver the decision \a act to the pending interaction identified by \a seq.
 *
 * The pending record is taken out via chk_pending_del(). On success its action is
 * replaced under cpr_mutex and all waiters on cpr_cond are woken up. The record
 * itself is not destroyed here.
 *
 * \param ins	[IN]	Check instance.
 * \param seq	[IN]	Report sequence of the pending interaction.
 * \param act	[IN]	The chosen action.
 *
 * \return	The chk_pending_del() result (zero on success).
 */
int
chk_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act)
{
	struct chk_pending_rec	*cpr = NULL;
	int			 rc;

	CHK_IS_READY(ins);

	rc = chk_pending_del(ins, seq, &cpr);
	if (rc != 0)
		return rc;

	/* Destroying the cpr is the waiter's job, done via chk_engine_report(). */
	D_ASSERT(cpr->cpr_busy);

	ABT_mutex_lock(cpr->cpr_mutex);
	/*
	 * The control plane is responsible for making sure that the decision is
	 * one of the valid actions offered by the report options. If it is not,
	 * the related inconsistency will be ignored.
	 */
	cpr->cpr_action = act;
	ABT_cond_broadcast(cpr->cpr_cond);
	ABT_mutex_unlock(cpr->cpr_mutex);

	return 0;
}

/*
 * Entry point for applying the decision \a act to the report with sequence \a seq.
 *
 * CIA_INTERACT is not an acceptable decision and yields -DER_INVAL. Otherwise the
 * request is routed to the leader or the engine handler, depending on what
 * chk_report_seq_leader() says about \a seq.
 *
 * -DER_NONEXIST and -DER_NO_HDL from the handler are logged at info level and
 * reported to the caller as success.
 *
 * \return	Zero on success (including the two tolerated errors above),
 *		otherwise a negative error code.
 */
int
chk_act(uint64_t seq, uint32_t act)
{
	bool	benign;
	int	rc;

	if (unlikely(act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT))
		rc = -DER_INVAL;
	else if (chk_report_seq_leader(seq))
		rc = chk_leader_act(seq, act);
	else
		rc = chk_engine_act(seq, act);

	/* These two errors are tolerated: they neither raise the log level nor fail the call. */
	benign = (rc == -DER_NONEXIST || rc == -DER_NO_HDL);

	D_CDEBUG(rc != 0 && !benign, DLOG_ERR, DLOG_INFO,
		 "CHK repair on rank %u, act %u, seq " DF_X64 ": " DF_RC "\n", dss_self_rank(), act,
		 seq, DP_RC(rc));

	return benign ? 0 : rc;
}

static int
chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_property *prop)
{
int changed;
Expand All @@ -1080,6 +1215,52 @@ chk_policy_refresh(uint32_t policy_nr, struct chk_policy *policies, struct chk_p
return changed;
}

/*
 * Apply new repair policies to the running check instance.
 *
 * Nothing is done (and zero is returned) unless the instance bookmark carries the
 * magic matching its role, is in CIS_RUNNING status and has generation \a gen.
 * When chk_policy_refresh() reports a change, the leader first forwards the policies
 * via chk_set_policy_remote(), then the property is stored via chk_prop_update().
 * Finally, every pending record for which chk_is_valid_action() accepts the policy
 * of its class is moved to the instance's interaction filter list.
 *
 * \param ins		[IN]	Check instance.
 * \param gen		[IN]	Generation that the request is meant for.
 * \param policy_nr	[IN]	Count of entries in \a policies.
 * \param policies	[IN]	The policies to apply.
 *
 * \return	Zero on success or when not applicable; otherwise the non-zero result
 *		of the failed step.
 */
int
chk_set_policy(struct chk_instance *ins, uint64_t gen, uint32_t policy_nr,
	       struct chk_policy *policies)
{
	struct chk_bookmark	*cbk = &ins->ci_bk;
	struct chk_property	*prop = &ins->ci_prop;
	struct chk_pending_rec	*cpr;
	struct chk_pending_rec	*next;
	int			 rc;

	CHK_IS_READY(ins);

	/* Without a running check instance of this generation there is nothing to do. */
	if (cbk->cb_magic != (ins->ci_is_leader ? CHK_BK_MAGIC_LEADER : CHK_BK_MAGIC_ENGINE) ||
	    cbk->cb_ins_status != CHK__CHECK_INST_STATUS__CIS_RUNNING || cbk->cb_gen != gen)
		D_GOTO(out, rc = -DER_NOTAPPLICABLE);

	/* Non-positive result: leave with it as the return code. */
	rc = chk_policy_refresh(policy_nr, policies, prop);
	if (rc <= 0)
		goto out;

	if (ins->ci_is_leader) {
		rc = chk_set_policy_remote(ins->ci_ranks, cbk->cb_gen, policy_nr, policies);
		if (rc != 0)
			goto out;
	}

	rc = chk_prop_update(prop, NULL);
	if (rc != 0)
		goto out;

	/* Re-queue the pending records accepted under the refreshed policies. */
	d_list_for_each_entry_safe(cpr, next, &ins->ci_pending_list, cpr_ins_link) {
		if (!chk_is_valid_action(cpr, prop->cp_policies[cpr->cpr_class]))
			continue;

		d_list_del(&cpr->cpr_ins_link);
		d_list_add_tail(&cpr->cpr_ins_link, &ins->ci_interaction_filter_list);
	}

out:
	DL_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, rc, "set policy on rank %d (%s) with gen " DF_U64,
		  dss_self_rank(), ins->ci_is_leader ? "leader" : "engine", gen);

	if (rc == -DER_NOTAPPLICABLE)
		return 0;

	return rc;
}

int
chk_prop_prepare(d_rank_t leader, uint32_t flags, uint32_t policy_nr, struct chk_policy *policies,
d_rank_list_t *ranks, struct chk_property *prop)
Expand Down
Loading
Loading