Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -357,10 +357,7 @@ private Long getCachedBackend(String cluster, long tableId) {
return null;
}
Backend backend = Env.getCurrentSystemInfo().getBackend(backendId);
if (backend != null && backend.isAlive() && !backend.isDecommissioned()
&& (!Config.isCloudMode() || !backend.isDecommissioning())
&& (!Config.isCloudMode() || cluster == null
|| cluster.equals(backend.getCloudClusterName()))) {
if (isBackendAvailable(backend, cluster)) {
return backend.getId();
} else {
tableToBeMap.remove(encode(cluster, tableId));
Expand All @@ -372,13 +369,26 @@ private Long getCachedBackend(String cluster, long tableId) {
return null;
}

/**
 * Decides whether {@code backend} may be picked for the given {@code cluster}.
 *
 * <p>A backend is rejected when it is {@code null}, not alive, decommissioned, or when
 * {@code isLoadAvailable()} returns {@code false}. Outside cloud mode those checks are
 * sufficient. In cloud mode the backend must additionally not be decommissioning, and its
 * cloud cluster name must equal {@code cluster} unless {@code cluster} is {@code null}, in
 * which case no cluster match is required.
 *
 * @param backend the candidate backend; may be {@code null}
 * @param cluster the cloud cluster name to match, or {@code null} to skip the cluster check
 * @return {@code true} if every applicable check passes, {@code false} otherwise
 */
private boolean isBackendAvailable(Backend backend, String cluster) {
// Checks that apply regardless of deployment mode.
if (backend == null || !backend.isAlive() || backend.isDecommissioned() || !backend.isLoadAvailable()) {
return false;
}
// Outside cloud mode there is nothing further to check.
if (!Config.isCloudMode()) {
return true;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Observability issue] Now that isBackendAvailable() rejects backends based on isLoadAvailable() (which checks isLoadDisabled and isShutDown), the error messages in selectBackendForCloudGroupCommitInternal (line ~303) and selectBackendForLocalGroupCommitInternal (line ~338) should include isLoadDisabled and isShutDown fields. Currently they only log alive, active, decommissioned (and decommissioning in the cloud version).

Without this, when all backends are alive but load-disabled, operators will see a misleading error showing all backends as healthy.

Suggested addition to the error message map lambdas:

+ ", loadDisabled=" + be.isLoadDisabled() + ", shutdown=" + be.isShutDown()

(Note: isShutDown() is private — you may need to add isLoadAvailable() to the message instead, or make isShutDown accessible.)

// Cloud mode only: a backend that is being decommissioned is not eligible.
if (backend.isDecommissioning()) {
return false;
}
// Cloud mode only: a null cluster accepts any backend; otherwise the cluster names must be equal.
return cluster == null || cluster.equals(backend.getCloudClusterName());
}

@Nullable
private Long getRandomBackend(String cluster, long tableId, List<Backend> backends) {
OlapTable table = (OlapTable) Env.getCurrentEnv().getInternalCatalog().getTableByTableId(tableId);
Collections.shuffle(backends);
for (Backend backend : backends) {
if (backend.isAlive() && !backend.isDecommissioned() && (!Config.isCloudMode()
|| !backend.isDecommissioning())) {
if (isBackendAvailable(backend, cluster)) {
tableToBeMap.put(encode(cluster, tableId), backend.getId());
tableToPressureMap.put(tableId,
new SlidingWindowCounter(table.getGroupCommitIntervalMs() / 1000 + 1));
Expand Down
Loading