Skip to content

Commit

Permalink
[BugFix] Fix routine load job unable to automatically resume in shared_data mode (StarRocks#39327)
Browse files Browse the repository at this point in the history

Signed-off-by: wyb <wybb86@gmail.com>
Co-authored-by: Kevin Cai <caixh.kevin@gmail.com>
  • Loading branch information
wyb and kevincai authored Jan 18, 2024
1 parent b328767 commit 272edd6
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 2 deletions.
8 changes: 7 additions & 1 deletion fe/fe-core/src/main/java/com/starrocks/lake/StarOSAgent.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import com.staros.util.LockCloseable;
import com.starrocks.common.Config;
import com.starrocks.common.DdlException;
import com.starrocks.common.InternalErrorCode;
import com.starrocks.common.UserException;
import com.starrocks.server.GlobalStateMgr;
import com.starrocks.system.ComputeNode;
Expand Down Expand Up @@ -585,7 +586,12 @@ public long getPrimaryComputeNodeIdByShard(long shardId) throws UserException {
/**
 * Resolves the backend id of the primary replica for the given shard within the
 * given worker group.
 *
 * @param shardId       id of the shard to look up
 * @param workerGroupId worker group to resolve the shard in
 * @return backend id of the first backend reported for the shard
 * @throws UserException with {@code InternalErrorCode.REPLICA_FEW_ERR} when no
 *                       backend is available for the shard
 */
public long getPrimaryComputeNodeIdByShard(long shardId, long workerGroupId) throws UserException {
    Set<Long> candidateBackendIds = getAllBackendIdsByShard(shardId, workerGroupId, true);
    if (!candidateBackendIds.isEmpty()) {
        // Take the first reported backend as the primary.
        return candidateBackendIds.iterator().next();
    }
    // If BE stops, routine load task may catch UserException during load plan,
    // and the job state will be changed to PAUSED.
    // The job will automatically recover from PAUSED to RUNNING if the error code is
    // REPLICA_FEW_ERR when all BEs become alive.
    throw new UserException(InternalErrorCode.REPLICA_FEW_ERR,
            "Failed to get primary backend. shard id: " + shardId);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.


package com.starrocks.lake;

import com.google.common.collect.Lists;
Expand All @@ -26,6 +25,7 @@
import com.staros.proto.WorkerGroupDetailInfo;
import com.staros.proto.WorkerInfo;
import com.staros.proto.WorkerState;
import com.starrocks.common.InternalErrorCode;
import com.starrocks.common.UserException;
import com.starrocks.common.jmockit.Deencapsulation;
import com.starrocks.server.GlobalStateMgr;
Expand Down Expand Up @@ -170,4 +170,50 @@ public void testGetBackendIdsByShardMissingStarletPort() throws StarClientExcept
workerToBackend.clear();
}
}

@Test
public void testGetPrimaryComputeNodeIdByShard() throws StarClientException, UserException {
    // Test fixture: one worker hosting the primary replica of one shard.
    final long shardId = 10L;
    final String workerHost = "127.0.0.1";
    final int workerStarletPort = 9070;
    final int workerHeartbeatPort = 9050;

    WorkerInfo workerInfo = WorkerInfo.newBuilder()
            .setIpPort(workerHost + ":" + workerStarletPort)
            .setWorkerId(1L)
            .setWorkerState(WorkerState.ON)
            .putWorkerProperties("be_heartbeat_port", String.valueOf(workerHeartbeatPort))
            .putWorkerProperties("be_brpc_port", "8060")
            .build();

    ReplicaInfo replica = ReplicaInfo.newBuilder()
            .setReplicaRole(ReplicaRole.PRIMARY)
            .setWorkerInfo(workerInfo.toBuilder().build())
            .build();

    // First lookup: shard has a primary replica; second lookup: shard has none.
    ShardInfo shardWithReplica = ShardInfo.newBuilder().setShardId(shardId)
            .addReplicaInfo(replica)
            .build();
    ShardInfo shardWithoutReplica = ShardInfo.newBuilder().setShardId(shardId)
            .build();

    new Expectations() {
        {
            client.getShardInfo("1", Lists.newArrayList(shardId), StarOSAgent.DEFAULT_WORKER_GROUP_ID);
            minTimes = 0;
            // Consecutive invocations see the shard first with, then without, a replica.
            returns(Lists.newArrayList(shardWithReplica), Lists.newArrayList(shardWithoutReplica));
        }
    };

    Deencapsulation.setField(starosAgent, "serviceId", "1");
    Map<Long, Long> workerToBackend = Maps.newHashMap();
    workerToBackend.put(1L, 2L);
    Deencapsulation.setField(starosAgent, "workerToBackend", workerToBackend);

    // With a replica present, worker 1 maps to backend 2.
    Assert.assertEquals(2, starosAgent.getPrimaryComputeNodeIdByShard(shardId));
    // Without a replica, the lookup must fail with REPLICA_FEW_ERR so routine
    // load jobs can auto-resume once BEs come back.
    UserException exception =
            Assert.assertThrows(UserException.class, () -> starosAgent.getPrimaryComputeNodeIdByShard(shardId));
    Assert.assertEquals(InternalErrorCode.REPLICA_FEW_ERR, exception.getErrorCode());
}
}

0 comments on commit 272edd6

Please sign in to comment.