---初始化项目
This commit is contained in:
40
powerjob-server/powerjob-server-core/pom.xml
Normal file
40
powerjob-server/powerjob-server-core/pom.xml
Normal file
@ -0,0 +1,40 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>powerjob-server</artifactId>
|
||||
<groupId>tech.powerjob</groupId>
|
||||
<version>5.1.2</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>powerjob-server-core</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>8</maven.compiler.source>
|
||||
<maven.compiler.target>8</maven.compiler.target>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>tech.powerjob</groupId>
|
||||
<artifactId>powerjob-server-extension</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>tech.powerjob</groupId>
|
||||
<artifactId>powerjob-server-remote</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>tech.powerjob</groupId>
|
||||
<artifactId>powerjob-server-common</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>tech.powerjob</groupId>
|
||||
<artifactId>powerjob-server-persistence</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
@ -0,0 +1,235 @@
|
||||
package tech.powerjob.server.core;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import tech.powerjob.common.RemoteConstant;
|
||||
import tech.powerjob.common.SystemInstanceResult;
|
||||
import tech.powerjob.common.enums.ExecuteType;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.enums.ProcessorType;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.common.request.ServerScheduleJobReq;
|
||||
import tech.powerjob.remote.framework.base.URL;
|
||||
import tech.powerjob.server.common.Holder;
|
||||
import tech.powerjob.server.common.module.WorkerInfo;
|
||||
import tech.powerjob.server.core.instance.InstanceManager;
|
||||
import tech.powerjob.server.core.instance.InstanceMetadataService;
|
||||
import tech.powerjob.server.core.lock.UseCacheLock;
|
||||
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
|
||||
import tech.powerjob.server.remote.transporter.TransportService;
|
||||
import tech.powerjob.server.remote.transporter.impl.ServerURLFactory;
|
||||
import tech.powerjob.server.remote.worker.WorkerClusterQueryService;
|
||||
import tech.powerjob.server.remote.worker.selector.TaskTrackerSelectorService;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static tech.powerjob.common.enums.InstanceStatus.*;
|
||||
|
||||
|
||||
/**
|
||||
* 派送服务(将任务从Server派发到Worker)
|
||||
*
|
||||
* @author tjq
|
||||
* @author Echo009
|
||||
* @since 2020/4/5
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class DispatchService {
|
||||
|
||||
private final TransportService transportService;
|
||||
|
||||
private final WorkerClusterQueryService workerClusterQueryService;
|
||||
|
||||
private final InstanceManager instanceManager;
|
||||
|
||||
private final InstanceMetadataService instanceMetadataService;
|
||||
|
||||
private final InstanceInfoRepository instanceInfoRepository;
|
||||
|
||||
private final TaskTrackerSelectorService taskTrackerSelectorService;
|
||||
|
||||
/**
|
||||
* 异步重新派发
|
||||
*
|
||||
* @param instanceId 实例 ID
|
||||
*/
|
||||
@UseCacheLock(type = "processJobInstance", key = "#instanceId", concurrencyLevel = 1024)
|
||||
public void redispatchAsync(Long instanceId, int originStatus) {
|
||||
// 将状态重置为等待派发
|
||||
instanceInfoRepository.updateStatusAndGmtModifiedByInstanceIdAndOriginStatus(instanceId, originStatus, InstanceStatus.WAITING_DISPATCH.getV(), new Date());
|
||||
}
|
||||
|
||||
/**
|
||||
* 异步批量重新派发,不加锁
|
||||
*/
|
||||
public void redispatchBatchAsyncLockFree(List<Long> instanceIdList, int originStatus) {
|
||||
// 将状态重置为等待派发
|
||||
instanceInfoRepository.updateStatusAndGmtModifiedByInstanceIdListAndOriginStatus(instanceIdList, originStatus, InstanceStatus.WAITING_DISPATCH.getV(), new Date());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 将任务从Server派发到Worker(TaskTracker)
|
||||
* 只会派发当前状态为等待派发的任务实例
|
||||
* **************************************************
|
||||
* 2021-02-03 modify by Echo009
|
||||
* 1、移除参数 当前运行次数、工作流实例ID、实例参数
|
||||
* 更改为从当前任务实例中获取获取以上信息
|
||||
* 2、移除运行次数相关的(runningTimes)处理逻辑
|
||||
* 迁移至 {@link InstanceManager#updateStatus} 中处理
|
||||
* **************************************************
|
||||
*
|
||||
* @param jobInfo 任务的元信息
|
||||
* @param instanceId 任务实例ID
|
||||
* @param instanceInfoOptional 任务实例信息,可选
|
||||
* @param overloadOptional 超载信息,可选
|
||||
*/
|
||||
@UseCacheLock(type = "processJobInstance", key = "#jobInfo.getMaxInstanceNum() > 0 || T(tech.powerjob.common.enums.TimeExpressionType).FREQUENT_TYPES.contains(#jobInfo.getTimeExpressionType()) ? #jobInfo.getId() : #instanceId", concurrencyLevel = 1024)
|
||||
public void dispatch(JobInfoDO jobInfo, Long instanceId, Optional<InstanceInfoDO> instanceInfoOptional, Optional<Holder<Boolean>> overloadOptional) {
|
||||
// 允许从外部传入实例信息,减少 io 次数
|
||||
// 检查当前任务是否被取消
|
||||
InstanceInfoDO instanceInfo = instanceInfoOptional.orElseGet(() -> instanceInfoRepository.findByInstanceId(instanceId));
|
||||
Long jobId = instanceInfo.getJobId();
|
||||
if (CANCELED.getV() == instanceInfo.getStatus()) {
|
||||
log.info("[Dispatcher-{}|{}] cancel dispatch due to instance has been canceled", jobId, instanceId);
|
||||
return;
|
||||
}
|
||||
// 已经被派发过则不再派发
|
||||
// fix 并发场景下重复派发的问题
|
||||
if (instanceInfo.getStatus() != WAITING_DISPATCH.getV()) {
|
||||
log.info("[Dispatcher-{}|{}] cancel dispatch due to instance has been dispatched", jobId, instanceId);
|
||||
return;
|
||||
}
|
||||
// 任务信息已经被删除
|
||||
if (jobInfo.getId() == null) {
|
||||
log.warn("[Dispatcher-{}|{}] cancel dispatch due to job(id={}) has been deleted!", jobId, instanceId, jobId);
|
||||
instanceManager.processFinishedInstance(instanceId, instanceInfo.getWfInstanceId(), FAILED, "can't find job by id " + jobId);
|
||||
return;
|
||||
}
|
||||
|
||||
Date now = new Date();
|
||||
String dbInstanceParams = instanceInfo.getInstanceParams() == null ? "" : instanceInfo.getInstanceParams();
|
||||
log.info("[Dispatcher-{}|{}] start to dispatch job: {};instancePrams: {}.", jobId, instanceId, jobInfo, dbInstanceParams);
|
||||
|
||||
// 查询当前运行的实例数
|
||||
long current = System.currentTimeMillis();
|
||||
Integer maxInstanceNum = jobInfo.getMaxInstanceNum();
|
||||
// 秒级任务只派发到一台机器,具体的 maxInstanceNum 由 TaskTracker 控制
|
||||
if (TimeExpressionType.FREQUENT_TYPES.contains(jobInfo.getTimeExpressionType())) {
|
||||
maxInstanceNum = 1;
|
||||
}
|
||||
|
||||
// 0 代表不限制在线任务,还能省去一次 DB 查询
|
||||
if (maxInstanceNum > 0) {
|
||||
// 不统计 WAITING_DISPATCH 的状态:使用 OpenAPI 触发的延迟任务不应该统计进去(比如 delay 是 1 天)
|
||||
// 由于不统计 WAITING_DISPATCH,所以这个 runningInstanceCount 不包含本任务自身
|
||||
long runningInstanceCount = instanceInfoRepository.countByJobIdAndStatusIn(jobId, Lists.newArrayList(WAITING_WORKER_RECEIVE.getV(), RUNNING.getV()));
|
||||
// 超出最大同时运行限制,不执行调度
|
||||
if (runningInstanceCount >= maxInstanceNum) {
|
||||
String result = String.format(SystemInstanceResult.TOO_MANY_INSTANCES, runningInstanceCount, maxInstanceNum);
|
||||
log.warn("[Dispatcher-{}|{}] cancel dispatch job due to too much instance is running ({} > {}).", jobId, instanceId, runningInstanceCount, maxInstanceNum);
|
||||
instanceInfoRepository.update4TriggerFailed(instanceId, FAILED.getV(), current, current, RemoteConstant.EMPTY_ADDRESS, result, now);
|
||||
instanceManager.processFinishedInstance(instanceId, instanceInfo.getWfInstanceId(), FAILED, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// 获取当前最合适的 worker 列表
|
||||
List<WorkerInfo> suitableWorkers = workerClusterQueryService.geAvailableWorkers(jobInfo);
|
||||
|
||||
if (CollectionUtils.isEmpty(suitableWorkers)) {
|
||||
log.warn("[Dispatcher-{}|{}] cancel dispatch job due to no worker available", jobId, instanceId);
|
||||
instanceInfoRepository.update4TriggerFailed(instanceId, FAILED.getV(), current, current, RemoteConstant.EMPTY_ADDRESS, SystemInstanceResult.NO_WORKER_AVAILABLE, now);
|
||||
|
||||
instanceManager.processFinishedInstance(instanceId, instanceInfo.getWfInstanceId(), FAILED, SystemInstanceResult.NO_WORKER_AVAILABLE);
|
||||
return;
|
||||
}
|
||||
// 判断是否超载,在所有可用 worker 超载的情况下直接跳过当前任务
|
||||
suitableWorkers = filterOverloadWorker(suitableWorkers);
|
||||
if (suitableWorkers.isEmpty()) {
|
||||
// 直接取消派发,减少一次数据库 io
|
||||
overloadOptional.ifPresent(booleanHolder -> booleanHolder.set(true));
|
||||
log.warn("[Dispatcher-{}|{}] cancel to dispatch job due to all worker is overload", jobId, instanceId);
|
||||
return;
|
||||
}
|
||||
List<String> workerIpList = suitableWorkers.stream().map(WorkerInfo::getAddress).collect(Collectors.toList());
|
||||
// 构造任务调度请求
|
||||
ServerScheduleJobReq req = constructServerScheduleJobReq(jobInfo, instanceInfo, workerIpList);
|
||||
|
||||
// 发送请求(不可靠,需要一个后台线程定期轮询状态)
|
||||
WorkerInfo taskTracker = taskTrackerSelectorService.select(jobInfo, instanceInfo, suitableWorkers);
|
||||
String taskTrackerAddress = taskTracker.getAddress();
|
||||
|
||||
URL workerUrl = ServerURLFactory.dispatchJob2Worker(taskTrackerAddress);
|
||||
transportService.tell(taskTracker.getProtocol(), workerUrl, req);
|
||||
log.info("[Dispatcher-{}|{}] send schedule request to TaskTracker[protocol:{},address:{}] successfully: {}.", jobId, instanceId, taskTracker.getProtocol(), taskTrackerAddress, req);
|
||||
|
||||
// 修改状态
|
||||
instanceInfoRepository.update4TriggerSucceed(instanceId, WAITING_WORKER_RECEIVE.getV(), current, taskTrackerAddress, now, instanceInfo.getStatus());
|
||||
// 装载缓存
|
||||
instanceMetadataService.loadJobInfo(instanceId, jobInfo);
|
||||
}
|
||||
|
||||
private List<WorkerInfo> filterOverloadWorker(List<WorkerInfo> suitableWorkers) {
|
||||
|
||||
List<WorkerInfo> res = new ArrayList<>(suitableWorkers.size());
|
||||
for (WorkerInfo suitableWorker : suitableWorkers) {
|
||||
if (suitableWorker.overload()){
|
||||
continue;
|
||||
}
|
||||
res.add(suitableWorker);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* 构造任务调度请求
|
||||
*/
|
||||
private ServerScheduleJobReq constructServerScheduleJobReq(JobInfoDO jobInfo, InstanceInfoDO instanceInfo, List<String> finalWorkersIpList) {
|
||||
// 构造请求
|
||||
ServerScheduleJobReq req = new ServerScheduleJobReq();
|
||||
BeanUtils.copyProperties(jobInfo, req);
|
||||
// 传入 JobId
|
||||
req.setJobId(jobInfo.getId());
|
||||
// 传入 InstanceParams
|
||||
if (StringUtils.isEmpty(instanceInfo.getInstanceParams())) {
|
||||
req.setInstanceParams(null);
|
||||
} else {
|
||||
req.setInstanceParams(instanceInfo.getInstanceParams());
|
||||
}
|
||||
// 覆盖静态参数
|
||||
if (!StringUtils.isEmpty(instanceInfo.getJobParams())) {
|
||||
req.setJobParams(instanceInfo.getJobParams());
|
||||
}
|
||||
req.setInstanceId(instanceInfo.getInstanceId());
|
||||
req.setAllWorkerAddress(finalWorkersIpList);
|
||||
req.setMaxWorkerCount(jobInfo.getMaxWorkerCount());
|
||||
|
||||
// 设置工作流ID
|
||||
req.setWfInstanceId(instanceInfo.getWfInstanceId());
|
||||
|
||||
req.setExecuteType(ExecuteType.of(jobInfo.getExecuteType()).name());
|
||||
req.setProcessorType(ProcessorType.of(jobInfo.getProcessorType()).name());
|
||||
|
||||
req.setTimeExpressionType(TimeExpressionType.of(jobInfo.getTimeExpressionType()).name());
|
||||
if (jobInfo.getInstanceTimeLimit() != null) {
|
||||
req.setInstanceTimeoutMS(jobInfo.getInstanceTimeLimit());
|
||||
}
|
||||
req.setThreadConcurrency(jobInfo.getConcurrency());
|
||||
req.setMeta(instanceInfo.getMeta());
|
||||
return req;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,53 @@
|
||||
package tech.powerjob.server.core.alarm;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Queues;
|
||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.common.utils.SysUtils;
|
||||
import tech.powerjob.server.extension.alarm.Alarm;
|
||||
import tech.powerjob.server.extension.alarm.AlarmTarget;
|
||||
import tech.powerjob.server.extension.alarm.Alarmable;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* 报警服务
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/4/19
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
public class AlarmCenter {
|
||||
|
||||
private final ExecutorService POOL;
|
||||
|
||||
private final List<Alarmable> BEANS = Lists.newLinkedList();
|
||||
|
||||
public AlarmCenter(List<Alarmable> alarmables) {
|
||||
int cores = SysUtils.availableProcessors();
|
||||
ThreadFactory factory = new ThreadFactoryBuilder().setNameFormat("AlarmPool-%d").build();
|
||||
POOL = new ThreadPoolExecutor(cores, cores, 5, TimeUnit.MINUTES, Queues.newLinkedBlockingQueue(), factory);
|
||||
|
||||
alarmables.forEach(bean -> {
|
||||
BEANS.add(bean);
|
||||
log.info("[AlarmCenter] bean(className={},obj={}) register to AlarmCenter successfully!", bean.getClass().getName(), bean);
|
||||
});
|
||||
}
|
||||
|
||||
public void alarmFailed(Alarm alarm, List<AlarmTarget> alarmTargets) {
|
||||
POOL.execute(() -> BEANS.forEach(alarmable -> {
|
||||
try {
|
||||
alarmable.onFailed(alarm, alarmTargets);
|
||||
}catch (Exception e) {
|
||||
log.warn("[AlarmCenter] alarm failed.", e);
|
||||
}
|
||||
}));
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,35 @@
|
||||
package tech.powerjob.server.core.alarm;
|
||||
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import tech.powerjob.common.utils.CollectionUtils;
|
||||
import tech.powerjob.server.extension.alarm.AlarmTarget;
|
||||
import tech.powerjob.server.persistence.remote.model.UserInfoDO;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* AlarmUtils
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2023/7/31
|
||||
*/
|
||||
public class AlarmUtils {
|
||||
|
||||
public static List<AlarmTarget> convertUserInfoList2AlarmTargetList(List<UserInfoDO> userInfoDOS) {
|
||||
if (CollectionUtils.isEmpty(userInfoDOS)) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
return userInfoDOS.stream().map(AlarmUtils::convertUserInfo2AlarmTarget).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static AlarmTarget convertUserInfo2AlarmTarget(UserInfoDO userInfoDO) {
|
||||
AlarmTarget alarmTarget = new AlarmTarget();
|
||||
BeanUtils.copyProperties(userInfoDO, alarmTarget);
|
||||
|
||||
alarmTarget.setName(userInfoDO.getUsername());
|
||||
return alarmTarget;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,114 @@
|
||||
package tech.powerjob.server.core.alarm.impl;
|
||||
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.core.env.Environment;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.common.OmsConstant;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.common.utils.NetUtils;
|
||||
import tech.powerjob.server.common.PowerJobServerConfigKey;
|
||||
import tech.powerjob.server.common.SJ;
|
||||
import tech.powerjob.server.extension.alarm.AlarmTarget;
|
||||
import tech.powerjob.server.extension.alarm.Alarmable;
|
||||
import tech.powerjob.server.extension.alarm.Alarm;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* 钉钉告警服务
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/8/6
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class DingTalkAlarmService implements Alarmable {
|
||||
|
||||
private final Environment environment;
|
||||
|
||||
private Long agentId;
|
||||
private DingTalkUtils dingTalkUtils;
|
||||
private Cache<String, String> mobile2UserIdCache;
|
||||
|
||||
private static final int CACHE_SIZE = 8192;
|
||||
/**
|
||||
* 防止缓存击穿
|
||||
*/
|
||||
private static final String EMPTY_TAG = "EMPTY";
|
||||
|
||||
@Override
|
||||
public void onFailed(Alarm alarm, List<AlarmTarget> targetUserList) {
|
||||
if (dingTalkUtils == null) {
|
||||
return;
|
||||
}
|
||||
Set<String> userIds = Sets.newHashSet();
|
||||
targetUserList.forEach(user -> {
|
||||
String phone = user.getPhone();
|
||||
if (StringUtils.isEmpty(phone)) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
String userId = mobile2UserIdCache.get(phone, () -> {
|
||||
try {
|
||||
return dingTalkUtils.fetchUserIdByMobile(phone);
|
||||
} catch (PowerJobException ignore) {
|
||||
return EMPTY_TAG;
|
||||
} catch (Exception ignore) {
|
||||
return null;
|
||||
}
|
||||
});
|
||||
if (!EMPTY_TAG.equals(userId)) {
|
||||
userIds .add(userId);
|
||||
}
|
||||
}catch (Exception ignore) {
|
||||
}
|
||||
});
|
||||
userIds.remove(null);
|
||||
|
||||
if (!userIds.isEmpty()) {
|
||||
String userListStr = SJ.COMMA_JOINER.skipNulls().join(userIds);
|
||||
List<DingTalkUtils.MarkdownEntity> markdownEntities = Lists.newLinkedList();
|
||||
markdownEntities.add(new DingTalkUtils.MarkdownEntity("server", NetUtils.getLocalHost()));
|
||||
String content = alarm.fetchContent().replaceAll(OmsConstant.LINE_SEPARATOR, OmsConstant.COMMA);
|
||||
markdownEntities.add(new DingTalkUtils.MarkdownEntity("content", content));
|
||||
|
||||
try {
|
||||
dingTalkUtils.sendMarkdownAsync(alarm.fetchTitle(), markdownEntities, userListStr, agentId);
|
||||
}catch (Exception e) {
|
||||
log.error("[DingTalkAlarmService] send ding message failed, reason is {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
String agentId = environment.getProperty(PowerJobServerConfigKey.DING_AGENT_ID);
|
||||
String appKey = environment.getProperty(PowerJobServerConfigKey.DING_APP_KEY);
|
||||
String appSecret = environment.getProperty(PowerJobServerConfigKey.DING_APP_SECRET);
|
||||
|
||||
log.info("[DingTalkAlarmService] init with appKey:{},appSecret:{},agentId:{}", appKey, appSecret, agentId);
|
||||
|
||||
if (StringUtils.isAnyBlank(agentId, appKey, appSecret)) {
|
||||
log.warn("[DingTalkAlarmService] cannot get agentId, appKey, appSecret at the same time, this service is unavailable");
|
||||
return;
|
||||
}
|
||||
if (!StringUtils.isNumeric(agentId)) {
|
||||
log.warn("[DingTalkAlarmService] DingTalkAlarmService is unavailable due to invalid agentId: {}", agentId);
|
||||
return;
|
||||
}
|
||||
this.agentId = Long.valueOf(agentId);
|
||||
dingTalkUtils = new DingTalkUtils(appKey, appSecret);
|
||||
mobile2UserIdCache = CacheBuilder.newBuilder().maximumSize(CACHE_SIZE).softValues().build();
|
||||
log.info("[DingTalkAlarmService] init DingTalkAlarmService successfully!");
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,131 @@
|
||||
package tech.powerjob.server.core.alarm.impl;
|
||||
|
||||
import com.dingtalk.api.DefaultDingTalkClient;
|
||||
import com.dingtalk.api.DingTalkClient;
|
||||
import com.dingtalk.api.request.OapiGettokenRequest;
|
||||
import com.dingtalk.api.request.OapiMessageCorpconversationAsyncsendV2Request;
|
||||
import com.dingtalk.api.request.OapiUserGetByMobileRequest;
|
||||
import com.dingtalk.api.response.OapiGettokenResponse;
|
||||
import com.dingtalk.api.response.OapiUserGetByMobileResponse;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.http.HttpMethod;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* 钉钉工具类
|
||||
* 工作通知消息:https://ding-doc.dingtalk.com/doc#/serverapi2/pgoxpy
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/8/8
|
||||
*/
|
||||
@Slf4j
|
||||
public class DingTalkUtils implements Closeable {
|
||||
|
||||
private String accessToken;
|
||||
|
||||
private final DingTalkClient sendMsgClient;
|
||||
private final DingTalkClient accessTokenClient;
|
||||
private final DingTalkClient userIdClient;
|
||||
private final ScheduledExecutorService scheduledPool;
|
||||
|
||||
private static final long FLUSH_ACCESS_TOKEN_RATE = 6000;
|
||||
private static final String GET_TOKEN_URL = "https://oapi.dingtalk.com/gettoken";
|
||||
private static final String SEND_URL = "https://oapi.dingtalk.com/topapi/message/corpconversation/asyncsend_v2";
|
||||
private static final String GET_USER_ID_URL = "https://oapi.dingtalk.com/user/get_by_mobile";
|
||||
|
||||
|
||||
public DingTalkUtils(String appKey, String appSecret) {
|
||||
|
||||
this.sendMsgClient = new DefaultDingTalkClient(SEND_URL);
|
||||
this.accessTokenClient = new DefaultDingTalkClient(GET_TOKEN_URL);
|
||||
this.userIdClient = new DefaultDingTalkClient(GET_USER_ID_URL);
|
||||
|
||||
refreshAccessToken(appKey, appSecret);
|
||||
|
||||
if (StringUtils.isEmpty(accessToken)) {
|
||||
throw new PowerJobException("fetch AccessToken failed, please check your appKey & appSecret");
|
||||
}
|
||||
|
||||
scheduledPool = Executors.newSingleThreadScheduledExecutor();
|
||||
scheduledPool.scheduleAtFixedRate(() -> refreshAccessToken(appKey, appSecret), FLUSH_ACCESS_TOKEN_RATE, FLUSH_ACCESS_TOKEN_RATE, TimeUnit.SECONDS);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取 AccessToken,AccessToken 是调用其他接口的基础,有效期 7200 秒,需要不断刷新
|
||||
* @param appKey 应用 appKey
|
||||
* @param appSecret 应用 appSecret
|
||||
*/
|
||||
private void refreshAccessToken(String appKey, String appSecret) {
|
||||
try {
|
||||
OapiGettokenRequest req = new OapiGettokenRequest();
|
||||
req.setAppkey(appKey);
|
||||
req.setAppsecret(appSecret);
|
||||
req.setHttpMethod(HttpMethod.GET.name());
|
||||
OapiGettokenResponse rsp = accessTokenClient.execute(req);
|
||||
|
||||
if (rsp.isSuccess()) {
|
||||
accessToken = rsp.getAccessToken();
|
||||
}else {
|
||||
log.warn("[DingTalkUtils] flush accessToken failed with req({}),code={},msg={}.", req.getTextParams(), rsp.getErrcode(), rsp.getErrmsg());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("[DingTalkUtils] flush accessToken failed.", e);
|
||||
}
|
||||
}
|
||||
|
||||
public String fetchUserIdByMobile(String mobile) throws Exception {
|
||||
OapiUserGetByMobileRequest request = new OapiUserGetByMobileRequest();
|
||||
request.setMobile(mobile);
|
||||
|
||||
OapiUserGetByMobileResponse execute = userIdClient.execute(request, accessToken);
|
||||
if (execute.isSuccess()) {
|
||||
return execute.getUserid();
|
||||
}
|
||||
log.info("[DingTalkUtils] fetch userId by mobile({}) failed,reason is {}.", mobile, execute.getErrmsg());
|
||||
throw new PowerJobException("fetch userId by phone number failed, reason is " + execute.getErrmsg());
|
||||
}
|
||||
|
||||
public void sendMarkdownAsync(String title, List<MarkdownEntity> entities, String userList, Long agentId) throws Exception {
|
||||
OapiMessageCorpconversationAsyncsendV2Request request = new OapiMessageCorpconversationAsyncsendV2Request();
|
||||
request.setUseridList(userList);
|
||||
request.setAgentId(agentId);
|
||||
request.setToAllUser(false);
|
||||
|
||||
OapiMessageCorpconversationAsyncsendV2Request.Msg msg = new OapiMessageCorpconversationAsyncsendV2Request.Msg();
|
||||
|
||||
StringBuilder mdBuilder=new StringBuilder();
|
||||
mdBuilder.append("## ").append(title).append("\n");
|
||||
for (MarkdownEntity entity:entities){
|
||||
mdBuilder.append("#### ").append(entity.title).append("\n");
|
||||
mdBuilder.append("> ").append(entity.detail).append("\n\n");
|
||||
}
|
||||
|
||||
msg.setMsgtype("markdown");
|
||||
msg.setMarkdown(new OapiMessageCorpconversationAsyncsendV2Request.Markdown());
|
||||
msg.getMarkdown().setTitle(title);
|
||||
msg.getMarkdown().setText(mdBuilder.toString());
|
||||
request.setMsg(msg);
|
||||
|
||||
sendMsgClient.execute(request, accessToken);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
scheduledPool.shutdownNow();
|
||||
}
|
||||
|
||||
@AllArgsConstructor
|
||||
public static final class MarkdownEntity {
|
||||
private final String title;
|
||||
private final String detail;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,62 @@
|
||||
package tech.powerjob.server.core.alarm.impl;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import tech.powerjob.server.extension.alarm.AlarmTarget;
|
||||
import tech.powerjob.server.extension.alarm.Alarm;
|
||||
import tech.powerjob.server.extension.alarm.Alarmable;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.env.Environment;
|
||||
import org.springframework.mail.SimpleMailMessage;
|
||||
import org.springframework.mail.javamail.JavaMailSender;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* 邮件通知服务
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/4/30
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
public class MailAlarmService implements Alarmable {
|
||||
|
||||
@Resource
|
||||
private Environment environment;
|
||||
|
||||
private JavaMailSender javaMailSender;
|
||||
|
||||
@Value("${spring.mail.username:''}")
|
||||
private String from;
|
||||
|
||||
@Override
|
||||
public void onFailed(Alarm alarm, List<AlarmTarget> targetUserList) {
|
||||
if (CollectionUtils.isEmpty(targetUserList) || javaMailSender == null || StringUtils.isEmpty(from)) {
|
||||
return;
|
||||
}
|
||||
|
||||
SimpleMailMessage sm = new SimpleMailMessage();
|
||||
try {
|
||||
sm.setFrom(from);
|
||||
sm.setTo(targetUserList.stream().map(AlarmTarget::getEmail).filter(Objects::nonNull).filter(email -> !email.isEmpty()).toArray(String[]::new));
|
||||
sm.setSubject(alarm.fetchTitle());
|
||||
sm.setText(alarm.fetchContent());
|
||||
|
||||
javaMailSender.send(sm);
|
||||
}catch (Exception e) {
|
||||
log.warn("[MailAlarmService] send mail failed, reason is {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Autowired(required = false)
|
||||
public void setJavaMailSender(JavaMailSender javaMailSender) {
|
||||
this.javaMailSender = javaMailSender;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,58 @@
|
||||
package tech.powerjob.server.core.alarm.impl;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import tech.powerjob.common.OmsConstant;
|
||||
import tech.powerjob.common.utils.HttpUtils;
|
||||
import tech.powerjob.server.extension.alarm.AlarmTarget;
|
||||
import tech.powerjob.server.extension.alarm.Alarm;
|
||||
import tech.powerjob.server.extension.alarm.Alarmable;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import okhttp3.MediaType;
|
||||
import okhttp3.RequestBody;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* http 回调报警
|
||||
*
|
||||
* @author tjq
|
||||
* @since 11/14/20
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
public class WebHookAlarmService implements Alarmable {
|
||||
|
||||
private static final String HTTP_PROTOCOL_PREFIX = "http://";
|
||||
private static final String HTTPS_PROTOCOL_PREFIX = "https://";
|
||||
|
||||
@Override
|
||||
public void onFailed(Alarm alarm, List<AlarmTarget> targetUserList) {
|
||||
if (CollectionUtils.isEmpty(targetUserList)) {
|
||||
return;
|
||||
}
|
||||
targetUserList.forEach(user -> {
|
||||
String webHook = user.getWebHook();
|
||||
if (StringUtils.isEmpty(webHook)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// 自动添加协议头
|
||||
if (!webHook.startsWith(HTTP_PROTOCOL_PREFIX) && !webHook.startsWith(HTTPS_PROTOCOL_PREFIX)) {
|
||||
webHook = HTTP_PROTOCOL_PREFIX + webHook;
|
||||
}
|
||||
|
||||
MediaType jsonType = MediaType.parse(OmsConstant.JSON_MEDIA_TYPE);
|
||||
RequestBody requestBody = RequestBody.create(jsonType, JSONObject.toJSONString(alarm));
|
||||
|
||||
try {
|
||||
String response = HttpUtils.post(webHook, requestBody);
|
||||
log.info("[WebHookAlarmService] invoke webhook[url={}] successfully, response is {}", webHook, response);
|
||||
}catch (Exception e) {
|
||||
log.warn("[WebHookAlarmService] invoke webhook[url={}] failed!", webHook, e);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,86 @@
|
||||
package tech.powerjob.server.core.alarm.module;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.experimental.Accessors;
|
||||
import tech.powerjob.server.extension.alarm.Alarm;
|
||||
|
||||
/**
|
||||
* 任务执行失败告警对象
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/4/30
|
||||
*/
|
||||
@Data
|
||||
@Accessors(chain = true)
|
||||
public class JobInstanceAlarm implements Alarm {
|
||||
/**
|
||||
* 应用ID
|
||||
*/
|
||||
private long appId;
|
||||
/**
|
||||
* 任务ID
|
||||
*/
|
||||
private long jobId;
|
||||
/**
|
||||
* 任务实例ID
|
||||
*/
|
||||
private long instanceId;
|
||||
/**
|
||||
* 任务名称
|
||||
*/
|
||||
private String jobName;
|
||||
/**
|
||||
* 任务自带的参数
|
||||
*/
|
||||
private String jobParams;
|
||||
/**
|
||||
* 时间表达式类型(CRON/API/FIX_RATE/FIX_DELAY)
|
||||
*/
|
||||
private Integer timeExpressionType;
|
||||
/**
|
||||
* 时间表达式,CRON/NULL/LONG/LONG
|
||||
*/
|
||||
private String timeExpression;
|
||||
/**
|
||||
* 执行类型,单机/广播/MR
|
||||
*/
|
||||
private Integer executeType;
|
||||
/**
|
||||
* 执行器类型,Java/Shell
|
||||
*/
|
||||
private Integer processorType;
|
||||
/**
|
||||
* 执行器信息
|
||||
*/
|
||||
private String processorInfo;
|
||||
|
||||
/**
|
||||
* 任务实例参数
|
||||
*/
|
||||
private String instanceParams;
|
||||
/**
|
||||
* 执行结果
|
||||
*/
|
||||
private String result;
|
||||
/**
|
||||
* 预计触发时间
|
||||
*/
|
||||
private Long expectedTriggerTime;
|
||||
/**
|
||||
* 实际触发时间
|
||||
*/
|
||||
private Long actualTriggerTime;
|
||||
/**
|
||||
* 结束时间
|
||||
*/
|
||||
private Long finishedTime;
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private String taskTrackerAddress;
|
||||
|
||||
@Override
|
||||
public String fetchTitle() {
|
||||
return "PowerJob AlarmService: Job Running Failed";
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,57 @@
|
||||
package tech.powerjob.server.core.alarm.module;
|
||||
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
import lombok.Data;
|
||||
import tech.powerjob.server.extension.alarm.Alarm;
|
||||
|
||||
/**
 * Alarm payload describing a failed workflow instance.
 *
 * @author tjq
 * @since 2020/6/12
 */
@Data
public class WorkflowInstanceAlarm implements Alarm {

    private String workflowName;

    /**
     * Id of the application the workflow belongs to; denormalized for query efficiency.
     */
    private Long appId;

    private Long workflowId;
    /**
     * workflowInstanceId (instance tables use a dedicated id as primary key to allow potential sharding).
     */
    private Long wfInstanceId;
    /**
     * Workflow state (see WorkflowInstanceStatus).
     */
    private Integer status;

    private PEWorkflowDAG peWorkflowDAG;
    private String result;

    /**
     * Actual trigger time (epoch millis).
     */
    private Long actualTriggerTime;
    /**
     * Finish time (epoch millis).
     */
    private Long finishedTime;

    /**
     * Time expression type (CRON/API/FIX_RATE/FIX_DELAY).
     */
    private Integer timeExpressionType;
    /**
     * Time expression; a CRON string / NULL / long / long depending on the type above.
     */
    private String timeExpression;

    @Override
    public String fetchTitle() {
        return "PowerJob AlarmService: Workflow Running Failed";
    }
}
|
||||
@ -0,0 +1,514 @@
|
||||
package tech.powerjob.server.core.container;
|
||||
|
||||
import com.google.common.collect.ArrayListMultimap;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Multimap;
|
||||
import com.google.common.collect.Sets;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.filefilter.FileFilterUtils;
|
||||
import org.apache.commons.io.filefilter.IOFileFilter;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.exception.ExceptionUtils;
|
||||
import org.apache.commons.lang3.time.DateFormatUtils;
|
||||
import org.apache.maven.shared.invoker.DefaultInvocationRequest;
|
||||
import org.apache.maven.shared.invoker.DefaultInvoker;
|
||||
import org.apache.maven.shared.invoker.InvocationRequest;
|
||||
import org.apache.maven.shared.invoker.Invoker;
|
||||
import org.eclipse.jgit.api.CloneCommand;
|
||||
import org.eclipse.jgit.api.Git;
|
||||
import org.eclipse.jgit.lib.Ref;
|
||||
import org.eclipse.jgit.lib.Repository;
|
||||
import org.eclipse.jgit.transport.CredentialsProvider;
|
||||
import org.eclipse.jgit.transport.UsernamePasswordCredentialsProvider;
|
||||
import org.springframework.core.env.Environment;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import tech.powerjob.common.OmsConstant;
|
||||
import tech.powerjob.common.exception.ImpossibleException;
|
||||
import tech.powerjob.common.model.DeployedContainerInfo;
|
||||
import tech.powerjob.common.model.GitRepoInfo;
|
||||
import tech.powerjob.common.request.ServerDeployContainerRequest;
|
||||
import tech.powerjob.common.request.ServerDestroyContainerRequest;
|
||||
import tech.powerjob.common.serialize.JsonUtils;
|
||||
import tech.powerjob.common.utils.CommonUtils;
|
||||
import tech.powerjob.common.utils.NetUtils;
|
||||
import tech.powerjob.common.utils.SegmentLock;
|
||||
import tech.powerjob.remote.framework.base.URL;
|
||||
import tech.powerjob.server.common.constants.ContainerSourceType;
|
||||
import tech.powerjob.common.enums.SwitchableStatus;
|
||||
import tech.powerjob.server.common.module.WorkerInfo;
|
||||
import tech.powerjob.server.common.utils.OmsFileUtils;
|
||||
import tech.powerjob.server.extension.LockService;
|
||||
import tech.powerjob.server.extension.dfs.*;
|
||||
import tech.powerjob.server.persistence.remote.model.ContainerInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.ContainerInfoRepository;
|
||||
import tech.powerjob.server.persistence.storage.Constants;
|
||||
import tech.powerjob.server.remote.server.redirector.DesignateServer;
|
||||
import tech.powerjob.server.remote.transporter.impl.ServerURLFactory;
|
||||
import tech.powerjob.server.remote.transporter.TransportService;
|
||||
import tech.powerjob.server.remote.worker.WorkerClusterQueryService;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import javax.websocket.RemoteEndpoint;
|
||||
import javax.websocket.Session;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
 * Container service: manages hot-deployable worker containers — CRUD, jar
 * upload/download, and cluster-wide deployment with progress streamed to the
 * caller over a WebSocket session.
 *
 * @author tjq
 * @since 2020/5/16
 */
@Slf4j
@Service
public class ContainerService {

    @Resource
    private Environment environment;
    @Resource
    private LockService lockService;
    @Resource
    private ContainerInfoRepository containerInfoRepository;
    @Resource
    private DFsService dFsService;
    @Resource
    private TransportService transportService;

    @Resource
    private WorkerClusterQueryService workerClusterQueryService;

    // Segment lock serializing concurrent downloads of the same jar file.
    private final SegmentLock segmentLock = new SegmentLock(4);
    // Number of workers that receive the deploy request per batch.
    private static final int DEPLOY_BATCH_NUM = 50;
    // Minimum interval between two deployments (10 minutes).
    private static final long DEPLOY_MIN_INTERVAL = 10 * 60 * 1000L;
    // Maximum duration of a single deployment (10 minutes, also the lock timeout).
    private static final long DEPLOY_MAX_COST_TIME = 10 * 60 * 1000L;

    /**
     * Save (create or update) a container.
     * @param container container save request
     * @throws IllegalArgumentException when an id is given but no such container exists
     */
    public void save(ContainerInfoDO container) {


        Long originId = container.getId();
        if (originId != null) {
            // just validate
            containerInfoRepository.findById(originId).orElseThrow(() -> new IllegalArgumentException("can't find container by id: " + originId));
        } else {
            container.setGmtCreate(new Date());
        }
        container.setGmtModified(new Date());

        // For file-upload sources, sourceInfo is the file's MD5 and becomes the version;
        // for Git sources the version (commit id) is generated during deployment.
        if (container.getSourceType() == ContainerSourceType.FatJar.getV()) {
            container.setVersion(container.getSourceInfo());
        }else {
            container.setVersion("init");
        }
        containerInfoRepository.saveAndFlush(container);
    }

    /**
     * Delete a container (tells every alive worker to destroy it, then soft-deletes the record).
     * @param appId application id, used as a permission guard
     * @param containerId container id
     * @throws IllegalArgumentException when the container does not exist
     * @throws RuntimeException when the container does not belong to the given app
     */
    public void delete(Long appId, Long containerId) {

        ContainerInfoDO container = containerInfoRepository.findById(containerId).orElseThrow(() -> new IllegalArgumentException("can't find container by id: " + containerId));

        if (!Objects.equals(appId, container.getAppId())) {
            throw new RuntimeException("Permission Denied!");
        }

        ServerDestroyContainerRequest destroyRequest = new ServerDestroyContainerRequest(container.getId());
        workerClusterQueryService.getAllAliveWorkers(container.getAppId()).forEach(workerInfo -> {
            final URL url = ServerURLFactory.destroyContainer2Worker(workerInfo.getAddress());
            transportService.tell(workerInfo.getProtocol(), url, destroyRequest);
        });

        log.info("[ContainerService] delete container: {}.", container);
        // Soft delete: keep the row, just flip the status flag.
        container.setStatus(SwitchableStatus.DELETED.getV());
        container.setGmtModified(new Date());
        containerInfoRepository.saveAndFlush(container);
    }

    /**
     * Upload the jar file used to build a container.
     * @param file uploaded file
     * @return the file's MD5 digest (used as the container version)
     * @throws IOException on I/O failure
     */
    public String uploadContainerJarFile(MultipartFile file) throws IOException {

        log.info("[ContainerService] start to uploadContainerJarFile, fileName={},size={}", file.getName(), file.getSize());

        String workerDirStr = OmsFileUtils.genTemporaryWorkPath();
        String tmpFileStr = workerDirStr + "tmp.jar";

        File workerDir = new File(workerDirStr);
        File tmpFile = new File(tmpFileStr);

        try {
            // Persist the upload to a local temp file first.
            FileUtils.forceMkdirParent(tmpFile);
            file.transferTo(tmpFile);

            // Compute the MD5 — this is fairly expensive.
            String md5 = OmsFileUtils.md5(tmpFile);
            String fileName = genContainerJarName(md5);

            // Upload to the DFS — also expensive, which makes this endpoint slow overall;
            // not worth offloading to a separate thread though.
            FileLocation fl = new FileLocation().setBucket(Constants.CONTAINER_BUCKET).setName(fileName);
            StoreRequest storeRequest = new StoreRequest().setLocalFile(tmpFile).setFileLocation(fl);
            dFsService.store(storeRequest);

            // Move the file to its final on-disk location.
            String finalFileStr = OmsFileUtils.genContainerJarPath() + fileName;
            File finalFile = new File(finalFileStr);
            if (finalFile.exists()) {
                FileUtils.forceDelete(finalFile);
            }
            FileUtils.moveFile(tmpFile, finalFile);

            log.info("[ContainerService] uploadContainerJarFile successfully,md5={}", md5);
            return md5;

        } catch (Throwable t) {
            log.error("[ContainerService] uploadContainerJarFile failed!", t);
            ExceptionUtils.rethrow(t);
            // Unreachable: rethrow always throws; this only satisfies the compiler.
            throw new ImpossibleException();
        } finally {
            CommonUtils.executeIgnoreException(() -> FileUtils.forceDelete(workerDir));
        }
    }

    /**
     * Fetch the jar needed to build a container, downloading it from the DFS when
     * it is not already on local disk.
     * @param version container version
     * @return local jar file (NOTE(review): may not exist if the DFS lookup failed — callers should check)
     */
    public File fetchContainerJarFile(String version) {

        String fileName = genContainerJarName(version);
        String filePath = OmsFileUtils.genContainerJarPath() + fileName;
        File localFile = new File(filePath);

        if (localFile.exists()) {
            return localFile;
        }

        FileLocation fileLocation = new FileLocation().setBucket(Constants.CONTAINER_BUCKET).setName(fileName);
        try {
            Optional<FileMeta> fileMetaOpt = dFsService.fetchFileMeta(fileLocation);
            if (fileMetaOpt.isPresent()) {
                dFsService.download(new DownloadRequest().setFileLocation(fileLocation).setTarget(localFile));
            }
        } catch (Exception e) {
            log.warn("[ContainerService] fetchContainerJarFile from dsf failed, version: {}", version, e);
        }

        return localFile;
    }

    /**
     * Deploy a container to all alive workers of its app, streaming progress over WebSocket.
     * @param containerId container id
     * @param session WebSocket session used to report progress to the caller
     * @throws Exception on failure
     */
    public void deploy(Long containerId, Session session) throws Exception {

        String deployLock = "containerDeployLock-" + containerId;
        RemoteEndpoint.Async remote = session.getAsyncRemote();
        // Maximum deploy duration: 10 minutes (lock auto-expires after that).
        boolean lock = lockService.tryLock(deployLock, DEPLOY_MAX_COST_TIME);
        if (!lock) {
            remote.sendText("SYSTEM: acquire deploy lock failed, maybe other user is deploying, please wait until the running deploy task finished.");
            return;
        }

        try {
            Optional<ContainerInfoDO> containerInfoOpt = containerInfoRepository.findById(containerId);
            if (!containerInfoOpt.isPresent()) {
                remote.sendText("SYSTEM: can't find container by id: " + containerId);
                return;
            }
            ContainerInfoDO container = containerInfoOpt.get();

            // Warn (but do not block) when deploys come too frequently.
            Date lastDeployTime = container.getLastDeployTime();
            if (lastDeployTime != null) {
                if ((System.currentTimeMillis() - lastDeployTime.getTime()) < DEPLOY_MIN_INTERVAL) {
                    remote.sendText("SYSTEM: [warn] deploy too frequent, last deploy time is: " + DateFormatUtils.format(lastDeployTime, OmsConstant.TIME_PATTERN));
                }
            }

            // Prepare the jar (clone & build for Git sources, fetch for FatJar sources).
            File jarFile = prepareJarFile(container, session);
            if (jarFile == null) {
                return;
            }

            double sizeMB = 1.0 * jarFile.length() / FileUtils.ONE_MB;
            remote.sendText(String.format("SYSTEM: the jarFile(size=%fMB) is prepared and ready to be deployed to the worker.", sizeMB));

            // Persist the new version and deploy timestamp.
            Date now = new Date();
            container.setGmtModified(now);
            container.setLastDeployTime(now);
            containerInfoRepository.saveAndFlush(container);
            remote.sendText(String.format("SYSTEM: update current container version=%s successfully!", container.getVersion()));

            // Start deploying, in batches to avoid overwhelming this server with downloads.
            final List<WorkerInfo> allAliveWorkers = workerClusterQueryService.getAllAliveWorkers(container.getAppId());
            if (allAliveWorkers.isEmpty()) {
                remote.sendText("SYSTEM: there is no worker available now, deploy failed!");
                return;
            }

            String port = environment.getProperty("local.server.port");
            String downloadURL = String.format("http://%s:%s/container/downloadJar?version=%s", NetUtils.getLocalHost(), port, container.getVersion());
            ServerDeployContainerRequest req = new ServerDeployContainerRequest(containerId, container.getContainerName(), container.getVersion(), downloadURL);
            long sleepTime = calculateSleepTime(jarFile.length());

            AtomicInteger count = new AtomicInteger();
            allAliveWorkers.forEach(workerInfo -> {

                final URL url = ServerURLFactory.deployContainer2Worker(workerInfo.getAddress());
                transportService.tell(workerInfo.getProtocol(), url, req);

                remote.sendText("SYSTEM: send deploy request to " + url.getAddress());

                // Pause between batches so workers don't all download the jar at once.
                if (count.incrementAndGet() % DEPLOY_BATCH_NUM == 0) {
                    CommonUtils.executeIgnoreException(() -> Thread.sleep(sleepTime));
                }
            });

            remote.sendText("SYSTEM: deploy finished, congratulations!");

        }finally {
            lockService.unlock(deployLock);
        }
    }

    /**
     * Build a human-readable deployment report for a container.
     * @param appId id of the app the container belongs to
     * @param containerId container id
     * @return formatted, readable deployment summary
     */
    @DesignateServer
    public String fetchDeployedInfo(Long appId, Long containerId) {
        List<DeployedContainerInfo> infoList = workerClusterQueryService.getDeployedContainerInfos(appId, containerId);

        Set<String> aliveWorkers = workerClusterQueryService.getAllAliveWorkers(appId)
                .stream()
                .map(WorkerInfo::getAddress)
                .collect(Collectors.toSet());

        Set<String> deployedList = Sets.newLinkedHashSet();
        Multimap<String, DeployedContainerInfo> version2DeployedContainerInfoList = ArrayListMultimap.create();
        infoList.forEach(info -> {
            String targetWorkerAddress = info.getWorkerAddress();
            if (aliveWorkers.contains(targetWorkerAddress)) {
                deployedList.add(targetWorkerAddress);
                version2DeployedContainerInfoList.put(info.getVersion(), info);
            }
        });

        Set<String> unDeployedList = Sets.newHashSet(aliveWorkers);
        unDeployedList.removeAll(deployedList);

        StringBuilder sb = new StringBuilder("========== DeployedInfo ==========").append(System.lineSeparator());

        // Split cluster: workers running different container versions is a serious problem.
        if (version2DeployedContainerInfoList.keySet().size() > 1) {
            sb.append("WARN: there exists multi version container now, please redeploy to fix this problem").append(System.lineSeparator());
        }

        version2DeployedContainerInfoList.asMap().forEach((version, deployedContainerInfos) -> {
            sb.append("[version] ").append(version).append(System.lineSeparator());
            deployedContainerInfos.forEach(deployedContainerInfo -> sb.append(String.format("Address: %s, DeployedTime: %s", deployedContainerInfo.getWorkerAddress(), CommonUtils.formatTime(deployedContainerInfo.getDeployedTime()))).append(System.lineSeparator()));
        });

        // Alive workers that have not deployed this container yet.
        if (!CollectionUtils.isEmpty(unDeployedList)) {
            sb.append("WARN: there exists unDeployed worker(PowerJob will auto fix when some job need to process)").append(System.lineSeparator());
            sb.append("unDeployed worker list ==> ").append(unDeployedList).append(System.lineSeparator());
        }

        if (CollectionUtils.isEmpty(deployedList)) {
            sb.append("no worker deployed this container now~");
        }

        return sb.toString();
    }

    /**
     * Prepare the deployable jar for a container. Git sources are cloned, built with
     * Maven, versioned by HEAD commit id and uploaded to the DFS; FatJar sources are
     * resolved from local disk or downloaded from the DFS.
     * @param container container record (its version may be updated for Git sources)
     * @param session WebSocket session used to stream progress
     * @return local jar file, or null when the Maven build produced no jar
     * @throws Exception on failure (e.g. workspace cleanup in the finally block)
     */
    private File prepareJarFile(ContainerInfoDO container, Session session) throws Exception {

        RemoteEndpoint.Async remote = session.getAsyncRemote();
        // For Git sources we must clone and build first to obtain the jar and its MD5/commit
        // version; FatJar sources can be downloaded directly.
        ContainerSourceType sourceType = ContainerSourceType.of(container.getSourceType());
        if (sourceType == ContainerSourceType.Git) {

            String workerDirStr = OmsFileUtils.genTemporaryWorkPath();
            File workerDir = new File(workerDirStr);
            FileUtils.forceMkdir(workerDir);

            try {
                // git clone
                remote.sendText("SYSTEM: start to git clone the code repo, using config: " + container.getSourceInfo());
                GitRepoInfo gitRepoInfo = JsonUtils.parseObject(container.getSourceInfo(), GitRepoInfo.class);

                CloneCommand cloneCommand = Git.cloneRepository()
                        .setDirectory(workerDir)
                        .setURI(gitRepoInfo.getRepo())
                        .setBranch(gitRepoInfo.getBranch());
                if (!StringUtils.isEmpty(gitRepoInfo.getUsername())) {
                    CredentialsProvider credentialsProvider = new UsernamePasswordCredentialsProvider(gitRepoInfo.getUsername(), gitRepoInfo.getPassword());
                    cloneCommand.setCredentialsProvider(credentialsProvider);
                }
                cloneCommand.call();

                // Use the latest commit id as the container version.
                String oldVersion = container.getVersion();
                try (Repository repository = Git.open(workerDir).getRepository()) {
                    Ref head = repository.getRefDatabase().findRef("HEAD");
                    container.setVersion(head.getObjectId().getName());
                }

                if (container.getVersion().equals(oldVersion)) {
                    remote.sendText(String.format("SYSTEM: this commitId(%s) is the same as the last.", oldVersion));
                }else {
                    remote.sendText(String.format("SYSTEM: new version detected, from %s to %s.", oldVersion, container.getVersion()));
                }
                remote.sendText("SYSTEM: git clone successfully, star to compile the project.");

                // mvn clean package -DskipTests -U
                Invoker mvnInvoker = new DefaultInvoker();
                InvocationRequest ivkReq = new DefaultInvocationRequest();
                // -U: force Maven to check all SNAPSHOT dependency updates so the build reflects the latest state
                // -e: print the full stack trace when the build fails
                // -B: batch mode, avoids hangs caused by interactive prompts
                ivkReq.setGoals(Lists.newArrayList("clean", "package", "-DskipTests", "-U", "-e", "-B"));
                ivkReq.setBaseDirectory(workerDir);
                ivkReq.setOutputHandler(remote::sendText);
                ivkReq.setBatchMode(true);

                mvnInvoker.execute(ivkReq);

                String targetDirStr = workerDirStr + "/target";
                File targetDir = new File(targetDirStr);
                IOFileFilter fileFilter = FileFilterUtils.asFileFilter((dir, name) -> name.endsWith("jar-with-dependencies.jar"));
                Collection<File> jarFile = FileUtils.listFiles(targetDir, fileFilter, null);

                if (CollectionUtils.isEmpty(jarFile)) {
                    remote.sendText("SYSTEM: can't find packaged jar(maybe maven build failed), so deploy failed.");
                    return null;
                }

                File jarWithDependency = jarFile.iterator().next();

                String jarFileName = genContainerJarName(container.getVersion());

                FileLocation dfsFL = new FileLocation().setBucket(Constants.CONTAINER_BUCKET).setName(jarFileName);
                Optional<FileMeta> dfsMetaOpt = dFsService.fetchFileMeta(dfsFL);
                if (dfsMetaOpt.isPresent()) {
                    remote.sendText("SYSTEM: find the jar resource in remote successfully, so it's no need to upload anymore.");
                } else {
                    remote.sendText("SYSTEM: can't find the jar resource in remote, maybe this is a new version, start to upload new version.");
                    dFsService.store(new StoreRequest().setFileLocation(dfsFL).setLocalFile(jarWithDependency));
                    remote.sendText("SYSTEM: upload to GridFS successfully~");
                }

                // Copy the jar from the temp workspace to its final location.
                String localFileStr = OmsFileUtils.genContainerJarPath() + jarFileName;
                File localFile = new File(localFileStr);
                if (localFile.exists()) {
                    FileUtils.forceDelete(localFile);
                }
                FileUtils.copyFile(jarWithDependency, localFile);

                return localFile;
            } catch (Throwable t) {
                // NOTE(review): on build failure control falls through to the local-disk/DFS
                // lookup below rather than aborting — confirm this fallback is intended.
                log.error("[ContainerService] prepareJarFile failed for container: {}", container, t);
                remote.sendText("SYSTEM: [ERROR] prepare jar file failed: " + ExceptionUtils.getStackTrace(t));
            } finally {
                // Clean up the temp workspace.
                FileUtils.forceDelete(workerDir);
            }
        }

        // First check whether the target jar already exists on local disk.
        String jarFileName = genContainerJarName(container.getVersion());
        String localFileStr = OmsFileUtils.genContainerJarPath() + jarFileName;
        File localFile = new File(localFileStr);
        if (localFile.exists()) {
            remote.sendText("SYSTEM: find the jar file in local disk.");
            return localFile;
        }

        // Otherwise download it from GridFS (the DFS).
        remote.sendText(String.format("SYSTEM: try to find the jarFile(%s) in GridFS", jarFileName));
        downloadJarFromGridFS(jarFileName, localFile);
        remote.sendText("SYSTEM: download jar file from GridFS successfully~");
        return localFile;
    }

    /**
     * Download a container jar from the DFS into targetFile, guarded by a segment
     * lock so concurrent requests for the same file download only once.
     * @param mongoFileName file name in the DFS bucket
     * @param targetFile local destination
     */
    private void downloadJarFromGridFS(String mongoFileName, File targetFile) {

        int lockId = mongoFileName.hashCode();
        try {
            segmentLock.lockInterruptibleSafe(lockId);

            // Another thread may have completed the download while we waited for the lock.
            if (targetFile.exists()) {
                return;
            }

            try {

                FileLocation dfsFL = new FileLocation().setBucket(Constants.CONTAINER_BUCKET).setName(mongoFileName);
                Optional<FileMeta> dfsMetaOpt = dFsService.fetchFileMeta(dfsFL);
                if (!dfsMetaOpt.isPresent()) {
                    log.warn("[ContainerService] can't find container's jar file({}) in gridFS.", mongoFileName);
                    return;
                }

                FileUtils.forceMkdirParent(targetFile);

                dFsService.download(new DownloadRequest().setTarget(targetFile).setFileLocation(dfsFL));
            }catch (Exception e) {
                // Remove any partially-downloaded file before propagating the error.
                CommonUtils.executeIgnoreException(() -> FileUtils.forceDelete(targetFile));
                ExceptionUtils.rethrow(e);
            }

        }finally {
            segmentLock.unlock(lockId);
        }

    }

    /**
     * Build the canonical jar file name for a container version.
     */
    private static String genContainerJarName(String version) {
        return String.format("oms-container-%s.jar", version);
    }

    /**
     * Compute the inter-batch sleep time: 1 second per 10 MB, plus 1 second.
     * @param fileLength jar size in bytes
     * @return sleep time in milliseconds
     */
    private long calculateSleepTime(long fileLength) {
        return (fileLength / FileUtils.ONE_MB / 10 + 1) * 1000;
    }

}
|
||||
@ -0,0 +1,112 @@
|
||||
package tech.powerjob.server.core.container;
|
||||
|
||||
import net.lingala.zip4j.ZipFile;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import tech.powerjob.common.ContainerConstant;
import tech.powerjob.server.common.utils.OmsFileUtils;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.function.UnaryOperator;
|
||||
|
||||
/**
|
||||
* oms-worker container 生成器
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/5/15
|
||||
*/
|
||||
public class ContainerTemplateGenerator {
|
||||
|
||||
private static final String ORIGIN_FILE_NAME = "oms-template-origin";
|
||||
|
||||
/**
|
||||
* 生成 container 的模版文件
|
||||
* @param group pom group标签
|
||||
* @param artifact pom artifact标签
|
||||
* @param name pom name标签
|
||||
* @param packageName 包名
|
||||
* @param javaVersion Java版本
|
||||
* @return 压缩包
|
||||
* @throws IOException 异常
|
||||
*/
|
||||
public static File generate(String group, String artifact, String name, String packageName, Integer javaVersion) throws IOException {
|
||||
|
||||
String workerDir = OmsFileUtils.genTemporaryWorkPath();
|
||||
File originJar = new File(workerDir + "tmp.jar");
|
||||
String tmpPath = workerDir + "/unzip/";
|
||||
|
||||
// CentOS 7 上 getResource 会报 FileNotFoundException,原因不详...
|
||||
try (InputStream is = ContainerTemplateGenerator.class.getClassLoader().getResourceAsStream(ORIGIN_FILE_NAME + ".zip")) {
|
||||
Objects.requireNonNull(is, "generate container template failed, can't find zip file in classpath.");
|
||||
FileUtils.copyInputStreamToFile(is, originJar);
|
||||
}
|
||||
|
||||
ZipFile zipFile = new ZipFile(originJar);
|
||||
zipFile.extractAll(tmpPath);
|
||||
String rootPath = tmpPath + ORIGIN_FILE_NAME;
|
||||
|
||||
// 1. 修改 pom.xml (按行读,读取期间更改,然后回写)
|
||||
String pomPath = rootPath + "/pom.xml";
|
||||
|
||||
String line;
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
try (BufferedReader br = new BufferedReader(new FileReader(pomPath))) {
|
||||
while ((line = br.readLine()) != null) {
|
||||
|
||||
if (line.contains("<groupId>groupId</groupId>")) {
|
||||
buffer.append(" <groupId>").append(group).append("</groupId>");
|
||||
}else if (line.contains("<artifactId>artifactId</artifactId>")) {
|
||||
buffer.append(" <artifactId>").append(artifact).append("</artifactId>");
|
||||
}else if (line.contains("<name>name</name>")) {
|
||||
buffer.append(" <name>").append(name).append("</name>");
|
||||
}else if (line.contains("<maven.compiler.source>")) {
|
||||
buffer.append(" <maven.compiler.source>").append(javaVersion).append("</maven.compiler.source>");
|
||||
}else if (line.contains("<maven.compiler.target>")) {
|
||||
buffer.append(" <maven.compiler.target>").append(javaVersion).append("</maven.compiler.target>");
|
||||
} else {
|
||||
buffer.append(line);
|
||||
}
|
||||
buffer.append(System.lineSeparator());
|
||||
}
|
||||
}
|
||||
OmsFileUtils.string2File(buffer.toString(), new File(pomPath));
|
||||
|
||||
// 2. 新建目录
|
||||
String packagePath = StringUtils.replace(packageName, ".", "/");
|
||||
String absPath = rootPath + "/src/main/java/" + packagePath;
|
||||
FileUtils.forceMkdir(new File(absPath));
|
||||
|
||||
// 3. 修改 Spring 配置文件
|
||||
String resourcePath = rootPath + "/src/main/resources/";
|
||||
String springXMLPath = resourcePath + ContainerConstant.SPRING_CONTEXT_FILE_NAME;
|
||||
buffer.setLength(0);
|
||||
|
||||
try (BufferedReader br = new BufferedReader(new FileReader(springXMLPath))) {
|
||||
while ((line = br.readLine()) != null) {
|
||||
|
||||
if (line.contains("<context:component-scan base-package=\"")) {
|
||||
buffer.append(" <context:component-scan base-package=\"").append(packageName).append("\"/>");
|
||||
}else {
|
||||
buffer.append(line);
|
||||
}
|
||||
buffer.append(System.lineSeparator());
|
||||
}
|
||||
}
|
||||
OmsFileUtils.string2File(buffer.toString(), new File(springXMLPath));
|
||||
|
||||
// 4. 写入 packageName,便于容器加载用户类
|
||||
String propertiesPath = resourcePath + ContainerConstant.CONTAINER_PROPERTIES_FILE_NAME;
|
||||
String properties = ContainerConstant.CONTAINER_PACKAGE_NAME_KEY + "=" + packageName;
|
||||
OmsFileUtils.string2File(properties, new File(propertiesPath));
|
||||
|
||||
// 5. 再打包回去
|
||||
String finPath = tmpPath + "template.zip";
|
||||
ZipFile finZip = new ZipFile(finPath);
|
||||
finZip.addFolder(new File(rootPath));
|
||||
|
||||
// 6. 删除源文件
|
||||
FileUtils.forceDelete(originJar);
|
||||
|
||||
return finZip.getFile();
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,18 @@
|
||||
package tech.powerjob.server.core.evaluator;
|
||||
|
||||
|
||||
/**
 * Abstraction over expression evaluation engines (e.g. Groovy).
 *
 * @author Echo009
 * @since 2021/12/10
 */
public interface Evaluator {
    /**
     * Evaluate the given expression against the supplied input.
     *
     * @param expression executable expression
     * @param input      input made available to the expression
     * @return evaluation result
     */
    Object evaluate(String expression, Object input);

}
|
||||
@ -0,0 +1,30 @@
|
||||
package tech.powerjob.server.core.evaluator;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import javax.script.Bindings;
|
||||
import javax.script.ScriptEngine;
|
||||
import javax.script.ScriptEngineManager;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2021/12/10
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
public class GroovyEvaluator implements Evaluator {
|
||||
|
||||
private static final ScriptEngine ENGINE = new ScriptEngineManager().getEngineByName("groovy");
|
||||
|
||||
|
||||
@Override
|
||||
@SneakyThrows
|
||||
public Object evaluate(String expression, Object input) {
|
||||
Bindings bindings = ENGINE.createBindings();
|
||||
bindings.put("context", input);
|
||||
return ENGINE.eval(expression, bindings);
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,171 @@
|
||||
package tech.powerjob.server.core.handler;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.exception.ExceptionUtils;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.core.env.Environment;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.request.*;
|
||||
import tech.powerjob.common.response.AskResponse;
|
||||
import tech.powerjob.common.serialize.JsonUtils;
|
||||
import tech.powerjob.common.utils.NetUtils;
|
||||
import tech.powerjob.remote.framework.actor.Handler;
|
||||
import tech.powerjob.remote.framework.actor.ProcessType;
|
||||
import tech.powerjob.common.enums.SwitchableStatus;
|
||||
import tech.powerjob.server.common.module.WorkerInfo;
|
||||
import tech.powerjob.server.common.utils.SpringUtils;
|
||||
import tech.powerjob.server.monitor.MonitorService;
|
||||
import tech.powerjob.server.monitor.events.w2s.TtReportInstanceStatusEvent;
|
||||
import tech.powerjob.server.monitor.events.w2s.WorkerHeartbeatEvent;
|
||||
import tech.powerjob.server.monitor.events.w2s.WorkerLogReportEvent;
|
||||
import tech.powerjob.server.persistence.remote.model.ContainerInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.ContainerInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
|
||||
import tech.powerjob.server.remote.worker.WorkerClusterQueryService;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.RejectedExecutionException;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static tech.powerjob.common.RemoteConstant.*;
|
||||
|
||||
/**
|
||||
* wrapper monitor for IWorkerRequestHandler
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2022/9/11
|
||||
*/
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public abstract class AbWorkerRequestHandler implements IWorkerRequestHandler {
|
||||
|
||||
|
||||
    /** Sink for server-side monitoring events emitted by every handler below. */
    protected final MonitorService monitorService;

    /** Spring environment, exposed to subclasses for configuration lookups. */
    protected final Environment environment;

    protected final ContainerInfoRepository containerInfoRepository;

    private final WorkerClusterQueryService workerClusterQueryService;

    /**
     * Subclass hook: handle a worker heartbeat; {@code event} is pre-populated and
     * will be published to the monitor after this returns.
     */
    protected abstract void processWorkerHeartbeat0(WorkerHeartbeat heartbeat, WorkerHeartbeatEvent event);

    /**
     * Subclass hook: handle a TaskTracker status report. May mutate {@code event};
     * any thrown exception is converted to a failed {@code AskResponse} by the wrapper.
     */
    protected abstract AskResponse processTaskTrackerReportInstanceStatus0(TaskTrackerReportInstanceStatusReq req, TtReportInstanceStatusEvent event) throws Exception;

    /**
     * Subclass hook: handle a worker log report; {@code event} is pre-populated by the wrapper.
     */
    protected abstract void processWorkerLogReport0(WorkerLogReportReq req, WorkerLogReportEvent event);
|
||||
|
||||
|
||||
@Override
|
||||
@Handler(path = S4W_HANDLER_WORKER_HEARTBEAT, processType = ProcessType.NO_BLOCKING)
|
||||
public void processWorkerHeartbeat(WorkerHeartbeat heartbeat) {
|
||||
long startMs = System.currentTimeMillis();
|
||||
WorkerHeartbeatEvent event = new WorkerHeartbeatEvent()
|
||||
.setAppName(heartbeat.getAppName())
|
||||
.setAppId(heartbeat.getAppId())
|
||||
.setVersion(heartbeat.getVersion())
|
||||
.setProtocol(heartbeat.getProtocol())
|
||||
.setTag(heartbeat.getTag())
|
||||
.setWorkerAddress(heartbeat.getWorkerAddress())
|
||||
.setDelayMs(startMs - heartbeat.getHeartbeatTime())
|
||||
.setScore(heartbeat.getSystemMetrics().getScore());
|
||||
processWorkerHeartbeat0(heartbeat, event);
|
||||
monitorService.monitor(event);
|
||||
}
|
||||
|
||||
@Override
|
||||
@Handler(path = S4W_HANDLER_REPORT_INSTANCE_STATUS, processType = ProcessType.BLOCKING)
|
||||
public AskResponse processTaskTrackerReportInstanceStatus(TaskTrackerReportInstanceStatusReq req) {
|
||||
long startMs = System.currentTimeMillis();
|
||||
TtReportInstanceStatusEvent event = new TtReportInstanceStatusEvent()
|
||||
.setAppId(req.getAppId())
|
||||
.setJobId(req.getJobId())
|
||||
.setInstanceId(req.getInstanceId())
|
||||
.setWfInstanceId(req.getWfInstanceId())
|
||||
.setInstanceStatus(InstanceStatus.of(req.getInstanceStatus()))
|
||||
.setDelayMs(startMs - req.getReportTime())
|
||||
.setServerProcessStatus(TtReportInstanceStatusEvent.Status.SUCCESS);
|
||||
try {
|
||||
return processTaskTrackerReportInstanceStatus0(req, event);
|
||||
} catch (Exception e) {
|
||||
event.setServerProcessStatus(TtReportInstanceStatusEvent.Status.FAILED);
|
||||
log.error("[WorkerRequestHandler] processTaskTrackerReportInstanceStatus failed for request: {}", req, e);
|
||||
return AskResponse.failed(ExceptionUtils.getMessage(e));
|
||||
} finally {
|
||||
event.setServerProcessCost(System.currentTimeMillis() - startMs);
|
||||
monitorService.monitor(event);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@Handler(path = S4W_HANDLER_REPORT_LOG, processType = ProcessType.NO_BLOCKING)
|
||||
public void processWorkerLogReport(WorkerLogReportReq req) {
|
||||
|
||||
WorkerLogReportEvent event = new WorkerLogReportEvent()
|
||||
.setWorkerAddress(req.getWorkerAddress())
|
||||
.setLogNum(req.getInstanceLogContents().size());
|
||||
try {
|
||||
processWorkerLogReport0(req, event);
|
||||
event.setStatus(WorkerLogReportEvent.Status.SUCCESS);
|
||||
} catch (RejectedExecutionException re) {
|
||||
event.setStatus(WorkerLogReportEvent.Status.REJECTED);
|
||||
} catch (Throwable t) {
|
||||
event.setStatus(WorkerLogReportEvent.Status.EXCEPTION);
|
||||
log.warn("[WorkerRequestHandler] process worker report failed!", t);
|
||||
} finally {
|
||||
monitorService.monitor(event);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@Handler(path = S4W_HANDLER_QUERY_JOB_CLUSTER, processType = ProcessType.BLOCKING)
|
||||
public AskResponse processWorkerQueryExecutorCluster(WorkerQueryExecutorClusterReq req) {
|
||||
AskResponse askResponse;
|
||||
|
||||
Long jobId = req.getJobId();
|
||||
Long appId = req.getAppId();
|
||||
|
||||
JobInfoRepository jobInfoRepository = SpringUtils.getBean(JobInfoRepository.class);
|
||||
Optional<JobInfoDO> jobInfoOpt = jobInfoRepository.findById(jobId);
|
||||
if (jobInfoOpt.isPresent()) {
|
||||
JobInfoDO jobInfo = jobInfoOpt.get();
|
||||
if (!jobInfo.getAppId().equals(appId)) {
|
||||
askResponse = AskResponse.failed("Permission Denied!");
|
||||
}else {
|
||||
List<String> sortedAvailableWorker = workerClusterQueryService.geAvailableWorkers(jobInfo)
|
||||
.stream().map(WorkerInfo::getAddress).collect(Collectors.toList());
|
||||
askResponse = AskResponse.succeed(sortedAvailableWorker);
|
||||
}
|
||||
}else {
|
||||
askResponse = AskResponse.failed("can't find jobInfo by jobId: " + jobId);
|
||||
}
|
||||
return askResponse;
|
||||
}
|
||||
|
||||
@Override
|
||||
@Handler(path = S4W_HANDLER_WORKER_NEED_DEPLOY_CONTAINER, processType = ProcessType.BLOCKING)
|
||||
public AskResponse processWorkerNeedDeployContainer(WorkerNeedDeployContainerRequest req) {
|
||||
String port = environment.getProperty("local.server.port");
|
||||
|
||||
Optional<ContainerInfoDO> containerInfoOpt = containerInfoRepository.findById(req.getContainerId());
|
||||
AskResponse askResponse = new AskResponse();
|
||||
if (!containerInfoOpt.isPresent() || containerInfoOpt.get().getStatus() != SwitchableStatus.ENABLE.getV()) {
|
||||
askResponse.setSuccess(false);
|
||||
askResponse.setMessage("can't find container by id: " + req.getContainerId());
|
||||
}else {
|
||||
ContainerInfoDO containerInfo = containerInfoOpt.get();
|
||||
askResponse.setSuccess(true);
|
||||
|
||||
ServerDeployContainerRequest dpReq = new ServerDeployContainerRequest();
|
||||
BeanUtils.copyProperties(containerInfo, dpReq);
|
||||
dpReq.setContainerId(containerInfo.getId());
|
||||
String downloadURL = String.format("http://%s:%s/container/downloadJar?version=%s", NetUtils.getLocalHost(), port, containerInfo.getVersion());
|
||||
dpReq.setDownloadURL(downloadURL);
|
||||
|
||||
askResponse.setData(JsonUtils.toBytes(dpReq));
|
||||
}
|
||||
return askResponse;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,46 @@
|
||||
package tech.powerjob.server.core.handler;
|
||||
|
||||
import tech.powerjob.common.request.*;
|
||||
import tech.powerjob.common.response.AskResponse;
|
||||
|
||||
/**
 * Defines the protocol handled between server and worker.
 *
 * @author tjq
 * @since 2022/9/10
 */
public interface IWorkerRequestHandler {

    /**
     * Process a heartbeat reported by a worker.
     * @param heartbeat heartbeat payload
     */
    void processWorkerHeartbeat(WorkerHeartbeat heartbeat);

    /**
     * Process a job-instance status report from a TaskTracker.
     * @param req status report request
     * @return response message
     */
    AskResponse processTaskTrackerReportInstanceStatus(TaskTrackerReportInstanceStatusReq req);

    /**
     * Process a worker's query for the executor cluster of a job.
     * @param req request
     * @return cluster info
     */
    AskResponse processWorkerQueryExecutorCluster(WorkerQueryExecutorClusterReq req);

    /**
     * Process a worker's log push request (handled asynchronously by an internal
     * thread pool, non-blocking).
     * @param req request
     */
    void processWorkerLogReport(WorkerLogReportReq req);

    /**
     * Process a worker's container deployment request.
     * @param request request
     * @return container deployment info
     */
    AskResponse processWorkerNeedDeployContainer(WorkerNeedDeployContainerRequest request);
}
|
||||
@ -0,0 +1,78 @@
|
||||
package tech.powerjob.server.core.handler;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.core.env.Environment;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import tech.powerjob.common.RemoteConstant;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.request.TaskTrackerReportInstanceStatusReq;
|
||||
import tech.powerjob.common.request.WorkerHeartbeat;
|
||||
import tech.powerjob.common.request.WorkerLogReportReq;
|
||||
import tech.powerjob.common.response.AskResponse;
|
||||
import tech.powerjob.remote.framework.actor.Actor;
|
||||
import tech.powerjob.server.core.instance.InstanceLogService;
|
||||
import tech.powerjob.server.core.instance.InstanceManager;
|
||||
import tech.powerjob.server.core.workflow.WorkflowInstanceManager;
|
||||
import tech.powerjob.server.monitor.MonitorService;
|
||||
import tech.powerjob.server.monitor.events.w2s.TtReportInstanceStatusEvent;
|
||||
import tech.powerjob.server.monitor.events.w2s.WorkerHeartbeatEvent;
|
||||
import tech.powerjob.server.monitor.events.w2s.WorkerLogReportEvent;
|
||||
import tech.powerjob.server.persistence.remote.repository.ContainerInfoRepository;
|
||||
import tech.powerjob.server.remote.worker.WorkerClusterManagerService;
|
||||
import tech.powerjob.server.remote.worker.WorkerClusterQueryService;
|
||||
|
||||
/**
 * receive and process worker's request
 *
 * Concrete implementation of the {@code *0} hooks defined by
 * {@code AbWorkerRequestHandler}; the base class handles monitor-event plumbing.
 *
 * @author tjq
 * @since 2022/9/11
 */
@Slf4j
@Component
@Actor(path = RemoteConstant.S4W_PATH)
public class WorkerRequestHandlerImpl extends AbWorkerRequestHandler {

    /** Applies instance status updates reported by TaskTrackers. */
    private final InstanceManager instanceManager;

    /** Manages workflow instances; used to propagate appended workflow context. */
    private final WorkflowInstanceManager workflowInstanceManager;

    /** Persists instance logs pushed by workers. */
    private final InstanceLogService instanceLogService;

    public WorkerRequestHandlerImpl(InstanceManager instanceManager, WorkflowInstanceManager workflowInstanceManager, InstanceLogService instanceLogService,
                                    MonitorService monitorService, Environment environment, ContainerInfoRepository containerInfoRepository, WorkerClusterQueryService workerClusterQueryService) {
        super(monitorService, environment, containerInfoRepository, workerClusterQueryService);
        this.instanceManager = instanceManager;
        this.workflowInstanceManager = workflowInstanceManager;
        this.instanceLogService = instanceLogService;
    }

    @Override
    protected void processWorkerHeartbeat0(WorkerHeartbeat heartbeat, WorkerHeartbeatEvent event) {
        WorkerClusterManagerService.updateStatus(heartbeat);
    }

    @Override
    protected AskResponse processTaskTrackerReportInstanceStatus0(TaskTrackerReportInstanceStatusReq req, TtReportInstanceStatusEvent event) throws Exception {
        // 2021/02/05 for instances inside a workflow, update the workflow context first,
        // then the instance status; this step must never throw here
        if (req.getWfInstanceId() != null && !CollectionUtils.isEmpty(req.getAppendedWfContext())) {
            // update the workflow context
            workflowInstanceManager.updateWorkflowContext(req.getWfInstanceId(),req.getAppendedWfContext());
        }

        instanceManager.updateStatus(req);

        // terminal states (success / failure) require a reply to the worker
        if (InstanceStatus.FINISHED_STATUS.contains(req.getInstanceStatus())) {
            return AskResponse.succeed(null);
        }

        // non-terminal reports get no reply
        return null;
    }

    @Override
    protected void processWorkerLogReport0(WorkerLogReportReq req, WorkerLogReportEvent event) {
        // performance should be fine here... just a few checks plus Map#get
        instanceLogService.submitLogs(req.getWorkerAddress(), req.getInstanceLogContents());
    }
}
|
||||
@ -0,0 +1,35 @@
|
||||
package tech.powerjob.server.core.helper;
|
||||
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.enums.WorkflowInstanceStatus;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2021/12/13
|
||||
*/
|
||||
public class StatusMappingHelper {
|
||||
|
||||
private StatusMappingHelper(){
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 工作流实例状态转任务实例状态
|
||||
*/
|
||||
public static InstanceStatus toInstanceStatus(WorkflowInstanceStatus workflowInstanceStatus) {
|
||||
switch (workflowInstanceStatus) {
|
||||
case FAILED:
|
||||
return InstanceStatus.FAILED;
|
||||
case SUCCEED:
|
||||
return InstanceStatus.SUCCEED;
|
||||
case RUNNING:
|
||||
return InstanceStatus.RUNNING;
|
||||
case STOPPED:
|
||||
return InstanceStatus.STOPPED;
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@ -0,0 +1,440 @@
|
||||
package tech.powerjob.server.core.instance;
|
||||
|
||||
import com.google.common.base.Stopwatch;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.exception.ExceptionUtils;
|
||||
import org.apache.commons.lang3.time.FastDateFormat;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.core.task.AsyncTaskExecutor;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.support.TransactionTemplate;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import tech.powerjob.common.OmsConstant;
|
||||
import tech.powerjob.common.enums.LogLevel;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.common.model.InstanceLogContent;
|
||||
import tech.powerjob.common.utils.CommonUtils;
|
||||
import tech.powerjob.common.utils.NetUtils;
|
||||
import tech.powerjob.common.utils.SegmentLock;
|
||||
import tech.powerjob.server.common.constants.PJThreadPool;
|
||||
import tech.powerjob.server.common.utils.OmsFileUtils;
|
||||
import tech.powerjob.server.extension.dfs.*;
|
||||
import tech.powerjob.server.persistence.StringPage;
|
||||
import tech.powerjob.server.persistence.local.LocalInstanceLogDO;
|
||||
import tech.powerjob.server.persistence.local.LocalInstanceLogRepository;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
import tech.powerjob.server.persistence.storage.Constants;
|
||||
import tech.powerjob.server.remote.server.redirector.DesignateServer;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.io.*;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
 * Runtime log service for job instances.
 *
 * While an instance is running, its log lines are buffered in a local H2 database;
 * after the instance finishes, logs are synced to remote storage (DFS, e.g. MongoDB
 * GridFS) and the local copy is deleted. Paged views and downloads are served from
 * locally generated log files.
 *
 * @author tjq
 * @since 2020/4/27
 */
@Slf4j
@Service
public class InstanceLogService {

    // local HTTP port, used to build log download URLs
    @Value("${server.port}")
    private int port;

    @Resource
    private InstanceMetadataService instanceMetadataService;

    // distributed file storage used for finished-instance logs
    @Resource
    private DFsService dFsService;
    /**
     * transaction template bound to the LOCAL database
     */
    @Resource(name = "localTransactionTemplate")
    private TransactionTemplate localTransactionTemplate;

    @Resource
    private LocalInstanceLogRepository localInstanceLogRepository;

    /**
     * instance IDs whose logs are still being reported online,
     * mapped to the last report timestamp (ms)
     */
    private final Map<Long, Long> instanceId2LastReportTime = Maps.newConcurrentMap();

    @Resource(name = PJThreadPool.BACKGROUND_POOL)
    private AsyncTaskExecutor powerJobBackgroundPool;

    // optional servlet context path, part of the download URL; null when unset
    @Value("${server.servlet.context-path:#{null}}")
    private String servletContextPath;

    private static final String DOWNLOAD_URL_PATTERN = "http://%s:%d%s/instance/downloadLog?instanceId=%d";

    /**
     * segment lock guarding per-instance log-file generation
     */
    private final SegmentLock segmentLock = new SegmentLock(8);

    /**
     * timestamp formatter (FastDateFormat is thread-safe)
     */
    private static final FastDateFormat DATE_FORMAT = FastDateFormat.getInstance(OmsConstant.TIME_PATTERN_PLUS);
    /**
     * number of log lines per displayed page
     */
    private static final int MAX_LINE_COUNT = 100;
    /**
     * cache duration (ms) for a temporary log file that is still being updated
     */
    private static final long LOG_CACHE_TIME = 10000;

    /**
     * Submit reported log lines, persisting them into the local database.
     * @param workerAddress address of the reporting worker
     * @param logs runtime log lines of the job instance
     */
    @Async(value = PJThreadPool.LOCAL_DB_POOL)
    public void submitLogs(String workerAddress, List<InstanceLogContent> logs) {

        List<LocalInstanceLogDO> logList = logs.stream().map(x -> {
            // mark the instance as "online" so reads go to the local DB
            instanceId2LastReportTime.put(x.getInstanceId(), System.currentTimeMillis());

            LocalInstanceLogDO y = new LocalInstanceLogDO();
            BeanUtils.copyProperties(x, y);
            y.setWorkerAddress(workerAddress);
            return y;
        }).collect(Collectors.toList());

        try {
            CommonUtils.executeWithRetry0(() -> localInstanceLogRepository.saveAll(logList));
        }catch (Exception e) {
            // best-effort: on persistent failure the batch is dropped, not retried forever
            log.warn("[InstanceLogService] persistent instance logs failed, these logs will be dropped: {}.", logs, e);
        }
    }

    /**
     * Fetch a page of an instance's runtime log (data lives locally, so the request
     * is routed to the server owning the instance via @DesignateServer).
     * @param appId appId, used by the routing AOP only
     * @param instanceId job instance ID
     * @param index page number, starting from 0
     * @return one page of log text
     */
    @DesignateServer
    public StringPage fetchInstanceLog(Long appId, Long instanceId, Long index) {
        try {
            Future<File> fileFuture = prepareLogFile(instanceId);
            // a timeout here does NOT cancel the background file-preparation task
            File logFile = fileFuture.get(5, TimeUnit.SECONDS);

            // paginate the file contents
            long lines = 0;
            StringBuilder sb = new StringBuilder();
            String lineStr;
            long left = index * MAX_LINE_COUNT;
            long right = left + MAX_LINE_COUNT;
            try (LineNumberReader lr = new LineNumberReader(new FileReader(logFile))) {
                while ((lineStr = lr.readLine()) != null) {

                    // collect lines inside the requested window
                    if (lines >= left && lines < right) {
                        sb.append(lineStr).append(System.lineSeparator());
                    }
                    ++lines;
                }
            }catch (Exception e) {
                log.warn("[InstanceLog-{}] read logFile from disk failed for app: {}.", instanceId, appId, e);
                return StringPage.simple("oms-server execution exception, caused by " + ExceptionUtils.getRootCauseMessage(e));
            }

            double totalPage = Math.ceil(1.0 * lines / MAX_LINE_COUNT);
            return new StringPage(index, (long) totalPage, sb.toString());

        }catch (TimeoutException te) {
            return StringPage.simple("log file is being prepared, please try again later.");
        }catch (Exception e) {
            log.warn("[InstanceLog-{}] fetch instance log failed.", instanceId, e);
            return StringPage.simple("oms-server execution exception, caused by " + ExceptionUtils.getRootCauseMessage(e));
        }
    }

    /**
     * Build the download URL for an instance's full log file.
     * @param appId used by the routing AOP only
     * @param instanceId job instance ID
     * @return download URL
     */
    @DesignateServer
    public String fetchDownloadUrl(Long appId, Long instanceId) {
        String path = Optional.ofNullable(servletContextPath).orElse(StringUtils.EMPTY);
        String url = String.format(DOWNLOAD_URL_PATTERN, NetUtils.getLocalHost(), port, path, instanceId);
        log.info("[InstanceLog-{}] downloadURL for appId[{}]: {}", instanceId, appId, url);
        return url;
    }

    /**
     * Download the complete log file of an instance.
     * @param instanceId job instance ID
     * @return log file
     * @throws Exception on preparation failure or timeout (1 minute)
     */
    public File downloadInstanceLog(long instanceId) throws Exception {
        Future<File> fileFuture = prepareLogFile(instanceId);
        return fileFuture.get(1, TimeUnit.MINUTES);
    }

    /**
     * Asynchronously prepare the on-disk log file for an instance.
     * @param instanceId job instance ID
     * @return future resolving to the prepared file
     */
    private Future<File> prepareLogFile(long instanceId) {
        return powerJobBackgroundPool.submit(() -> {
            // online logs are still arriving — build from the local database
            if (instanceId2LastReportTime.containsKey(instanceId)) {
                return genTemporaryLogFile(instanceId);
            }
            // instance finished — build/fetch the stable file (possibly from remote storage)
            return genStableLogFile(instanceId);
        });
    }

    /**
     * Sync the local runtime logs of an instance to remote (MongoDB/DFS) storage;
     * executed asynchronously after the instance finishes.
     * @param instanceId job instance ID
     */
    @Async(PJThreadPool.BACKGROUND_POOL)
    public void sync(Long instanceId) {

        Stopwatch sw = Stopwatch.createStarted();
        try {
            // first persist to a local stable file
            File stableLogFile = genStableLogFile(instanceId);
            // then push the file to remote storage

            FileLocation dfsFL = new FileLocation().setBucket(Constants.LOG_BUCKET).setName(genMongoFileName(instanceId));

            try {
                dFsService.store(new StoreRequest().setLocalFile(stableLogFile).setFileLocation(dfsFL));
                log.info("[InstanceLog-{}] push local instanceLogs to mongoDB succeed, using: {}.", instanceId, sw.stop());
            }catch (Exception e) {
                log.warn("[InstanceLog-{}] push local instanceLogs to mongoDB failed.", instanceId, e);
            }

        }catch (Exception e) {
            log.warn("[InstanceLog-{}] sync local instanceLogs failed.", instanceId, e);
        }
        // delete local database rows (runs even if the remote push failed —
        // the stable file generated above remains the local copy)
        try {
            instanceId2LastReportTime.remove(instanceId);
            CommonUtils.executeWithRetry0(() -> localInstanceLogRepository.deleteByInstanceId(instanceId));
            log.info("[InstanceLog-{}] delete local instanceLog successfully.", instanceId);
        }catch (Exception e) {
            log.warn("[InstanceLog-{}] delete local instanceLog failed.", instanceId, e);
        }
    }

    /**
     * Build a temporary log file from the local database for a still-running instance.
     * The file is cached for LOG_CACHE_TIME to avoid rebuilding on every page view.
     */
    private File genTemporaryLogFile(long instanceId) {
        String path = genLogFilePath(instanceId, false);
        int lockId = ("tpFileLock-" + instanceId).hashCode();
        try {
            segmentLock.lockInterruptibleSafe(lockId);

            // the JPA Stream must be consumed inside a transaction
            return localTransactionTemplate.execute(status -> {
                File f = new File(path);
                // if a fresh-enough file exists, reuse it (this check must stay inside
                // the lock, otherwise a half-built file could be returned)
                if (f.exists() && (System.currentTimeMillis() - f.lastModified()) < LOG_CACHE_TIME) {
                    return f;
                }
                try {
                    // create parent directories (the file itself is created when the stream opens)
                    FileUtils.forceMkdirParent(f);

                    // rebuild the file from the database
                    try (Stream<LocalInstanceLogDO> allLogStream = localInstanceLogRepository.findByInstanceIdOrderByLogTime(instanceId)) {
                        stream2File(allLogStream, f);
                    }
                    return f;
                }catch (Exception e) {
                    // drop the partial file so a later attempt starts clean
                    CommonUtils.executeIgnoreException(() -> FileUtils.forceDelete(f));
                    throw new RuntimeException(e);
                }
            });
        }finally {
            segmentLock.unlock(lockId);
        }
    }

    /**
     * Build (or fetch) the stable log file for a finished instance. Sourced from the
     * local database when logs are still local, otherwise downloaded from remote storage.
     */
    private File genStableLogFile(long instanceId) {
        String path = genLogFilePath(instanceId, true);
        int lockId = ("stFileLock-" + instanceId).hashCode();
        try {
            segmentLock.lockInterruptibleSafe(lockId);

            return localTransactionTemplate.execute(status -> {

                File f = new File(path);
                // stable files never change once written, so an existing file is always valid
                if (f.exists()) {
                    return f;
                }

                try {
                    // create parent directories (the file itself is created when the stream opens)
                    FileUtils.forceMkdirParent(f);

                    // data still in the local DB: persist from there (the SYNC case)
                    if (instanceId2LastReportTime.containsKey(instanceId)) {
                        try (Stream<LocalInstanceLogDO> allLogStream = localInstanceLogRepository.findByInstanceIdOrderByLogTime(instanceId)) {
                            stream2File(allLogStream, f);
                        }
                    }else {

                        // otherwise fetch the archived file from remote storage
                        FileLocation dfl = new FileLocation().setBucket(Constants.LOG_BUCKET).setName(genMongoFileName(instanceId));
                        Optional<FileMeta> dflMetaOpt = dFsService.fetchFileMeta(dfl);
                        if (!dflMetaOpt.isPresent()) {
                            OmsFileUtils.string2File("SYSTEM: There is no online log for this job instance.", f);
                            return f;
                        }

                        dFsService.download(new DownloadRequest().setTarget(f).setFileLocation(dfl));
                    }
                    return f;
                }catch (Exception e) {
                    // drop the partial file so a later attempt starts clean
                    CommonUtils.executeIgnoreException(() -> FileUtils.forceDelete(f));
                    throw new RuntimeException(e);
                }
            });
        }finally {
            segmentLock.unlock(lockId);
        }
    }

    /**
     * Write a database log stream to a log file on disk.
     * @param stream stream of log rows (caller closes it)
     * @param logFile target log file
     */
    private void stream2File(Stream<LocalInstanceLogDO> stream, File logFile) {
        try (FileWriter fw = new FileWriter(logFile); BufferedWriter bfw = new BufferedWriter(fw)) {
            stream.forEach(instanceLog -> {
                try {
                    bfw.write(convertLog(instanceLog) + System.lineSeparator());
                }catch (Exception ignore) {
                    // a single unwritable line is skipped rather than failing the whole file
                }
            });
        }catch (IOException ie) {
            ExceptionUtils.rethrow(ie);
        }
    }


    /**
     * Format one log row -> "2020-04-29 22:07:10.059 [192.168.1.1:2777] INFO XXX"
     * @param instanceLog log row
     * @return formatted line
     */
    private static String convertLog(LocalInstanceLogDO instanceLog) {
        return String.format("%s [%s] %s %s",
                DATE_FORMAT.format(instanceLog.getLogTime()),
                instanceLog.getWorkerAddress(),
                LogLevel.genLogLevelString(instanceLog.getLogLevel()),
                instanceLog.getLogContent());
    }

    /**
     * Periodic cleanup: trims local logs of frequent (second-level) tasks,
     * keeping only the most recent 10 minutes.
     */
    @Async(PJThreadPool.TIMING_POOL)
    @Scheduled(fixedDelay = 120000)
    public void timingCheck() {

        // collect instances belonging to frequent-type jobs
        List<Long> frequentInstanceIds = Lists.newLinkedList();
        instanceId2LastReportTime.keySet().forEach(instanceId -> {
            try {
                JobInfoDO jobInfo = instanceMetadataService.fetchJobInfoByInstanceId(instanceId);
                if (TimeExpressionType.FREQUENT_TYPES.contains(jobInfo.getTimeExpressionType())) {
                    frequentInstanceIds.add(instanceId);
                }
            }catch (Exception ignore) {
                // metadata lookup failure: skip this instance for this round
            }
        });

        if (!CollectionUtils.isEmpty(frequentInstanceIds)) {
            // keep only the last 10 minutes of logs
            long time = System.currentTimeMillis() - 10 * 60 * 1000;
            // delete in batches of 100 instance IDs to bound each SQL statement
            Lists.partition(frequentInstanceIds, 100).forEach(p -> {
                try {
                    localInstanceLogRepository.deleteByInstanceIdInAndLogTimeLessThan(p, time);
                }catch (Exception e) {
                    log.warn("[InstanceLogService] delete expired logs for instance: {} failed.", p, e);
                }
            });
        }

        // deleting logs that have not been REPORTed for a long time (still under consideration...)
    }


    /**
     * Build the local file path for an instance's log file.
     * @param stable true -> finished-instance (stable) file; false -> in-progress (temporary) file
     */
    private static String genLogFilePath(long instanceId, boolean stable) {
        if (stable) {
            return OmsFileUtils.genLogDirPath() + String.format("%d-stable.log", instanceId);
        }else {
            return OmsFileUtils.genLogDirPath() + String.format("%d-temporary.log", instanceId);
        }
    }
    /** Name of the archived log object in remote storage. */
    private static String genMongoFileName(long instanceId) {
        return String.format("oms-%d.log", instanceId);
    }

    /**
     * description: remove stale files before a re-run, so the re-run does not show the
     * previous run's logs. When re-reading the stable log, current logic returns the
     * local file directly if it exists (see genStableLogFile), hence the removal here.
     * @author jian chen jiang
     * date 2024/2/5 17:01
     * @param instanceId job instance ID
     * @return void
     */
    public void removeOldFile(Long instanceId) {
        // database rows are kept — deleting them would lose the full history
        try {
            // delete the local stable-file cache
            String s = genLogFilePath(instanceId, true);
            File file = new File(s);
            if(!file.exists()){
                // NOTE(review): early return also skips the temporary-file cleanup below — confirm intended
                return;
            }
            boolean delete = file.delete();
            if(!delete){
                log.warn("[InstanceLogService] delete old logs{} for instance: {} failed.", s,instanceId);
            }
            // delete the temporary file
            String tempFilePath = genLogFilePath(instanceId, false);
            File tempFile = new File(tempFilePath);
            if(!tempFile.exists()){
                return;
            }
            delete = tempFile.delete();
            if(!delete){
                // NOTE(review): logs the stable path 's' here — probably should log 'tempFilePath'
                log.warn("[InstanceLogService] delete old temp logs{} for instance: {} failed.", s,instanceId);
            }
        } catch (Throwable t) {
            log.error("[InstanceLogService] delete old logs for instance[{}] failed.", instanceId, t);
        }

    }

}
|
||||
@ -0,0 +1,250 @@
|
||||
package tech.powerjob.server.core.instance;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.common.model.LifeCycle;
|
||||
import tech.powerjob.common.request.ServerStopInstanceReq;
|
||||
import tech.powerjob.common.request.TaskTrackerReportInstanceStatusReq;
|
||||
import tech.powerjob.common.utils.CommonUtils;
|
||||
import tech.powerjob.remote.framework.base.URL;
|
||||
import tech.powerjob.server.common.module.WorkerInfo;
|
||||
import tech.powerjob.server.common.timewheel.holder.HashedWheelTimerHolder;
|
||||
import tech.powerjob.server.common.utils.SpringUtils;
|
||||
import tech.powerjob.server.core.alarm.AlarmUtils;
|
||||
import tech.powerjob.server.core.service.UserService;
|
||||
import tech.powerjob.server.core.workflow.WorkflowInstanceManager;
|
||||
import tech.powerjob.server.core.alarm.AlarmCenter;
|
||||
import tech.powerjob.server.core.alarm.module.JobInstanceAlarm;
|
||||
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.UserInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
|
||||
import tech.powerjob.server.remote.aware.TransportServiceAware;
|
||||
import tech.powerjob.server.remote.transporter.impl.ServerURLFactory;
|
||||
import tech.powerjob.server.remote.transporter.TransportService;
|
||||
import tech.powerjob.server.remote.worker.WorkerClusterQueryService;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* 管理被调度的任务实例(状态更新相关)
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/4/7
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class InstanceManager implements TransportServiceAware {
|
||||
|
||||
private final AlarmCenter alarmCenter;
|
||||
|
||||
private final InstanceLogService instanceLogService;
|
||||
|
||||
private final InstanceMetadataService instanceMetadataService;
|
||||
|
||||
private final InstanceInfoRepository instanceInfoRepository;
|
||||
|
||||
private final WorkflowInstanceManager workflowInstanceManager;
|
||||
|
||||
private final WorkerClusterQueryService workerClusterQueryService;
|
||||
|
||||
/**
|
||||
* 基础组件通过 aware 注入,避免循环依赖
|
||||
*/
|
||||
private TransportService transportService;
|
||||
|
||||
/**
|
||||
* 更新任务状态
|
||||
* ********************************************
|
||||
* 2021-02-03 modify by Echo009
|
||||
* 实例的执行次数统一在这里管理,对于非固定频率的任务
|
||||
* 当 db 中实例的状态为等待派发时,runningTimes + 1
|
||||
* ********************************************
|
||||
*
|
||||
* @param req TaskTracker上报任务实例状态的请求
|
||||
*/
|
||||
public void updateStatus(TaskTrackerReportInstanceStatusReq req) throws ExecutionException {
|
||||
|
||||
Long instanceId = req.getInstanceId();
|
||||
// 获取相关数据
|
||||
JobInfoDO jobInfo = instanceMetadataService.fetchJobInfoByInstanceId(req.getInstanceId());
|
||||
InstanceInfoDO instanceInfo = instanceInfoRepository.findByInstanceId(instanceId);
|
||||
if (instanceInfo == null) {
|
||||
log.warn("[InstanceManager-{}] can't find InstanceInfo from database", instanceId);
|
||||
return;
|
||||
}
|
||||
|
||||
// 考虑极端情况:Processor 处理耗时小于 server 写 DB 耗时,会导致状态上报时无 taskTracker 地址,此处等待后重新从DB获取数据 GitHub#620
|
||||
if (StringUtils.isEmpty(instanceInfo.getTaskTrackerAddress())) {
|
||||
log.warn("[InstanceManager-{}] TaskTrackerAddress is empty, server will wait then acquire again!", instanceId);
|
||||
CommonUtils.easySleep(277);
|
||||
instanceInfo = instanceInfoRepository.findByInstanceId(instanceId);
|
||||
}
|
||||
|
||||
int originStatus = instanceInfo.getStatus();
|
||||
// 丢弃过期的上报数据
|
||||
if (req.getReportTime() <= instanceInfo.getLastReportTime()) {
|
||||
log.warn("[InstanceManager-{}] receive the expired status report request: {}, this report will be dropped.", instanceId, req);
|
||||
return;
|
||||
}
|
||||
// 丢弃非目标 TaskTracker 的上报数据(脑裂情况)
|
||||
if (!req.getSourceAddress().equals(instanceInfo.getTaskTrackerAddress())) {
|
||||
log.warn("[InstanceManager-{}] receive the other TaskTracker's report: {}, but current TaskTracker is {}, this report will be dropped.", instanceId, req, instanceInfo.getTaskTrackerAddress());
|
||||
return;
|
||||
}
|
||||
|
||||
InstanceStatus receivedInstanceStatus = InstanceStatus.of(req.getInstanceStatus());
|
||||
Integer timeExpressionType = jobInfo.getTimeExpressionType();
|
||||
// 更新 最后上报时间 和 修改时间
|
||||
instanceInfo.setLastReportTime(req.getReportTime());
|
||||
instanceInfo.setGmtModified(new Date());
|
||||
|
||||
// FREQUENT 任务没有失败重试机制,TaskTracker一直运行即可,只需要将存活信息同步到DB即可
|
||||
// FREQUENT 任务的 newStatus 只有2中情况,一种是 RUNNING,一种是 FAILED(表示该机器 overload,需要重新选一台机器执行)
|
||||
// 综上,直接把 status 和 runningNum 同步到DB即可
|
||||
if (TimeExpressionType.FREQUENT_TYPES.contains(timeExpressionType)) {
|
||||
// 如果实例处于失败状态,则说明该 worker 失联了一段时间,被 server 判定为宕机,而此时该秒级任务有可能已经重新派发了,故需要 Kill 掉该实例
|
||||
// fix issue 375
|
||||
if (instanceInfo.getStatus() == InstanceStatus.FAILED.getV()) {
|
||||
log.warn("[InstanceManager-{}] receive TaskTracker's report: {}, but current instance is already failed, this instance should be killed.", instanceId, req);
|
||||
stopInstance(instanceId, instanceInfo);
|
||||
return;
|
||||
}
|
||||
LifeCycle lifeCycle = LifeCycle.parse(jobInfo.getLifecycle());
|
||||
// 检查生命周期是否已结束
|
||||
if (lifeCycle.getEnd() != null && lifeCycle.getEnd() <= System.currentTimeMillis()) {
|
||||
stopInstance(instanceId, instanceInfo);
|
||||
instanceInfo.setStatus(InstanceStatus.SUCCEED.getV());
|
||||
} else {
|
||||
instanceInfo.setStatus(receivedInstanceStatus.getV());
|
||||
}
|
||||
instanceInfo.setResult(req.getResult());
|
||||
instanceInfo.setRunningTimes(req.getTotalTaskNum());
|
||||
instanceInfoRepository.saveAndFlush(instanceInfo);
|
||||
// 任务需要告警
|
||||
if (req.isNeedAlert()) {
|
||||
log.info("[InstanceManager-{}] receive frequent task alert req,time:{},content:{}", instanceId, req.getReportTime(), req.getAlertContent());
|
||||
alert(instanceId, req.getAlertContent());
|
||||
}
|
||||
return;
|
||||
}
|
||||
// 更新运行次数
|
||||
if (instanceInfo.getStatus() == InstanceStatus.WAITING_WORKER_RECEIVE.getV()) {
|
||||
// 这里不会存在并发问题
|
||||
instanceInfo.setRunningTimes(instanceInfo.getRunningTimes() + 1);
|
||||
}
|
||||
// QAQ ,不能提前变更 status,否则会导致更新运行次数的逻辑不生效继而导致普通任务 无限重试
|
||||
instanceInfo.setStatus(receivedInstanceStatus.getV());
|
||||
|
||||
boolean finished = false;
|
||||
if (receivedInstanceStatus == InstanceStatus.SUCCEED) {
|
||||
instanceInfo.setResult(req.getResult());
|
||||
instanceInfo.setFinishedTime(req.getEndTime() == null ? System.currentTimeMillis() : req.getEndTime());
|
||||
finished = true;
|
||||
} else if (receivedInstanceStatus == InstanceStatus.FAILED) {
|
||||
|
||||
// 当前重试次数 <= 最大重试次数,进行重试 (第一次运行,runningTimes为1,重试一次,instanceRetryNum也为1,故需要 =)
|
||||
if (instanceInfo.getRunningTimes() <= jobInfo.getInstanceRetryNum()) {
|
||||
|
||||
log.info("[InstanceManager-{}] instance execute failed but will take the {}th retry.", instanceId, instanceInfo.getRunningTimes());
|
||||
|
||||
// 延迟10S重试(由于重试不改变 instanceId,如果派发到同一台机器,上一个 TaskTracker 还处于资源释放阶段,无法创建新的TaskTracker,任务失败)
|
||||
instanceInfo.setExpectedTriggerTime(System.currentTimeMillis() + 10000);
|
||||
|
||||
// 修改状态为 等待派发,正式开始重试
|
||||
// 问题:会丢失以往的调度记录(actualTriggerTime什么的都会被覆盖)
|
||||
instanceInfo.setStatus(InstanceStatus.WAITING_DISPATCH.getV());
|
||||
} else {
|
||||
instanceInfo.setResult(req.getResult());
|
||||
instanceInfo.setFinishedTime(req.getEndTime() == null ? System.currentTimeMillis() : req.getEndTime());
|
||||
finished = true;
|
||||
log.info("[InstanceManager-{}] instance execute failed and have no chance to retry.", instanceId);
|
||||
}
|
||||
}
|
||||
if (finished) {
|
||||
// 最终状态允许直接覆盖更新
|
||||
instanceInfoRepository.saveAndFlush(instanceInfo);
|
||||
// 这里的 InstanceStatus 只有 成功/失败 两种,手动停止不会由 TaskTracker 上报
|
||||
processFinishedInstance(instanceId, req.getWfInstanceId(), receivedInstanceStatus, req.getResult());
|
||||
return;
|
||||
}
|
||||
// 带条件更新
|
||||
final int i = instanceInfoRepository.updateStatusChangeInfoByInstanceIdAndStatus(instanceInfo.getLastReportTime(), instanceInfo.getGmtModified(), instanceInfo.getRunningTimes(), instanceInfo.getStatus(), instanceInfo.getInstanceId(), originStatus);
|
||||
if (i == 0) {
|
||||
log.warn("[InstanceManager-{}] update instance status failed, maybe the instance status has been changed by other thread. discard this status change,{}", instanceId, instanceInfo);
|
||||
}
|
||||
}
|
||||
|
||||
private void stopInstance(Long instanceId, InstanceInfoDO instanceInfo) {
|
||||
Optional<WorkerInfo> workerInfoOpt = workerClusterQueryService.getWorkerInfoByAddress(instanceInfo.getAppId(), instanceInfo.getTaskTrackerAddress());
|
||||
if (workerInfoOpt.isPresent()) {
|
||||
ServerStopInstanceReq stopInstanceReq = new ServerStopInstanceReq(instanceId);
|
||||
WorkerInfo workerInfo = workerInfoOpt.get();
|
||||
final URL url = ServerURLFactory.stopInstance2Worker(workerInfo.getAddress());
|
||||
transportService.tell(workerInfo.getProtocol(), url, stopInstanceReq);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 收尾完成的任务实例
|
||||
*
|
||||
* @param instanceId 任务实例ID
|
||||
* @param wfInstanceId 工作流实例ID,非必须
|
||||
* @param status 任务状态,有 成功/失败/手动停止
|
||||
* @param result 执行结果
|
||||
*/
|
||||
public void processFinishedInstance(Long instanceId, Long wfInstanceId, InstanceStatus status, String result) {
|
||||
|
||||
log.info("[Instance-{}] process finished, final status is {}.", instanceId, status.name());
|
||||
|
||||
// 上报日志数据
|
||||
HashedWheelTimerHolder.INACCURATE_TIMER.schedule(() -> instanceLogService.sync(instanceId), 60, TimeUnit.SECONDS);
|
||||
|
||||
// workflow 特殊处理
|
||||
if (wfInstanceId != null) {
|
||||
// 手动停止在工作流中也认为是失败(理论上不应该发生)
|
||||
workflowInstanceManager.move(wfInstanceId, instanceId, status, result);
|
||||
}
|
||||
|
||||
// 告警
|
||||
if (status == InstanceStatus.FAILED) {
|
||||
alert(instanceId, result);
|
||||
}
|
||||
// 主动移除缓存,减小内存占用
|
||||
instanceMetadataService.invalidateJobInfo(instanceId);
|
||||
}
|
||||
|
||||
private void alert(Long instanceId, String alertContent) {
|
||||
InstanceInfoDO instanceInfo = instanceInfoRepository.findByInstanceId(instanceId);
|
||||
JobInfoDO jobInfo;
|
||||
try {
|
||||
jobInfo = instanceMetadataService.fetchJobInfoByInstanceId(instanceId);
|
||||
} catch (Exception e) {
|
||||
log.warn("[InstanceManager-{}] can't find jobInfo, alarm failed.", instanceId);
|
||||
return;
|
||||
}
|
||||
JobInstanceAlarm content = new JobInstanceAlarm();
|
||||
BeanUtils.copyProperties(jobInfo, content);
|
||||
BeanUtils.copyProperties(instanceInfo, content);
|
||||
List<UserInfoDO> userList = SpringUtils.getBean(UserService.class).fetchNotifyUserList(jobInfo.getNotifyUserIds());
|
||||
if (!StringUtils.isEmpty(alertContent)) {
|
||||
content.setResult(alertContent);
|
||||
}
|
||||
alarmCenter.alarmFailed(content, AlarmUtils.convertUserInfoList2AlarmTargetList(userList));
|
||||
}
|
||||
|
||||
@Override
public void setTransportService(TransportService transportService) {
    // Setter injection via an interface callback — presumably used to break a circular
    // bean dependency on TransportService; confirm against the interface declaration.
    this.transportService = transportService;
}
|
||||
}
|
||||
@ -0,0 +1,84 @@
|
||||
package tech.powerjob.server.core.instance;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import org.springframework.beans.factory.InitializingBean;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
/**
 * Stores the JobInfo associated with each instance.
 * Once an instance has been created, its JobInfo snapshot should never change,
 * even if the source job definition is modified afterwards.
 *
 * @author tjq
 * @since 2020/6/23
 */
@Service
@RequiredArgsConstructor
public class InstanceMetadataService implements InitializingBean {

    private final JobInfoRepository jobInfoRepository;

    private final InstanceInfoRepository instanceInfoRepository;

    /**
     * Cache of instanceId -> JobInfo. Once an instance is generated, its JobInfo
     * must stay frozen (even if the source data changes).
     */
    private Cache<Long, JobInfoDO> instanceId2JobInfoCache;

    // Maximum number of cached entries, injected from configuration.
    @Value("${oms.instance.metadata.cache.size}")
    private int instanceMetadataCacheSize;
    private static final int CACHE_CONCURRENCY_LEVEL = 16;

    @Override
    public void afterPropertiesSet() throws Exception {
        // softValues(): lets the GC reclaim entries under memory pressure.
        instanceId2JobInfoCache = CacheBuilder.newBuilder()
                .concurrencyLevel(CACHE_CONCURRENCY_LEVEL)
                .maximumSize(instanceMetadataCacheSize)
                .softValues()
                .build();
    }

    /**
     * Fetch the JobInfo for an instance, loading it from the database on a cache miss.
     * @param instanceId instanceId
     * @return JobInfoDO
     * @throws ExecutionException if the cache loader fails (instance or job not found)
     */
    public JobInfoDO fetchJobInfoByInstanceId(Long instanceId) throws ExecutionException {
        return instanceId2JobInfoCache.get(instanceId, () -> {
            InstanceInfoDO instanceInfo = instanceInfoRepository.findByInstanceId(instanceId);
            if (instanceInfo != null) {
                Optional<JobInfoDO> jobInfoOpt = jobInfoRepository.findById(instanceInfo.getJobId());
                return jobInfoOpt.orElseThrow(() -> new IllegalArgumentException("can't find JobInfo by jobId: " + instanceInfo.getJobId()));
            }
            throw new IllegalArgumentException("can't find Instance by instanceId: " + instanceId);
        });
    }

    /**
     * Pre-populate the cache.
     * @param instanceId instanceId
     * @param jobInfoDO the original job data to associate with this instance
     */
    public void loadJobInfo(Long instanceId, JobInfoDO jobInfoDO) {
        instanceId2JobInfoCache.put(instanceId, jobInfoDO);
    }

    /**
     * Invalidate the cache entry for an instance.
     * @param instanceId instanceId
     */
    public void invalidateJobInfo(Long instanceId) {
        instanceId2JobInfoCache.invalidate(instanceId);
    }

}
|
||||
@ -0,0 +1,351 @@
|
||||
package tech.powerjob.server.core.instance;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.data.domain.Page;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.data.jpa.domain.Specification;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.common.RemoteConstant;
|
||||
import tech.powerjob.common.SystemInstanceResult;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.common.model.InstanceDetail;
|
||||
import tech.powerjob.common.model.InstanceMeta;
|
||||
import tech.powerjob.common.request.ServerQueryInstanceStatusReq;
|
||||
import tech.powerjob.common.request.ServerStopInstanceReq;
|
||||
import tech.powerjob.common.request.query.InstancePageQuery;
|
||||
import tech.powerjob.common.response.AskResponse;
|
||||
import tech.powerjob.common.response.InstanceInfoDTO;
|
||||
import tech.powerjob.common.response.PageResult;
|
||||
import tech.powerjob.common.serialize.JsonUtils;
|
||||
import tech.powerjob.remote.framework.base.URL;
|
||||
import tech.powerjob.server.common.constants.InstanceType;
|
||||
import tech.powerjob.server.common.module.WorkerInfo;
|
||||
import tech.powerjob.server.common.timewheel.TimerFuture;
|
||||
import tech.powerjob.server.common.timewheel.holder.InstanceTimeWheelService;
|
||||
import tech.powerjob.server.core.DispatchService;
|
||||
import tech.powerjob.server.core.uid.IdGenerateService;
|
||||
import tech.powerjob.server.persistence.QueryConvertUtils;
|
||||
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
|
||||
import tech.powerjob.server.remote.server.redirector.DesignateServer;
|
||||
import tech.powerjob.server.remote.transporter.TransportService;
|
||||
import tech.powerjob.server.remote.transporter.impl.ServerURLFactory;
|
||||
import tech.powerjob.server.remote.worker.WorkerClusterQueryService;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static tech.powerjob.common.enums.InstanceStatus.RUNNING;
|
||||
import static tech.powerjob.common.enums.InstanceStatus.STOPPED;
|
||||
|
||||
/**
 * Job instance service: lifecycle operations (create / stop / retry / cancel)
 * and query APIs for job instances.
 *
 * @author tjq
 * @since 2020/4/11
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class InstanceService {

    private final TransportService transportService;

    private final DispatchService dispatchService;

    private final IdGenerateService idGenerateService;

    private final InstanceManager instanceManager;

    private final JobInfoRepository jobInfoRepository;

    private final InstanceInfoRepository instanceInfoRepository;

    private final WorkerClusterQueryService workerClusterQueryService;

    private final InstanceLogService instanceLogService;

    /**
     * Create a job instance. Note: this method does NOT call saveAndFlush; if the
     * record must be synced to the DB immediately, call flush manually afterwards.
     * ********************************************
     * 2021-02-03 modify by Echo009
     * Added jobParams: the job's static parameters are now recorded on every run.
     * ********************************************
     *
     * @param jobId job ID
     * @param appId owning application ID
     * @param jobParams static job parameters
     * @param instanceParams instance parameters; present only for OpenAPI-created or workflow instances
     * @param wfInstanceId workflow instance ID; present only for instances spawned by a workflow
     * @param expectTriggerTime expected trigger time
     * @return the created instance record (with the allocated instance ID)
     */
    public InstanceInfoDO create(Long jobId, Long appId, String jobParams, String instanceParams, Long wfInstanceId, Long expectTriggerTime, String outerKey, String extendValue) {

        Long instanceId = idGenerateService.allocate();
        Date now = new Date();

        InstanceInfoDO newInstanceInfo = new InstanceInfoDO();
        newInstanceInfo.setJobId(jobId);
        newInstanceInfo.setAppId(appId);
        newInstanceInfo.setInstanceId(instanceId);
        newInstanceInfo.setJobParams(jobParams);
        newInstanceInfo.setInstanceParams(instanceParams);
        newInstanceInfo.setType(wfInstanceId == null ? InstanceType.NORMAL.getV() : InstanceType.WORKFLOW.getV());
        newInstanceInfo.setWfInstanceId(wfInstanceId);

        newInstanceInfo.setStatus(InstanceStatus.WAITING_DISPATCH.getV());
        newInstanceInfo.setRunningTimes(0L);
        newInstanceInfo.setExpectedTriggerTime(expectTriggerTime);
        // -1 marks "never reported yet"
        newInstanceInfo.setLastReportTime(-1L);
        newInstanceInfo.setOuterKey(outerKey);
        newInstanceInfo.setExtendValue(extendValue);
        newInstanceInfo.setGmtCreate(now);
        newInstanceInfo.setGmtModified(now);

        // Record scheduling metadata (snapshot of the expected trigger time).
        InstanceMeta instanceMeta = new InstanceMeta();
        instanceMeta.setEtt(expectTriggerTime);
        newInstanceInfo.setMeta(JsonUtils.toJSONString(instanceMeta));

        instanceInfoRepository.save(newInstanceInfo);
        return newInstanceInfo;
    }

    /**
     * Stop a job instance.
     *
     * @param appId application ID (used by @DesignateServer for remote-server routing)
     * @param instanceId instance ID
     */
    @DesignateServer
    public void stopInstance(Long appId,Long instanceId) {

        log.info("[Instance-{}] try to stop the instance instance in appId: {}", instanceId,appId);
        try {

            InstanceInfoDO instanceInfo = fetchInstanceInfo(instanceId);
            // Only running instances can be stopped.
            if (!InstanceStatus.GENERALIZED_RUNNING_STATUS.contains(instanceInfo.getStatus())) {
                throw new IllegalArgumentException("can't stop finished instance!");
            }

            // Persist the STOPPED state first.
            instanceInfo.setStatus(STOPPED.getV());
            instanceInfo.setGmtModified(new Date());
            instanceInfo.setFinishedTime(System.currentTimeMillis());
            instanceInfo.setResult(SystemInstanceResult.STOPPED_BY_USER);
            instanceInfoRepository.saveAndFlush(instanceInfo);

            instanceManager.processFinishedInstance(instanceId, instanceInfo.getWfInstanceId(), STOPPED, SystemInstanceResult.STOPPED_BY_USER);

            /*
            Unreliable notification of the TaskTracker.
            If the stop does not succeed, the TaskTracker will reportStatus again later; per the
            normal flow the instance record is then updated back to RUNNING and the developer can
            stop it again manually.
             */
            Optional<WorkerInfo> workerInfoOpt = workerClusterQueryService.getWorkerInfoByAddress(instanceInfo.getAppId(), instanceInfo.getTaskTrackerAddress());
            if (workerInfoOpt.isPresent()) {
                ServerStopInstanceReq req = new ServerStopInstanceReq(instanceId);
                WorkerInfo workerInfo = workerInfoOpt.get();
                transportService.tell(workerInfo.getProtocol(), ServerURLFactory.stopInstance2Worker(workerInfo.getAddress()), req);
                log.info("[Instance-{}] update instanceInfo and send 'stopInstance' request succeed.", instanceId);
            } else {
                log.warn("[Instance-{}] update instanceInfo successfully but can't find TaskTracker to stop instance", instanceId);
            }

        } catch (IllegalArgumentException ie) {
            throw ie;
        } catch (Exception e) {
            log.error("[Instance-{}] stopInstance failed.", instanceId, e);
            throw e;
        }
    }

    /**
     * Retry an instance (only finished instances may be retried).
     *
     * @param appId application ID (used by @DesignateServer for remote-server routing)
     * @param instanceId instance ID
     */
    @DesignateServer
    public void retryInstance(Long appId, Long instanceId) {

        log.info("[Instance-{}] retry instance in appId: {}", instanceId, appId);

        InstanceInfoDO instanceInfo = fetchInstanceInfo(instanceId);
        if (!InstanceStatus.FINISHED_STATUS.contains(instanceInfo.getStatus())) {
            throw new PowerJobException("Only stopped instance can be retry!");
        }
        // Retrying workflow-spawned instances is not supported for now.
        if (instanceInfo.getWfInstanceId() != null) {
            throw new PowerJobException("Workflow's instance do not support retry!");
        }

        // Reset the record back to a dispatchable state.
        instanceInfo.setStatus(InstanceStatus.WAITING_DISPATCH.getV());
        instanceInfo.setExpectedTriggerTime(System.currentTimeMillis());
        instanceInfo.setFinishedTime(null);
        instanceInfo.setActualTriggerTime(null);
        instanceInfo.setTaskTrackerAddress(null);
        instanceInfo.setResult(null);
        instanceInfoRepository.saveAndFlush(instanceInfo);

        // Dispatch the job again.
        Long jobId = instanceInfo.getJobId();
        JobInfoDO jobInfo = jobInfoRepository.findById(jobId).orElseThrow(() -> new PowerJobException("can't find job info by jobId: " + jobId));
        // Remove the previous run's log file first.
        instanceLogService.removeOldFile(instanceId);

        dispatchService.dispatch(jobInfo, instanceId,Optional.of(instanceInfo),Optional.empty());
    }

    /**
     * Cancel a scheduled instance before it runs.
     * Usage condition: call this with a reasonable time gap before the instance's expected
     * trigger time, otherwise reliability is NOT guaranteed!
     *
     * @param appId application ID (used by @DesignateServer for remote-server routing)
     * @param instanceId instance ID
     */
    @DesignateServer
    public void cancelInstance(Long appId, Long instanceId) {
        log.info("[Instance-{}] try to cancel the instance with appId {}.", instanceId, appId);

        try {
            InstanceInfoDO instanceInfo = fetchInstanceInfo(instanceId);
            TimerFuture timerFuture = InstanceTimeWheelService.fetchTimerFuture(instanceId);

            boolean success;
            // The task is still in this node's timer wheel and was cancelled in time — rescued!
            if (timerFuture != null) {
                success = timerFuture.cancel();
            } else {
                // If this call races closely with the scheduled trigger time, the cancel flag may
                // not be written before the dispatch completes, and the status gets overwritten.
                // Fixing this would be very costly (distributed locking), so we choose not to.
                // Hence the usage condition: call with a sufficient gap before the trigger time.
                success = InstanceStatus.WAITING_DISPATCH.getV() == instanceInfo.getStatus();
            }

            if (success) {
                instanceInfo.setStatus(InstanceStatus.CANCELED.getV());
                instanceInfo.setResult(SystemInstanceResult.CANCELED_BY_USER);
                // If the DB write fails, the thrown exception makes the API return false (cancel
                // failed) and the HA mechanism will reschedule the instance — no handling needed here.
                instanceInfoRepository.saveAndFlush(instanceInfo);
                log.info("[Instance-{}] cancel the instance successfully.", instanceId);
            } else {
                log.warn("[Instance-{}] cancel the instance failed.", instanceId);
                throw new PowerJobException("instance already up and running");
            }

        } catch (Exception e) {
            log.error("[Instance-{}] cancelInstance failed.", instanceId, e);
            throw e;
        }
    }

    /**
     * Paged query over instance records.
     *
     * @param instancePageQuery query + paging parameters
     * @return one page of instance DTOs
     */
    public PageResult<InstanceInfoDTO> queryInstanceInfo(InstancePageQuery instancePageQuery) {
        Specification<InstanceInfoDO> specification = QueryConvertUtils.toSpecification(instancePageQuery);
        Pageable pageable = QueryConvertUtils.toPageable(instancePageQuery);
        Page<InstanceInfoDO> instanceInfoDOPage = instanceInfoRepository.findAll(specification, pageable);

        PageResult<InstanceInfoDTO> ret = new PageResult<>();
        List<InstanceInfoDTO> instanceInfoDTOList = instanceInfoDOPage.get().map(InstanceService::directConvert).collect(Collectors.toList());

        ret.setData(instanceInfoDTOList)
                .setIndex(instanceInfoDOPage.getNumber())
                .setPageSize(instanceInfoDOPage.getSize())
                .setTotalPages(instanceInfoDOPage.getTotalPages())
                .setTotalItems(instanceInfoDOPage.getTotalElements());

        return ret;
    }

    /**
     * Get the full information of an instance.
     *
     * @param instanceId instance ID
     * @return instance information
     */
    public InstanceInfoDTO getInstanceInfo(Long instanceId) {
        return directConvert(fetchInstanceInfo(instanceId));
    }

    /**
     * Get the status of an instance.
     *
     * @param instanceId instance ID
     * @return instance status
     */
    public InstanceStatus getInstanceStatus(Long instanceId) {
        InstanceInfoDO instanceInfoDO = fetchInstanceInfo(instanceId);
        return InstanceStatus.of(instanceInfoDO.getStatus());
    }

    /**
     * Get the detailed runtime state of an instance.
     *
     * @param appId used by @DesignateServer for remote-server routing — do NOT remove!
     * @param instanceId instance ID
     * @return detailed runtime state
     */
    @DesignateServer
    public InstanceDetail getInstanceDetail(Long appId, Long instanceId, String customQuery) {

        InstanceInfoDO instanceInfoDO = fetchInstanceInfo(instanceId);

        InstanceStatus instanceStatus = InstanceStatus.of(instanceInfoDO.getStatus());

        InstanceDetail detail = new InstanceDetail();
        detail.setStatus(instanceStatus.getV());

        // Anything other than RUNNING only needs the brief information from the DB record.
        if (instanceStatus != RUNNING) {
            BeanUtils.copyProperties(instanceInfoDO, detail);
            return detail;
        }

        // RUNNING: ask the TaskTracker for live details.
        Optional<WorkerInfo> workerInfoOpt = workerClusterQueryService.getWorkerInfoByAddress(instanceInfoDO.getAppId(), instanceInfoDO.getTaskTrackerAddress());
        if (workerInfoOpt.isPresent()) {
            WorkerInfo workerInfo = workerInfoOpt.get();
            ServerQueryInstanceStatusReq req = new ServerQueryInstanceStatusReq(instanceId, customQuery);
            try {
                final URL url = ServerURLFactory.queryInstance2Worker(workerInfo.getAddress());
                AskResponse askResponse = transportService.ask(workerInfo.getProtocol(), url, req, AskResponse.class)
                        .toCompletableFuture()
                        .get(RemoteConstant.DEFAULT_TIMEOUT_MS, TimeUnit.MILLISECONDS);
                if (askResponse.isSuccess()) {
                    InstanceDetail instanceDetail = askResponse.getData(InstanceDetail.class);
                    instanceDetail.setRunningTimes(instanceInfoDO.getRunningTimes());
                    instanceDetail.setInstanceParams(instanceInfoDO.getInstanceParams());
                    return instanceDetail;
                }else {
                    log.warn("[Instance-{}] ask InstanceStatus from TaskTracker failed, the message is {}.", instanceId, askResponse.getMessage());
                }
            } catch (Exception e) {
                log.warn("[Instance-{}] ask InstanceStatus from TaskTracker failed, exception is {}", instanceId, e.toString());
            }
        }

        // Fall back to the basic DB-backed information on any failure.
        BeanUtils.copyProperties(instanceInfoDO, detail);
        return detail;
    }

    /**
     * Load the instance record, failing fast on an unknown instanceId.
     */
    private InstanceInfoDO fetchInstanceInfo(Long instanceId) {
        InstanceInfoDO instanceInfoDO = instanceInfoRepository.findByInstanceId(instanceId);
        if (instanceInfoDO == null) {
            log.warn("[Instance-{}] can't find InstanceInfo by instanceId", instanceId);
            throw new IllegalArgumentException("invalid instanceId: " + instanceId);
        }
        return instanceInfoDO;
    }

    /**
     * Straight property-copy conversion from DO to DTO.
     */
    private static InstanceInfoDTO directConvert(InstanceInfoDO instanceInfoDO) {
        InstanceInfoDTO instanceInfoDTO = new InstanceInfoDTO();
        BeanUtils.copyProperties(instanceInfoDO, instanceInfoDTO);
        return instanceInfoDTO;
    }
}
|
||||
@ -0,0 +1,74 @@
|
||||
package tech.powerjob.server.core.lock;
|
||||
|
||||
import tech.powerjob.common.utils.CommonUtils;
|
||||
import tech.powerjob.common.utils.NetUtils;
|
||||
import tech.powerjob.server.extension.LockService;
|
||||
import tech.powerjob.server.persistence.remote.model.OmsLockDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.OmsLockRepository;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.dao.DataIntegrityViolationException;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* 基于数据库实现的分布式锁
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/4/5
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
public class DatabaseLockService implements LockService {
|
||||
|
||||
private final String ownerIp;
|
||||
|
||||
private final OmsLockRepository omsLockRepository;
|
||||
|
||||
@Autowired
|
||||
public DatabaseLockService(OmsLockRepository omsLockRepository) {
|
||||
|
||||
this.ownerIp = NetUtils.getLocalHost();
|
||||
this.omsLockRepository = omsLockRepository;
|
||||
|
||||
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
|
||||
int num = omsLockRepository.deleteByOwnerIP(ownerIp);
|
||||
log.info("[DatabaseLockService] execute shutdown hook, release all lock(owner={},num={})", ownerIp, num);
|
||||
}));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean tryLock(String name, long maxLockTime) {
|
||||
|
||||
OmsLockDO newLock = new OmsLockDO(name, ownerIp, maxLockTime);
|
||||
try {
|
||||
omsLockRepository.saveAndFlush(newLock);
|
||||
return true;
|
||||
} catch (DataIntegrityViolationException ignore) {
|
||||
} catch (Exception e) {
|
||||
log.warn("[DatabaseLockService] write lock to database failed, lockName = {}.", name, e);
|
||||
}
|
||||
|
||||
OmsLockDO omsLockDO = omsLockRepository.findByLockName(name);
|
||||
long lockedMillions = System.currentTimeMillis() - omsLockDO.getGmtCreate().getTime();
|
||||
|
||||
// 锁超时,强制释放锁并重新尝试获取
|
||||
if (lockedMillions > omsLockDO.getMaxLockTime()) {
|
||||
|
||||
log.warn("[DatabaseLockService] The lock[{}] already timeout, will be unlocked now.", omsLockDO);
|
||||
unlock(name);
|
||||
return tryLock(name, maxLockTime);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void unlock(String name) {
|
||||
|
||||
try {
|
||||
CommonUtils.executeWithRetry0(() -> omsLockRepository.deleteByLockName(name));
|
||||
}catch (Exception e) {
|
||||
log.error("[DatabaseLockService] unlock {} failed.", name, e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,24 @@
|
||||
package tech.powerjob.server.core.lock;
|
||||
|
||||
import java.lang.annotation.ElementType;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
|
||||
/**
 * use cached lock to make concurrent safe
 *
 * @author tjq
 * @author Echo009
 * @since 1/16/21
 */
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface UseCacheLock {

    // Lock namespace: each distinct type value gets its own lock cache in the aspect.
    String type();

    // SpEL expression evaluated against the method arguments to derive the lock key.
    String key();

    // Concurrency level of the Guava lock cache created for this type.
    int concurrencyLevel();
}
|
||||
@ -0,0 +1,83 @@
|
||||
package tech.powerjob.server.core.lock;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.collect.Maps;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.aspectj.lang.ProceedingJoinPoint;
|
||||
import org.aspectj.lang.annotation.Around;
|
||||
import org.aspectj.lang.annotation.Aspect;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.server.common.utils.AOPUtils;
|
||||
import tech.powerjob.server.monitor.MonitorService;
|
||||
import tech.powerjob.server.monitor.events.lock.SlowLockEvent;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
/**
|
||||
* aspect for @UseSegmentLock
|
||||
*
|
||||
* @author tjq
|
||||
* @since 1/16/21
|
||||
*/
|
||||
@Slf4j
|
||||
@Aspect
|
||||
@Component
|
||||
@Order(1)
|
||||
@RequiredArgsConstructor
|
||||
public class UseCacheLockAspect {
|
||||
|
||||
private final MonitorService monitorService;
|
||||
|
||||
private final Map<String, Cache<String, ReentrantLock>> lockContainer = Maps.newConcurrentMap();
|
||||
|
||||
private static final long SLOW_THRESHOLD = 100;
|
||||
|
||||
@Around(value = "@annotation(useCacheLock))")
|
||||
public Object execute(ProceedingJoinPoint point, UseCacheLock useCacheLock) throws Throwable {
|
||||
Cache<String, ReentrantLock> lockCache = lockContainer.computeIfAbsent(useCacheLock.type(), ignore -> {
|
||||
int concurrencyLevel = useCacheLock.concurrencyLevel();
|
||||
log.info("[UseSegmentLockAspect] create Lock Cache for [{}] with concurrencyLevel: {}", useCacheLock.type(), concurrencyLevel);
|
||||
return CacheBuilder.newBuilder()
|
||||
.initialCapacity(300000)
|
||||
.maximumSize(500000)
|
||||
.concurrencyLevel(concurrencyLevel)
|
||||
.expireAfterWrite(30, TimeUnit.MINUTES)
|
||||
.build();
|
||||
});
|
||||
final Method method = AOPUtils.parseMethod(point);
|
||||
Long key = AOPUtils.parseSpEl(method, point.getArgs(), useCacheLock.key(), Long.class, 1L);
|
||||
final ReentrantLock reentrantLock = lockCache.get(String.valueOf(key), ReentrantLock::new);
|
||||
long start = System.currentTimeMillis();
|
||||
reentrantLock.lockInterruptibly();
|
||||
try {
|
||||
long timeCost = System.currentTimeMillis() - start;
|
||||
if (timeCost > SLOW_THRESHOLD) {
|
||||
|
||||
final SlowLockEvent slowLockEvent = new SlowLockEvent()
|
||||
.setType(SlowLockEvent.Type.LOCAL)
|
||||
.setLockType(useCacheLock.type())
|
||||
.setLockKey(String.valueOf(key))
|
||||
.setCallerService(method.getDeclaringClass().getSimpleName())
|
||||
.setCallerMethod(method.getName())
|
||||
.setCost(timeCost);
|
||||
|
||||
monitorService.monitor(slowLockEvent);
|
||||
|
||||
log.warn("[UseSegmentLockAspect] wait lock for method({}#{}) cost {} ms! key = '{}', args = {}, ", method.getDeclaringClass().getSimpleName(), method.getName(), timeCost,
|
||||
key,
|
||||
JSON.toJSONString(point.getArgs()));
|
||||
}
|
||||
return point.proceed();
|
||||
} finally {
|
||||
reentrantLock.unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,206 @@
|
||||
package tech.powerjob.server.core.scheduler;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Stopwatch;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.time.DateUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.enums.WorkflowInstanceStatus;
|
||||
import tech.powerjob.server.common.constants.PJThreadPool;
|
||||
import tech.powerjob.server.common.utils.OmsFileUtils;
|
||||
import tech.powerjob.server.extension.LockService;
|
||||
import tech.powerjob.server.extension.dfs.DFsService;
|
||||
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowNodeInfoRepository;
|
||||
import tech.powerjob.server.persistence.storage.Constants;
|
||||
import tech.powerjob.server.remote.worker.WorkerClusterManagerService;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Date;
|
||||
|
||||
/**
|
||||
* CCO(Chief Clean Officer)
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/5/18
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
public class CleanService {
|
||||
|
||||
private final DFsService dFsService;
|
||||
|
||||
private final InstanceInfoRepository instanceInfoRepository;
|
||||
|
||||
private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository;
|
||||
|
||||
private final WorkflowNodeInfoRepository workflowNodeInfoRepository;
|
||||
|
||||
private final LockService lockService;
|
||||
|
||||
private final int instanceInfoRetentionDay;
|
||||
|
||||
private final int localContainerRetentionDay;
|
||||
|
||||
private final int remoteContainerRetentionDay;
|
||||
|
||||
private static final int TEMPORARY_RETENTION_DAY = 3;
|
||||
|
||||
/**
|
||||
* 每天凌晨3点定时清理
|
||||
*/
|
||||
private static final String CLEAN_TIME_EXPRESSION = "0 0 3 * * ?";
|
||||
|
||||
private static final String HISTORY_DELETE_LOCK = "history_delete_lock";
|
||||
|
||||
public CleanService(DFsService dFsService, InstanceInfoRepository instanceInfoRepository, WorkflowInstanceInfoRepository workflowInstanceInfoRepository,
|
||||
WorkflowNodeInfoRepository workflowNodeInfoRepository, LockService lockService,
|
||||
@Value("${oms.instanceinfo.retention}") int instanceInfoRetentionDay,
|
||||
@Value("${oms.container.retention.local}") int localContainerRetentionDay,
|
||||
@Value("${oms.container.retention.remote}") int remoteContainerRetentionDay) {
|
||||
this.dFsService = dFsService;
|
||||
this.instanceInfoRepository = instanceInfoRepository;
|
||||
this.workflowInstanceInfoRepository = workflowInstanceInfoRepository;
|
||||
this.workflowNodeInfoRepository = workflowNodeInfoRepository;
|
||||
this.lockService = lockService;
|
||||
this.instanceInfoRetentionDay = instanceInfoRetentionDay;
|
||||
this.localContainerRetentionDay = localContainerRetentionDay;
|
||||
this.remoteContainerRetentionDay = remoteContainerRetentionDay;
|
||||
}
|
||||
|
||||
|
||||
@Async(PJThreadPool.TIMING_POOL)
|
||||
@Scheduled(cron = CLEAN_TIME_EXPRESSION)
|
||||
public void timingClean() {
|
||||
|
||||
// 释放本地缓存
|
||||
WorkerClusterManagerService.cleanUp();
|
||||
|
||||
// 释放磁盘空间
|
||||
cleanLocal(OmsFileUtils.genLogDirPath(), instanceInfoRetentionDay);
|
||||
cleanLocal(OmsFileUtils.genContainerJarPath(), localContainerRetentionDay);
|
||||
cleanLocal(OmsFileUtils.genTemporaryPath(), TEMPORARY_RETENTION_DAY);
|
||||
|
||||
// 删除数据库历史的数据
|
||||
cleanByOneServer();
|
||||
}
|
||||
|
||||
/**
|
||||
* 只能一台server清理的操作统一到这里执行
|
||||
*/
|
||||
private void cleanByOneServer() {
|
||||
// 只要第一个server抢到锁其他server就会返回,所以锁10分钟应该足够了
|
||||
boolean lock = lockService.tryLock(HISTORY_DELETE_LOCK, 10 * 60 * 1000L);
|
||||
if (!lock) {
|
||||
log.info("[CleanService] clean job is already running, just return.");
|
||||
return;
|
||||
}
|
||||
try {
|
||||
// 删除数据库运行记录
|
||||
cleanInstanceLog();
|
||||
cleanWorkflowInstanceLog();
|
||||
// 删除无用节点
|
||||
cleanWorkflowNodeInfo();
|
||||
// 删除 GridFS 过期文件
|
||||
cleanRemote(Constants.LOG_BUCKET, instanceInfoRetentionDay);
|
||||
cleanRemote(Constants.CONTAINER_BUCKET, remoteContainerRetentionDay);
|
||||
} finally {
|
||||
lockService.unlock(HISTORY_DELETE_LOCK);
|
||||
}
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public void cleanLocal(String path, int day) {
|
||||
if (day < 0) {
|
||||
log.info("[CleanService] won't clean up {} because of offset day <= 0.", path);
|
||||
return;
|
||||
}
|
||||
|
||||
Stopwatch stopwatch = Stopwatch.createStarted();
|
||||
File dir = new File(path);
|
||||
if (!dir.exists()) {
|
||||
return;
|
||||
}
|
||||
File[] logFiles = dir.listFiles();
|
||||
if (logFiles == null || logFiles.length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// 计算最大偏移量
|
||||
long maxOffset = day * 24 * 60 * 60 * 1000L;
|
||||
|
||||
for (File f : logFiles) {
|
||||
long offset = System.currentTimeMillis() - f.lastModified();
|
||||
if (offset >= maxOffset) {
|
||||
if (!f.delete()) {
|
||||
log.warn("[CleanService] delete file({}) failed.", f.getName());
|
||||
}else {
|
||||
log.info("[CleanService] delete file({}) successfully.", f.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
log.info("[CleanService] clean {} successfully, using {}.", path, stopwatch.stop());
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public void cleanRemote(String bucketName, int day) {
|
||||
if (day < 0) {
|
||||
log.info("[CleanService] won't clean up bucket({}) because of offset day <= 0.", bucketName);
|
||||
return;
|
||||
}
|
||||
Stopwatch stopwatch = Stopwatch.createStarted();
|
||||
try {
|
||||
dFsService.cleanExpiredFiles(bucketName, day);
|
||||
}catch (Exception e) {
|
||||
log.warn("[CleanService] clean remote bucket({}) failed.", bucketName, e);
|
||||
}
|
||||
log.info("[CleanService] clean remote bucket({}) successfully, using {}.", bucketName, stopwatch.stop());
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public void cleanInstanceLog() {
|
||||
if (instanceInfoRetentionDay < 0) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
Date t = DateUtils.addDays(new Date(), -instanceInfoRetentionDay);
|
||||
int num = instanceInfoRepository.deleteAllByGmtModifiedBeforeAndStatusIn(t, InstanceStatus.FINISHED_STATUS);
|
||||
log.info("[CleanService] deleted {} instanceInfo records whose modify time before {}.", num, t);
|
||||
}catch (Exception e) {
|
||||
log.warn("[CleanService] clean instanceInfo failed.", e);
|
||||
}
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public void cleanWorkflowInstanceLog() {
|
||||
if (instanceInfoRetentionDay < 0) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
Date t = DateUtils.addDays(new Date(), -instanceInfoRetentionDay);
|
||||
int num = workflowInstanceInfoRepository.deleteAllByGmtModifiedBeforeAndStatusIn(t, WorkflowInstanceStatus.FINISHED_STATUS);
|
||||
log.info("[CleanService] deleted {} workflow instanceInfo records whose modify time before {}.", num, t);
|
||||
}catch (Exception e) {
|
||||
log.warn("[CleanService] clean workflow instanceInfo failed.", e);
|
||||
}
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public void cleanWorkflowNodeInfo(){
|
||||
try {
|
||||
// 清理一天前创建的,且没有工作流 ID 的节点信息
|
||||
Date t = DateUtils.addDays(new Date(), -1);
|
||||
int num = workflowNodeInfoRepository.deleteAllByWorkflowIdIsNullAndGmtCreateBefore(t);
|
||||
log.info("[CleanService] deleted {} node records whose create time before {} and workflowId is null.", num, t);
|
||||
} catch (Exception e) {
|
||||
log.warn("[CleanService] clean workflow node info failed.", e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,85 @@
|
||||
package tech.powerjob.server.core.scheduler;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.DisposableBean;
|
||||
import org.springframework.beans.factory.InitializingBean;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2022/10/12
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
public class CoreScheduleTaskManager implements InitializingBean, DisposableBean {
|
||||
|
||||
|
||||
private final PowerScheduleService powerScheduleService;
|
||||
|
||||
private final InstanceStatusCheckService instanceStatusCheckService;
|
||||
|
||||
private final List<Thread> coreThreadContainer = new ArrayList<>();
|
||||
|
||||
|
||||
@SuppressWarnings("AlibabaAvoidManuallyCreateThread")
|
||||
@Override
|
||||
public void afterPropertiesSet() {
|
||||
// 定时调度
|
||||
coreThreadContainer.add(new Thread(new LoopRunnable("ScheduleCronJob", PowerScheduleService.SCHEDULE_RATE, () -> powerScheduleService.scheduleNormalJob(TimeExpressionType.CRON)), "Thread-ScheduleCronJob"));
|
||||
coreThreadContainer.add(new Thread(new LoopRunnable("ScheduleDailyTimeIntervalJob", PowerScheduleService.SCHEDULE_RATE, () -> powerScheduleService.scheduleNormalJob(TimeExpressionType.DAILY_TIME_INTERVAL)), "Thread-ScheduleDailyTimeIntervalJob"));
|
||||
coreThreadContainer.add(new Thread(new LoopRunnable("ScheduleCronWorkflow", PowerScheduleService.SCHEDULE_RATE, powerScheduleService::scheduleCronWorkflow), "Thread-ScheduleCronWorkflow"));
|
||||
coreThreadContainer.add(new Thread(new LoopRunnable("ScheduleFrequentJob", PowerScheduleService.SCHEDULE_RATE, powerScheduleService::scheduleFrequentJob), "Thread-ScheduleFrequentJob"));
|
||||
// 数据清理
|
||||
coreThreadContainer.add(new Thread(new LoopRunnable("CleanWorkerData", PowerScheduleService.SCHEDULE_RATE, powerScheduleService::cleanData), "Thread-CleanWorkerData"));
|
||||
// 状态检查
|
||||
coreThreadContainer.add(new Thread(new LoopRunnable("CheckRunningInstance", InstanceStatusCheckService.CHECK_INTERVAL, instanceStatusCheckService::checkRunningInstance), "Thread-CheckRunningInstance"));
|
||||
coreThreadContainer.add(new Thread(new LoopRunnable("CheckWaitingDispatchInstance", InstanceStatusCheckService.CHECK_INTERVAL, instanceStatusCheckService::checkWaitingDispatchInstance), "Thread-CheckWaitingDispatchInstance"));
|
||||
coreThreadContainer.add(new Thread(new LoopRunnable("CheckWaitingWorkerReceiveInstance", InstanceStatusCheckService.CHECK_INTERVAL, instanceStatusCheckService::checkWaitingWorkerReceiveInstance), "Thread-CheckWaitingWorkerReceiveInstance"));
|
||||
coreThreadContainer.add(new Thread(new LoopRunnable("CheckWorkflowInstance", InstanceStatusCheckService.CHECK_INTERVAL, instanceStatusCheckService::checkWorkflowInstance), "Thread-CheckWorkflowInstance"));
|
||||
|
||||
coreThreadContainer.forEach(Thread::start);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
coreThreadContainer.forEach(Thread::interrupt);
|
||||
}
|
||||
|
||||
|
||||
@RequiredArgsConstructor
|
||||
private static class LoopRunnable implements Runnable {
|
||||
|
||||
private final String taskName;
|
||||
|
||||
private final Long runningInterval;
|
||||
|
||||
private final Runnable innerRunnable;
|
||||
|
||||
@SuppressWarnings("BusyWait")
|
||||
@Override
|
||||
public void run() {
|
||||
log.info("start task : {}.", taskName);
|
||||
while (true) {
|
||||
try {
|
||||
|
||||
// 倒置顺序为 先 sleep 再执行,解决异常情况 while true 打日志的问题 https://github.com/PowerJob/PowerJob/issues/769
|
||||
Thread.sleep(runningInterval);
|
||||
|
||||
innerRunnable.run();
|
||||
} catch (InterruptedException e) {
|
||||
log.warn("[{}] task has been interrupted!", taskName, e);
|
||||
break;
|
||||
} catch (Exception e) {
|
||||
log.error("[{}] task failed!", taskName, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,302 @@
|
||||
package tech.powerjob.server.core.scheduler;
|
||||
|
||||
import com.google.common.base.Stopwatch;
|
||||
import com.google.common.collect.Lists;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.data.domain.PageRequest;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import tech.powerjob.common.SystemInstanceResult;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.common.enums.WorkflowInstanceStatus;
|
||||
import tech.powerjob.server.common.Holder;
|
||||
import tech.powerjob.common.enums.SwitchableStatus;
|
||||
import tech.powerjob.server.core.DispatchService;
|
||||
import tech.powerjob.server.core.instance.InstanceManager;
|
||||
import tech.powerjob.server.core.workflow.WorkflowInstanceManager;
|
||||
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.brief.BriefInstanceInfo;
|
||||
import tech.powerjob.server.persistence.remote.repository.*;
|
||||
import tech.powerjob.server.remote.transporter.TransportService;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
 * Periodic instance status check: detects and repairs instances / workflow
 * instances that got stuck because a server or worker failed mid-flight.
 *
 * @author tjq
 * @since 2020/4/7
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class InstanceStatusCheckService {

    // max number of apps processed per partition
    private static final int MAX_BATCH_NUM_APP = 10;
    // max number of instances fetched per DB query
    private static final int MAX_BATCH_NUM_INSTANCE = 3000;
    // max number of instances re-dispatched per batch update
    private static final int MAX_BATCH_UPDATE_NUM = 500;
    // WAITING_DISPATCH instances older than this are considered missed
    private static final long DISPATCH_TIMEOUT_MS = 30000;
    // WAITING_WORKER_RECEIVE instances older than this are re-dispatched
    private static final long RECEIVE_TIMEOUT_MS = 60000;
    // RUNNING instances without a status report for this long are handled as failed
    private static final long RUNNING_TIMEOUT_MS = 60000;
    // WAITING workflow instances older than this are restarted
    private static final long WORKFLOW_WAITING_TIMEOUT_MS = 60000;

    // interval between two check rounds (consumed by the core schedule loop)
    public static final long CHECK_INTERVAL = 10000;

    private final TransportService transportService;

    private final DispatchService dispatchService;

    private final InstanceManager instanceManager;

    private final WorkflowInstanceManager workflowInstanceManager;

    private final AppInfoRepository appInfoRepository;

    private final JobInfoRepository jobInfoRepository;

    private final InstanceInfoRepository instanceInfoRepository;

    private final WorkflowInfoRepository workflowInfoRepository;

    private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository;

    /**
     * Checks workflow instances stuck in WAITING state for the apps owned by this server.
     */
    public void checkWorkflowInstance() {
        Stopwatch stopwatch = Stopwatch.createStarted();
        // query the DB for the AppGroup this server is responsible for
        List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
        if (CollectionUtils.isEmpty(allAppIds)) {
            log.info("[InstanceStatusChecker] current server has no app's job to check");
            return;
        }
        try {
            checkWorkflowInstance(allAppIds);
        } catch (Exception e) {
            log.error("[InstanceStatusChecker] WorkflowInstance status check failed.", e);
        }
        log.info("[InstanceStatusChecker] WorkflowInstance status check using {}.", stopwatch.stop());
    }

    /**
     * Checks instances waiting to be dispatched.
     * WAITING_DISPATCH timeout: the server went down after the instance was
     * written into the time wheel but before it was actually scheduled.
     */
    public void checkWaitingDispatchInstance() {
        Stopwatch stopwatch = Stopwatch.createStarted();
        // query the DB for the AppGroup this server is responsible for
        List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
        if (CollectionUtils.isEmpty(allAppIds)) {
            log.info("[InstanceStatusChecker] current server has no app's job to check");
            return;
        }
        try {
            // check instances stuck in WAITING_DISPATCH state
            Lists.partition(allAppIds, MAX_BATCH_NUM_APP).forEach(this::handleWaitingDispatchInstance);
        } catch (Exception e) {
            log.error("[InstanceStatusChecker] WaitingDispatchInstance status check failed.", e);
        }
        log.info("[InstanceStatusChecker] WaitingDispatchInstance status check using {}.", stopwatch.stop());
    }

    /**
     * Checks instances waiting to be received by a worker.
     * WAITING_WORKER_RECEIVE timeout: the worker never acknowledged the task,
     * typically because of a network error.
     */
    public void checkWaitingWorkerReceiveInstance() {
        Stopwatch stopwatch = Stopwatch.createStarted();
        // query the DB for the AppGroup this server is responsible for
        List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
        if (CollectionUtils.isEmpty(allAppIds)) {
            log.info("[InstanceStatusChecker] current server has no app's job to check");
            return;
        }
        try {
            // check instances stuck in WAITING_WORKER_RECEIVE state
            Lists.partition(allAppIds, MAX_BATCH_NUM_APP).forEach(this::handleWaitingWorkerReceiveInstance);
        } catch (Exception e) {
            log.error("[InstanceStatusChecker] WaitingWorkerReceiveInstance status check failed.", e);
        }
        log.info("[InstanceStatusChecker] WaitingWorkerReceiveInstance status check using {}.", stopwatch.stop());
    }

    /**
     * Checks running instances.
     * RUNNING timeout: the TaskTracker went down and stopped sending heartbeat
     * status reports to the server.
     */
    public void checkRunningInstance() {
        Stopwatch stopwatch = Stopwatch.createStarted();
        // query the DB for the AppGroup this server is responsible for
        List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
        if (CollectionUtils.isEmpty(allAppIds)) {
            log.info("[InstanceStatusChecker] current server has no app's job to check");
            return;
        }
        try {
            // check RUNNING instances (no TaskTracker status report for a while => treat as failed)
            Lists.partition(allAppIds, MAX_BATCH_NUM_APP).forEach(this::handleRunningInstance);
        } catch (Exception e) {
            log.error("[InstanceStatusChecker] RunningInstance status check failed.", e);
        }
        log.info("[InstanceStatusChecker] RunningInstance status check using {}.", stopwatch.stop());
    }

    /**
     * Re-dispatches (or fails) all WAITING_DISPATCH instances of the given apps
     * whose expected trigger time passed more than DISPATCH_TIMEOUT_MS ago.
     * Loops until the query comes back empty; apps flagged as overloaded are
     * dropped from subsequent iterations.
     */
    private void handleWaitingDispatchInstance(List<Long> appIds) {

        // removal happens below, so a fresh mutable copy is required; otherwise the
        // outer caller would hit NoSuchElementException: null
        List<Long> partAppIds = Lists.newArrayList(appIds);

        // 1. check instances stuck in WAITING_DISPATCH state
        long threshold = System.currentTimeMillis() - DISPATCH_TIMEOUT_MS;
        List<InstanceInfoDO> waitingDispatchInstances = instanceInfoRepository.findAllByAppIdInAndStatusAndExpectedTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_DISPATCH.getV(), threshold, PageRequest.of(0, MAX_BATCH_NUM_INSTANCE));
        while (!waitingDispatchInstances.isEmpty()) {
            List<Long> overloadAppIdList = new ArrayList<>();
            long startTime = System.currentTimeMillis();
            // group by appId, which makes the per-app overload handling straightforward
            Map<Long, List<InstanceInfoDO>> waitingDispatchInstancesMap = waitingDispatchInstances.stream().collect(Collectors.groupingBy(InstanceInfoDO::getAppId));
            for (Map.Entry<Long, List<InstanceInfoDO>> entry : waitingDispatchInstancesMap.entrySet()) {
                final Long currentAppId = entry.getKey();
                final List<InstanceInfoDO> currentAppWaitingDispatchInstances = entry.getValue();
                // collect job id
                Set<Long> jobIds = currentAppWaitingDispatchInstances.stream().map(InstanceInfoDO::getJobId).collect(Collectors.toSet());
                // query job info and map
                Map<Long, JobInfoDO> jobInfoMap = jobInfoRepository.findByIdIn(jobIds).stream().collect(Collectors.toMap(JobInfoDO::getId, e -> e));
                log.warn("[InstanceStatusChecker] find some instance in app({}) which is not triggered as expected: {}", currentAppId, currentAppWaitingDispatchInstances.stream().map(InstanceInfoDO::getInstanceId).collect(Collectors.toList()));
                final Holder<Boolean> overloadFlag = new Holder<>(false);
                // keeping it simple is fine here: this is the only place that uses parallelStream
                currentAppWaitingDispatchInstances.parallelStream().forEach(instance -> {
                    if (overloadFlag.get()) {
                        // app already flagged as overloaded, skip the rest silently
                        return;
                    }
                    Optional<JobInfoDO> jobInfoOpt = Optional.ofNullable(jobInfoMap.get(instance.getJobId()));
                    if (jobInfoOpt.isPresent()) {
                        // no need to reset the status again for waiting-dispatch instances, saves one IO round-trip
                        dispatchService.dispatch(jobInfoOpt.get(), instance.getInstanceId(), Optional.of(instance), Optional.of(overloadFlag));
                    } else {
                        log.warn("[InstanceStatusChecker] can't find job by jobId[{}], so redispatch failed, failed instance: {}", instance.getJobId(), instance);
                        final Optional<InstanceInfoDO> opt = instanceInfoRepository.findById(instance.getId());
                        opt.ifPresent(instanceInfoDO -> updateFailedInstance(instanceInfoDO, SystemInstanceResult.CAN_NOT_FIND_JOB_INFO));
                    }
                });
                // refresh the threshold so the re-query below uses the current time
                threshold = System.currentTimeMillis() - DISPATCH_TIMEOUT_MS;
                if (overloadFlag.get()) {
                    overloadAppIdList.add(currentAppId);
                }
            }
            log.info("[InstanceStatusChecker] process {} task,use {} ms", waitingDispatchInstances.size(), System.currentTimeMillis() - startTime);
            if (!overloadAppIdList.isEmpty()) {
                log.warn("[InstanceStatusChecker] app[{}] is overload, so skip check waiting dispatch instance", overloadAppIdList);
                partAppIds.removeAll(overloadAppIdList);
            }
            if (partAppIds.isEmpty()) {
                break;
            }
            waitingDispatchInstances = instanceInfoRepository.findAllByAppIdInAndStatusAndExpectedTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_DISPATCH.getV(), threshold, PageRequest.of(0, MAX_BATCH_NUM_INSTANCE));
        }

    }

    /**
     * Re-dispatches all WAITING_WORKER_RECEIVE instances of the given apps whose
     * actual trigger time passed more than RECEIVE_TIMEOUT_MS ago, in batches of
     * MAX_BATCH_UPDATE_NUM. Loops until the query comes back empty.
     */
    private void handleWaitingWorkerReceiveInstance(List<Long> partAppIds) {
        // 2. check instances stuck in WAITING_WORKER_RECEIVE state
        long threshold = System.currentTimeMillis() - RECEIVE_TIMEOUT_MS;
        List<BriefInstanceInfo> waitingWorkerReceiveInstances = instanceInfoRepository.selectBriefInfoByAppIdInAndStatusAndActualTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_WORKER_RECEIVE.getV(), threshold, PageRequest.of(0, MAX_BATCH_NUM_INSTANCE));
        while (!waitingWorkerReceiveInstances.isEmpty()) {
            log.warn("[InstanceStatusChecker] find some instance didn't receive any reply from worker, try to redispatch: {}", waitingWorkerReceiveInstances.stream().map(BriefInstanceInfo::getInstanceId).collect(Collectors.toList()));
            final List<List<BriefInstanceInfo>> partitions = Lists.partition(waitingWorkerReceiveInstances, MAX_BATCH_UPDATE_NUM);
            for (List<BriefInstanceInfo> partition : partitions) {
                dispatchService.redispatchBatchAsyncLockFree(partition.stream().map(BriefInstanceInfo::getInstanceId).collect(Collectors.toList()), InstanceStatus.WAITING_WORKER_RECEIVE.getV());
            }
            // re-query with a refreshed threshold
            threshold = System.currentTimeMillis() - RECEIVE_TIMEOUT_MS;
            waitingWorkerReceiveInstances = instanceInfoRepository.selectBriefInfoByAppIdInAndStatusAndActualTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_WORKER_RECEIVE.getV(), threshold, PageRequest.of(0, MAX_BATCH_NUM_INSTANCE));
        }
    }

    /**
     * Handles RUNNING instances that have not reported status for
     * RUNNING_TIMEOUT_MS: depending on job status / time-expression type / retry
     * budget, either re-dispatches the instance or marks it failed.
     */
    private void handleRunningInstance(List<Long> partAppIds) {
        // 3. check RUNNING instances (no TaskTracker status report for a while => treat as failed)
        long threshold = System.currentTimeMillis() - RUNNING_TIMEOUT_MS;
        List<BriefInstanceInfo> failedInstances = instanceInfoRepository.selectBriefInfoByAppIdInAndStatusAndGmtModifiedBefore(partAppIds, InstanceStatus.RUNNING.getV(), new Date(threshold), PageRequest.of(0, MAX_BATCH_NUM_INSTANCE));
        while (!failedInstances.isEmpty()) {
            // collect job id
            Set<Long> jobIds = failedInstances.stream().map(BriefInstanceInfo::getJobId).collect(Collectors.toSet());
            // query job info and map
            Map<Long, JobInfoDO> jobInfoMap = jobInfoRepository.findByIdIn(jobIds).stream().collect(Collectors.toMap(JobInfoDO::getId, e -> e));
            log.warn("[InstanceStatusCheckService] find some instances have not received status report for a long time : {}", failedInstances.stream().map(BriefInstanceInfo::getInstanceId).collect(Collectors.toList()));
            failedInstances.forEach(instance -> {
                Optional<JobInfoDO> jobInfoOpt = Optional.ofNullable(jobInfoMap.get(instance.getJobId()));
                if (!jobInfoOpt.isPresent()) {
                    // job definition is gone => the instance can only be failed
                    final Optional<InstanceInfoDO> opt = instanceInfoRepository.findById(instance.getId());
                    opt.ifPresent(e -> updateFailedInstance(e, SystemInstanceResult.REPORT_TIMEOUT));
                    return;
                }
                TimeExpressionType timeExpressionType = TimeExpressionType.of(jobInfoOpt.get().getTimeExpressionType());
                SwitchableStatus switchableStatus = SwitchableStatus.of(jobInfoOpt.get().getStatus());
                // if the job is disabled, do not retry and just mark the instance failed;
                // frequent (second-level) jobs are also failed directly and left for the dispatcher to reschedule
                if (switchableStatus != SwitchableStatus.ENABLE || TimeExpressionType.FREQUENT_TYPES.contains(timeExpressionType.getV())) {
                    final Optional<InstanceInfoDO> opt = instanceInfoRepository.findById(instance.getId());
                    opt.ifPresent(e -> updateFailedInstance(e, SystemInstanceResult.REPORT_TIMEOUT));
                    return;
                }
                // CRON and API jobs alike: bump the failure count and retry per the retry configuration
                if (instance.getRunningTimes() < jobInfoOpt.get().getInstanceRetryNum()) {
                    dispatchService.redispatchAsync(instance.getInstanceId(), InstanceStatus.RUNNING.getV());
                } else {
                    final Optional<InstanceInfoDO> opt = instanceInfoRepository.findById(instance.getId());
                    opt.ifPresent(e -> updateFailedInstance(e, SystemInstanceResult.REPORT_TIMEOUT));
                }
            });
            // re-query with a refreshed threshold
            threshold = System.currentTimeMillis() - RUNNING_TIMEOUT_MS;
            failedInstances = instanceInfoRepository.selectBriefInfoByAppIdInAndStatusAndGmtModifiedBefore(partAppIds, InstanceStatus.RUNNING.getV(), new Date(threshold), PageRequest.of(0, MAX_BATCH_NUM_INSTANCE));
        }

    }

    /**
     * Periodically checks workflow instance status.
     * Only workflow instances stuck in WAITING for too long are retried here; all
     * other workflow reliability is carried by the Instance machinery, i.e. a
     * failing sub-task reports back to its WorkflowInstance.
     *
     * @param allAppIds all appIds this server is responsible for
     */
    private void checkWorkflowInstance(List<Long> allAppIds) {

        // restart workflow instances that stayed in WAITING state for too long
        long threshold = System.currentTimeMillis() - WORKFLOW_WAITING_TIMEOUT_MS;
        Lists.partition(allAppIds, MAX_BATCH_NUM_APP).forEach(partAppIds -> {
            List<WorkflowInstanceInfoDO> waitingWfInstanceList = workflowInstanceInfoRepository.findByAppIdInAndStatusAndExpectedTriggerTimeLessThan(partAppIds, WorkflowInstanceStatus.WAITING.getV(), threshold);
            if (!CollectionUtils.isEmpty(waitingWfInstanceList)) {

                List<Long> wfInstanceIds = waitingWfInstanceList.stream().map(WorkflowInstanceInfoDO::getWfInstanceId).collect(Collectors.toList());
                log.warn("[WorkflowInstanceChecker] wfInstance({}) is not started as expected, oms try to restart these workflowInstance.", wfInstanceIds);

                waitingWfInstanceList.forEach(wfInstance -> {
                    Optional<WorkflowInfoDO> workflowOpt = workflowInfoRepository.findById(wfInstance.getWorkflowId());
                    workflowOpt.ifPresent(workflowInfo -> {
                        workflowInstanceManager.start(workflowInfo, wfInstance.getWfInstanceId());
                        log.info("[Workflow-{}|{}] restart workflowInstance successfully~", workflowInfo.getId(), wfInstance.getWfInstanceId());
                    });
                });
            }
        });
    }

    /**
     * Marks the given instance as FAILED with the given result, persists it, and
     * notifies the instance manager so downstream processing (alarms, workflow
     * feedback) can run.
     */
    private void updateFailedInstance(InstanceInfoDO instance, String result) {

        log.warn("[InstanceStatusChecker] instance[{}] failed due to {}, instanceInfo: {}", instance.getInstanceId(), result, instance);

        instance.setStatus(InstanceStatus.FAILED.getV());
        instance.setFinishedTime(System.currentTimeMillis());
        instance.setGmtModified(new Date());
        instance.setResult(result);
        instanceInfoRepository.saveAndFlush(instance);

        instanceManager.processFinishedInstance(instance.getInstanceId(), instance.getWfInstanceId(), InstanceStatus.FAILED, result);
    }
}
|
||||
@ -0,0 +1,328 @@
|
||||
package tech.powerjob.server.core.scheduler;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Sets;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.common.model.LifeCycle;
|
||||
import tech.powerjob.common.enums.SwitchableStatus;
|
||||
import tech.powerjob.common.request.http.RunJobRequest;
|
||||
import tech.powerjob.server.common.timewheel.holder.InstanceTimeWheelService;
|
||||
import tech.powerjob.server.core.DispatchService;
|
||||
import tech.powerjob.server.core.instance.InstanceService;
|
||||
import tech.powerjob.server.core.service.JobService;
|
||||
import tech.powerjob.server.core.workflow.WorkflowInstanceManager;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.AppInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
|
||||
import tech.powerjob.server.remote.transporter.TransportService;
|
||||
import tech.powerjob.server.remote.worker.WorkerClusterManagerService;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* 任务调度执行服务(调度 CRON 表达式的任务进行执行)
|
||||
* 原:FIX_RATE和FIX_DELAY任务不需要被调度,创建后直接被派发到Worker执行,只需要失败重试机制(在InstanceStatusCheckService中完成)
|
||||
* 先:那样写不太优雅,东一坨代码西一坨代码的,还是牺牲点性能统一调度算了 (优雅,永不过时~ BY:青钢影)
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/4/5
|
||||
*/
|
||||
@Slf4j
@Service
@RequiredArgsConstructor
public class PowerScheduleService {

    /**
     * Core server-side scheduling service.
     *
     * <p>Each public entry point ({@link #scheduleNormalJob}, {@link #scheduleCronWorkflow},
     * {@link #scheduleFrequentJob}, {@link #cleanData}) first resolves the apps owned by the
     * current server (via {@code appInfoRepository.listAppIdByCurrentServer}) and then works
     * through them in partitions of {@link #MAX_APP_NUM}.
     */

    /**
     * Number of apps handled per partition in one scheduling pass.
     */
    private static final int MAX_APP_NUM = 10;

    private final TransportService transportService;
    private final DispatchService dispatchService;
    private final InstanceService instanceService;
    private final WorkflowInstanceManager workflowInstanceManager;
    private final AppInfoRepository appInfoRepository;
    private final JobInfoRepository jobInfoRepository;
    private final WorkflowInfoRepository workflowInfoRepository;
    private final InstanceInfoRepository instanceInfoRepository;
    private final JobService jobService;
    private final TimingStrategyService timingStrategyService;

    // Scheduling period in milliseconds; also used as the threshold for the slow-query warning
    // and (doubled) as the look-ahead window when querying soon-to-fire jobs/workflows.
    public static final long SCHEDULE_RATE = 15000;

    /**
     * Schedule jobs whose trigger time is computed server-side (e.g. CRON, DAILY_TIME_INTERVAL).
     *
     * @param timeExpressionType the expression type to schedule in this pass
     */
    public void scheduleNormalJob(TimeExpressionType timeExpressionType) {
        long start = System.currentTimeMillis();
        // schedule jobs of the given server-side expression type (e.g. CRON)
        try {
            final List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
            if (CollectionUtils.isEmpty(allAppIds)) {
                log.info("[NormalScheduler] current server has no app's job to schedule.");
                return;
            }
            scheduleNormalJob0(timeExpressionType, allAppIds);
        } catch (Exception e) {
            log.error("[NormalScheduler] schedule cron job failed.", e);
        }
        long cost = System.currentTimeMillis() - start;
        log.info("[NormalScheduler] {} job schedule use {} ms.", timeExpressionType, cost);
        if (cost > SCHEDULE_RATE) {
            log.warn("[NormalScheduler] The database query is using too much time({}ms), please check if the database load is too high!", cost);
        }
    }

    /**
     * Schedule workflows that use CRON expressions.
     */
    public void scheduleCronWorkflow() {
        long start = System.currentTimeMillis();
        // schedule workflows with CRON expressions
        try {
            final List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
            if (CollectionUtils.isEmpty(allAppIds)) {
                log.info("[CronWorkflowSchedule] current server has no app's workflow to schedule.");
                return;
            }
            scheduleWorkflowCore(allAppIds);
        } catch (Exception e) {
            log.error("[CronWorkflowSchedule] schedule cron workflow failed.", e);
        }
        long cost = System.currentTimeMillis() - start;
        log.info("[CronWorkflowSchedule] cron workflow schedule use {} ms.", cost);
        if (cost > SCHEDULE_RATE) {
            log.warn("[CronWorkflowSchedule] The database query is using too much time({}ms), please check if the database load is too high!", cost);
        }
    }

    /**
     * Schedule frequent jobs (FIX_RATE / FIX_DELAY expression types).
     */
    public void scheduleFrequentJob() {
        long start = System.currentTimeMillis();
        // schedule FIX_RATE / FIX_DELAY jobs
        try {
            final List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
            if (CollectionUtils.isEmpty(allAppIds)) {
                log.info("[FrequentJobSchedule] current server has no app's job to schedule.");
                return;
            }
            scheduleFrequentJobCore(allAppIds);
        } catch (Exception e) {
            log.error("[FrequentJobSchedule] schedule frequent job failed.", e);
        }
        long cost = System.currentTimeMillis() - start;
        log.info("[FrequentJobSchedule] frequent job schedule use {} ms.", cost);
        if (cost > SCHEDULE_RATE) {
            log.warn("[FrequentJobSchedule] The database query is using too much time({}ms), please check if the database load is too high!", cost);
        }
    }

    /**
     * Clean worker-cluster data for all apps owned by the current server.
     * Best-effort: failures are logged, never propagated.
     */
    public void cleanData() {
        try {
            final List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
            if (allAppIds.isEmpty()) {
                return;
            }
            WorkerClusterManagerService.clean(allAppIds);
        } catch (Exception e) {
            log.error("[CleanData] clean data failed.", e);
        }
    }

    /**
     * Schedule jobs whose trigger time is computed by the server (CRON, DAILY_TIME_INTERVAL).
     *
     * <p>Per partition: (1) create instance records up-front so a crash cannot lose the run,
     * (2) push each instance into the time wheel, (3) recompute and persist next trigger times.
     *
     * @param timeExpressionType expression type
     * @param appIds             app ids owned by this server
     */
    private void scheduleNormalJob0(TimeExpressionType timeExpressionType, List<Long> appIds) {

        long nowTime = System.currentTimeMillis();
        // look ahead two scheduling periods so nothing fires between passes
        long timeThreshold = nowTime + 2 * SCHEDULE_RATE;
        Lists.partition(appIds, MAX_APP_NUM).forEach(partAppIds -> {

            try {

                // query: enabled + matching expression type + owned app + about to fire
                List<JobInfoDO> jobInfos = jobInfoRepository.findByAppIdInAndStatusAndTimeExpressionTypeAndNextTriggerTimeLessThanEqual(partAppIds, SwitchableStatus.ENABLE.getV(), timeExpressionType.getV(), timeThreshold);

                if (CollectionUtils.isEmpty(jobInfos)) {
                    return;
                }

                // 1. batch-create instance records first
                Map<Long, Long> jobId2InstanceId = Maps.newHashMap();
                log.info("[NormalScheduler] These {} jobs will be scheduled: {}.", timeExpressionType.name(), jobInfos);

                jobInfos.forEach(jobInfo -> {
                    Long instanceId = instanceService.create(jobInfo.getId(), jobInfo.getAppId(), jobInfo.getJobParams(), null, null, jobInfo.getNextTriggerTime(), null, null).getInstanceId();
                    jobId2InstanceId.put(jobInfo.getId(), instanceId);
                });
                instanceInfoRepository.flush();

                // 2. push into the time wheel to await dispatch
                jobInfos.forEach(jobInfoDO -> {

                    Long instanceId = jobId2InstanceId.get(jobInfoDO.getId());

                    long targetTriggerTime = jobInfoDO.getNextTriggerTime();
                    long delay = 0;
                    if (targetTriggerTime < nowTime) {
                        // already late: fire immediately (delay stays 0) and log the slip
                        log.warn("[Job-{}] schedule delay, expect: {}, current: {}", jobInfoDO.getId(), targetTriggerTime, System.currentTimeMillis());
                    } else {
                        delay = targetTriggerTime - nowTime;
                    }

                    InstanceTimeWheelService.schedule(instanceId, delay, () -> dispatchService.dispatch(jobInfoDO, instanceId, Optional.empty(), Optional.empty()));
                });

                // 3. compute next trigger times (duplicate runs within the look-ahead window are
                //    suppressed, i.e. the minimum consecutive interval in CRON mode is SCHEDULE_RATE ms)
                jobInfos.forEach(jobInfoDO -> {
                    try {
                        refreshJob(timeExpressionType, jobInfoDO);
                    } catch (Exception e) {
                        log.error("[Job-{}] refresh job failed.", jobInfoDO.getId(), e);
                    }
                });
                jobInfoRepository.flush();

            } catch (Exception e) {
                // per-partition catch: one bad partition must not stop the others
                log.error("[NormalScheduler] schedule {} job failed.", timeExpressionType.name(), e);
            }
        });
    }

    /**
     * Core CRON-workflow scheduling: create the instance record first, push into the
     * time wheel, then recompute the next trigger time.
     *
     * @param appIds app ids owned by this server
     */
    private void scheduleWorkflowCore(List<Long> appIds) {

        long nowTime = System.currentTimeMillis();
        long timeThreshold = nowTime + 2 * SCHEDULE_RATE;
        Lists.partition(appIds, MAX_APP_NUM).forEach(partAppIds -> {
            List<WorkflowInfoDO> wfInfos = workflowInfoRepository.findByAppIdInAndStatusAndTimeExpressionTypeAndNextTriggerTimeLessThanEqual(partAppIds, SwitchableStatus.ENABLE.getV(), TimeExpressionType.CRON.getV(), timeThreshold);

            if (CollectionUtils.isEmpty(wfInfos)) {
                return;
            }

            wfInfos.forEach(wfInfo -> {

                // 1. create the run record first so the trigger cannot be silently lost
                Long wfInstanceId = workflowInstanceManager.create(wfInfo, null, wfInfo.getNextTriggerTime(), null);

                // 2. push into the time wheel, ready to start
                long delay = wfInfo.getNextTriggerTime() - System.currentTimeMillis();
                if (delay < 0) {
                    log.warn("[Workflow-{}] workflow schedule delay, expect:{}, actual: {}", wfInfo.getId(), wfInfo.getNextTriggerTime(), System.currentTimeMillis());
                    delay = 0;
                }
                InstanceTimeWheelService.schedule(wfInstanceId, delay, () -> workflowInstanceManager.start(wfInfo, wfInstanceId));

                // 3. recompute and persist the next trigger time
                try {
                    refreshWorkflow(wfInfo);
                } catch (Exception e) {
                    log.error("[Workflow-{}] refresh workflow failed.", wfInfo.getId(), e);
                }
            });
            workflowInfoRepository.flush();
        });
    }

    /**
     * Core frequent-job (FIX_RATE / FIX_DELAY) scheduling: re-launch enabled frequent
     * jobs that have no generalized-running instance, honoring each job's lifecycle.
     *
     * @param appIds app ids owned by this server
     */
    private void scheduleFrequentJobCore(List<Long> appIds) {

        Lists.partition(appIds, MAX_APP_NUM).forEach(partAppIds -> {
            try {
                // fetch all enabled frequent jobs (ids only)
                List<Long> jobIds = jobInfoRepository.findByAppIdInAndStatusAndTimeExpressionTypeIn(partAppIds, SwitchableStatus.ENABLE.getV(), TimeExpressionType.FREQUENT_TYPES);
                if (CollectionUtils.isEmpty(jobIds)) {
                    return;
                }
                // check the instance table for jobs that are already running
                List<Long> runningJobIdList = instanceInfoRepository.findByJobIdInAndStatusIn(jobIds, InstanceStatus.GENERALIZED_RUNNING_STATUS);
                Set<Long> runningJobIdSet = Sets.newHashSet(runningJobIdList);

                List<Long> notRunningJobIds = Lists.newLinkedList();
                jobIds.forEach(jobId -> {
                    if (!runningJobIdSet.contains(jobId)) {
                        notRunningJobIds.add(jobId);
                    }
                });

                if (CollectionUtils.isEmpty(notRunningJobIds)) {
                    return;
                }

                notRunningJobIds.forEach(jobId -> {
                    Optional<JobInfoDO> jobInfoOpt = jobInfoRepository.findById(jobId);
                    jobInfoOpt.ifPresent(jobInfoDO -> {
                        LifeCycle lifeCycle = LifeCycle.parse(jobInfoDO.getLifecycle());
                        // lifecycle already over: disable the job instead of launching it
                        if (lifeCycle.getEnd() != null && lifeCycle.getEnd() < System.currentTimeMillis()) {
                            jobInfoDO.setStatus(SwitchableStatus.DISABLE.getV());
                            jobInfoDO.setGmtModified(new Date());
                            jobInfoRepository.saveAndFlush(jobInfoDO);
                            log.info("[FrequentScheduler] disable frequent job,id:{}.", jobInfoDO.getId());
                        } else if (lifeCycle.getStart() == null || lifeCycle.getStart() < System.currentTimeMillis() + SCHEDULE_RATE * 2) {
                            // start is absent or within the look-ahead window: (re)run now,
                            // delayed until the lifecycle start when one is set
                            log.info("[FrequentScheduler] schedule frequent job,id:{}.", jobInfoDO.getId());
                            RunJobRequest runJobRequest = new RunJobRequest()
                                    .setAppId(jobInfoDO.getAppId()).setJobId(jobId).setDelay(Optional.ofNullable(lifeCycle.getStart()).orElse(0L) - System.currentTimeMillis());
                            jobService.runJob(runJobRequest.getAppId(), runJobRequest);
                        }
                    });
                });
            } catch (Exception e) {
                log.error("[FrequentScheduler] schedule frequent job failed.", e);
            }
        });
    }

    /**
     * Recompute a job's next trigger time and persist it; when no next trigger time
     * exists (expression exhausted / lifecycle over) the job is disabled.
     *
     * @param timeExpressionType expression type of the job
     * @param jobInfo            the job to refresh (a copy is persisted, the argument is not mutated)
     */
    private void refreshJob(TimeExpressionType timeExpressionType, JobInfoDO jobInfo) {
        LifeCycle lifeCycle = LifeCycle.parse(jobInfo.getLifecycle());
        Long nextTriggerTime = timingStrategyService.calculateNextTriggerTime(jobInfo.getNextTriggerTime(), timeExpressionType, jobInfo.getTimeExpression(), lifeCycle.getStart(), lifeCycle.getEnd());

        JobInfoDO updatedJobInfo = new JobInfoDO();
        BeanUtils.copyProperties(jobInfo, updatedJobInfo);

        if (nextTriggerTime == null) {
            log.warn("[Job-{}] this job won't be scheduled anymore, system will set the status to DISABLE!", jobInfo.getId());
            updatedJobInfo.setStatus(SwitchableStatus.DISABLE.getV());
        } else {
            updatedJobInfo.setNextTriggerTime(nextTriggerTime);
        }
        updatedJobInfo.setGmtModified(new Date());

        jobInfoRepository.save(updatedJobInfo);
    }

    /**
     * Recompute a workflow's next trigger time and persist it; when no next trigger time
     * exists the workflow is disabled.
     *
     * @param wfInfo the workflow to refresh (a copy is persisted, the argument is not mutated)
     */
    private void refreshWorkflow(WorkflowInfoDO wfInfo) {
        LifeCycle lifeCycle = LifeCycle.parse(wfInfo.getLifecycle());
        Long nextTriggerTime = timingStrategyService.calculateNextTriggerTime(wfInfo.getNextTriggerTime(), TimeExpressionType.CRON, wfInfo.getTimeExpression(), lifeCycle.getStart(), lifeCycle.getEnd());

        WorkflowInfoDO updateEntity = new WorkflowInfoDO();
        BeanUtils.copyProperties(wfInfo, updateEntity);

        if (nextTriggerTime == null) {
            log.warn("[Workflow-{}] this workflow won't be scheduled anymore, system will set the status to DISABLE!", wfInfo.getId());
            updateEntity.setStatus(SwitchableStatus.DISABLE.getV());
        } else {
            updateEntity.setNextTriggerTime(nextTriggerTime);
        }

        updateEntity.setGmtModified(new Date());
        workflowInfoRepository.save(updateEntity);
    }

}
|
||||
@ -0,0 +1,122 @@
|
||||
package tech.powerjob.server.core.scheduler;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.time.DateFormatUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.common.OmsConstant;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.server.core.scheduler.auxiliary.TimingStrategyHandler;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2022/3/21
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
public class TimingStrategyService {
|
||||
|
||||
private static final int NEXT_N_TIMES = 5;
|
||||
|
||||
private static final List<String> TIPS = Collections.singletonList("It is valid, but has not trigger time list!");
|
||||
|
||||
|
||||
private final Map<TimeExpressionType, TimingStrategyHandler> strategyContainer;
|
||||
|
||||
public TimingStrategyService(List<TimingStrategyHandler> timingStrategyHandlers) {
|
||||
// init
|
||||
strategyContainer = new EnumMap<>(TimeExpressionType.class);
|
||||
for (TimingStrategyHandler timingStrategyHandler : timingStrategyHandlers) {
|
||||
strategyContainer.put(timingStrategyHandler.supportType(), timingStrategyHandler);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 计算接下来几次的调度时间
|
||||
*
|
||||
* @param timeExpressionType 定时表达式类型
|
||||
* @param timeExpression 表达式
|
||||
* @param startTime 起始时间(include)
|
||||
* @param endTime 结束时间(include)
|
||||
* @return 调度时间列表
|
||||
*/
|
||||
public List<String> calculateNextTriggerTimes(TimeExpressionType timeExpressionType, String timeExpression, Long startTime, Long endTime) {
|
||||
|
||||
TimingStrategyHandler timingStrategyHandler = getHandler(timeExpressionType);
|
||||
List<Long> triggerTimeList = new ArrayList<>(NEXT_N_TIMES);
|
||||
Long nextTriggerTime = System.currentTimeMillis();
|
||||
do {
|
||||
nextTriggerTime = timingStrategyHandler.calculateNextTriggerTime(nextTriggerTime, timeExpression, startTime, endTime);
|
||||
if (nextTriggerTime == null) {
|
||||
break;
|
||||
}
|
||||
triggerTimeList.add(nextTriggerTime);
|
||||
} while (triggerTimeList.size() < NEXT_N_TIMES);
|
||||
|
||||
if (triggerTimeList.isEmpty()) {
|
||||
return TIPS;
|
||||
}
|
||||
return triggerTimeList.stream().map(t -> DateFormatUtils.format(t, OmsConstant.TIME_PATTERN)).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* 计算下次的调度时间
|
||||
*
|
||||
* @param preTriggerTime 上次触发时间(nullable)
|
||||
* @param timeExpressionType 定时表达式类型
|
||||
* @param timeExpression 表达式
|
||||
* @param startTime 起始时间(include)
|
||||
* @param endTime 结束时间(include)
|
||||
* @return 下次的调度时间
|
||||
*/
|
||||
public Long calculateNextTriggerTime(Long preTriggerTime, TimeExpressionType timeExpressionType, String timeExpression, Long startTime, Long endTime) {
|
||||
if (preTriggerTime == null || preTriggerTime < System.currentTimeMillis()) {
|
||||
preTriggerTime = System.currentTimeMillis();
|
||||
}
|
||||
return getHandler(timeExpressionType).calculateNextTriggerTime(preTriggerTime, timeExpression, startTime, endTime);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 计算下次的调度时间并检查校验规则
|
||||
*
|
||||
* @param timeExpressionType 定时表达式类型
|
||||
* @param timeExpression 表达式
|
||||
* @param startTime 起始时间(include)
|
||||
* @param endTime 结束时间(include)
|
||||
* @return 下次的调度时间
|
||||
*/
|
||||
public Long calculateNextTriggerTimeWithInspection( TimeExpressionType timeExpressionType, String timeExpression, Long startTime, Long endTime) {
|
||||
Long nextTriggerTime = calculateNextTriggerTime(null, timeExpressionType, timeExpression, startTime, endTime);
|
||||
if (TimeExpressionType.INSPECT_TYPES.contains(timeExpressionType.getV()) && nextTriggerTime == null) {
|
||||
throw new PowerJobException("time expression is out of date: " + timeExpression);
|
||||
}
|
||||
return nextTriggerTime;
|
||||
}
|
||||
|
||||
|
||||
public void validate(TimeExpressionType timeExpressionType, String timeExpression, Long startTime, Long endTime) {
|
||||
if (endTime != null) {
|
||||
if (endTime <= System.currentTimeMillis()) {
|
||||
throw new PowerJobException("lifecycle is out of date!");
|
||||
}
|
||||
if (startTime != null && startTime > endTime) {
|
||||
throw new PowerJobException("lifecycle is invalid! start time must earlier then end time.");
|
||||
}
|
||||
}
|
||||
getHandler(timeExpressionType).validate(timeExpression);
|
||||
}
|
||||
|
||||
|
||||
private TimingStrategyHandler getHandler(TimeExpressionType timeExpressionType) {
|
||||
TimingStrategyHandler timingStrategyHandler = strategyContainer.get(timeExpressionType);
|
||||
if (timingStrategyHandler == null) {
|
||||
throw new PowerJobException("No matching TimingStrategyHandler for this TimeExpressionType:" + timeExpressionType);
|
||||
}
|
||||
return timingStrategyHandler;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,19 @@
|
||||
package tech.powerjob.server.core.scheduler.auxiliary;
|
||||
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2022/3/22
|
||||
*/
|
||||
public abstract class AbstractTimingStrategyHandler implements TimingStrategyHandler {

    /**
     * Default validation: accept any expression. Subclasses override this when
     * the expression has a format that can be checked up-front.
     */
    @Override
    public void validate(String timeExpression) {
        // do nothing
    }

    /**
     * Default computation: no server-side next trigger time.
     * Returning null signals that no trigger time can be computed for this type.
     */
    @Override
    public Long calculateNextTriggerTime(Long preTriggerTime, String timeExpression, Long startTime, Long endTime) {
        // do nothing
        return null;
    }
}
|
||||
@ -0,0 +1,238 @@
|
||||
package tech.powerjob.server.core.scheduler.auxiliary;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.TimeZone;
|
||||
|
||||
/**
|
||||
* Represents a time in hour, minute and second of any given day.
|
||||
*
|
||||
* <p>The hour is in 24-hour convention, meaning values are from 0 to 23.</p>
|
||||
* <a href="https://github.com/quartz-scheduler/quartz">PowerJob learn from quartz</a>
|
||||
*
|
||||
* @since 2.0.3
|
||||
*
|
||||
* @author James House
|
||||
* @author Zemian Deng <saltnlight5@gmail.com>
|
||||
*/
|
||||
public class TimeOfDay implements Serializable {

    private static final long serialVersionUID = 2964774315889061771L;

    /** Hour in 24-hour convention, 0-23. */
    private final int hour;
    /** Minute of the hour, 0-59. */
    private final int minute;
    /** Second of the minute, 0-59. */
    private final int second;

    /**
     * Create a TimeOfDay instance for the given hour, minute and second.
     *
     * @param hour The hour of day, between 0 and 23.
     * @param minute The minute of the hour, between 0 and 59.
     * @param second The second of the minute, between 0 and 59.
     * @throws IllegalArgumentException if one or more of the input values is out of their valid range.
     */
    public TimeOfDay(int hour, int minute, int second) {
        this.hour = hour;
        this.minute = minute;
        this.second = second;
        validate();
    }

    /**
     * Create a TimeOfDay instance for the given hour and minute (at the zero second of the minute).
     *
     * @param hour The hour of day, between 0 and 23.
     * @param minute The minute of the hour, between 0 and 59.
     * @throws IllegalArgumentException if one or more of the input values is out of their valid range.
     */
    public TimeOfDay(int hour, int minute) {
        this.hour = hour;
        this.minute = minute;
        this.second = 0;
        validate();
    }

    /** Range-check all three fields; called from every constructor. */
    private void validate() {
        if (hour < 0 || hour > 23) {
            throw new IllegalArgumentException("Hour must be from 0 to 23");
        }
        if (minute < 0 || minute > 59) {
            throw new IllegalArgumentException("Minute must be from 0 to 59");
        }
        if (second < 0 || second > 59) {
            throw new IllegalArgumentException("Second must be from 0 to 59");
        }
    }

    /**
     * Create a TimeOfDay instance for the given hour, minute and second.
     *
     * @param hour The hour of day, between 0 and 23.
     * @param minute The minute of the hour, between 0 and 59.
     * @param second The second of the minute, between 0 and 59.
     * @throws IllegalArgumentException if one or more of the input values is out of their valid range.
     */
    public static TimeOfDay hourMinuteAndSecondOfDay(int hour, int minute, int second) {
        return new TimeOfDay(hour, minute, second);
    }

    /**
     * Create a TimeOfDay instance for the given hour and minute (at the zero second of the minute).
     *
     * @param hour The hour of day, between 0 and 23.
     * @param minute The minute of the hour, between 0 and 59.
     * @throws IllegalArgumentException if one or more of the input values is out of their valid range.
     */
    public static TimeOfDay hourAndMinuteOfDay(int hour, int minute) {
        return new TimeOfDay(hour, minute);
    }

    /**
     * The hour of the day (between 0 and 23).
     *
     * @return The hour of the day (between 0 and 23).
     */
    public int getHour() {
        return hour;
    }

    /**
     * The minute of the hour.
     *
     * @return The minute of the hour (between 0 and 59).
     */
    public int getMinute() {
        return minute;
    }

    /**
     * The second of the minute.
     *
     * @return The second of the minute (between 0 and 59).
     */
    public int getSecond() {
        return second;
    }

    /**
     * Determine whether this time of day is before the given time of day.
     *
     * @return true if this time of day is strictly before the given time of day.
     */
    public boolean before(TimeOfDay timeOfDay) {
        // compare field by field, most significant first
        if (timeOfDay.hour > hour) {
            return true;
        }
        if (timeOfDay.hour < hour) {
            return false;
        }

        if (timeOfDay.minute > minute) {
            return true;
        }
        if (timeOfDay.minute < minute) {
            return false;
        }

        if (timeOfDay.second > second) {
            return true;
        }
        if (timeOfDay.second < second) {
            return false;
        }

        return false; // must be equal...
    }

    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof TimeOfDay)) {
            return false;
        }

        TimeOfDay other = (TimeOfDay) obj;

        return (other.hour == hour && other.minute == minute && other.second == second);
    }

    @Override
    public int hashCode() {
        return (hour + 1) ^ (minute + 1) ^ (second + 1);
    }

    /** Return a date with time of day reset to this object values. The millisecond value will be zero. */
    public Date getTimeOfDayForDate(Date dateTime) {
        if (dateTime == null) {
            return null;
        }
        Calendar cal = Calendar.getInstance();
        cal.setTime(dateTime);
        cal.set(Calendar.HOUR_OF_DAY, hour);
        cal.set(Calendar.MINUTE, minute);
        cal.set(Calendar.SECOND, second);
        cal.clear(Calendar.MILLISECOND);
        return cal.getTime();
    }

    /**
     * Create a TimeOfDay from the given date, in the system default TimeZone.
     *
     * @param dateTime The java.util.Date from which to extract Hour, Minute and Second.
     */
    public static TimeOfDay hourAndMinuteAndSecondFromDate(Date dateTime) {
        return hourAndMinuteAndSecondFromDate(dateTime, null);
    }

    /**
     * Create a TimeOfDay from the given date, in the given TimeZone.
     *
     * @param dateTime The java.util.Date from which to extract Hour, Minute and Second.
     * @param tz The TimeZone from which relate Hour, Minute and Second for the given date. If null, system default
     * TimeZone will be used.
     */
    public static TimeOfDay hourAndMinuteAndSecondFromDate(Date dateTime, TimeZone tz) {
        if (dateTime == null) {
            return null;
        }
        Calendar cal = Calendar.getInstance();
        cal.setTime(dateTime);
        if (tz != null) {
            cal.setTimeZone(tz);
        }

        return new TimeOfDay(cal.get(Calendar.HOUR_OF_DAY), cal.get(Calendar.MINUTE), cal.get(Calendar.SECOND));
    }

    /**
     * Create a TimeOfDay from the given date (at the zero-second), in the system default TimeZone.
     *
     * @param dateTime The java.util.Date from which to extract Hour and Minute.
     */
    public static TimeOfDay hourAndMinuteFromDate(Date dateTime) {
        return hourAndMinuteFromDate(dateTime, null);
    }

    /**
     * Create a TimeOfDay from the given date (at the zero-second), in the given TimeZone.
     *
     * @param dateTime The java.util.Date from which to extract Hour and Minute.
     * @param tz The TimeZone from which relate Hour and Minute for the given date. If null, system default
     * TimeZone will be used.
     */
    public static TimeOfDay hourAndMinuteFromDate(Date dateTime, TimeZone tz) {
        if (dateTime == null) {
            return null;
        }
        Calendar cal = Calendar.getInstance();
        cal.setTime(dateTime);
        if (tz != null) {
            cal.setTimeZone(tz);
        }

        return new TimeOfDay(cal.get(Calendar.HOUR_OF_DAY), cal.get(Calendar.MINUTE));
    }

    /**
     * Parse a TimeOfDay from an "HH:mm:ss"-style string such as {@code 15:30:10}.
     *
     * <p>Hardened vs. the original: null input and non-numeric parts now raise a
     * descriptive {@link IllegalArgumentException} (previously a bare NPE /
     * NumberFormatException), and surrounding whitespace per part is tolerated.
     * NumberFormatException extends IllegalArgumentException, so existing catch
     * clauses keep working.
     *
     * @param hms time-of-day string in {@code H:m:s} form
     * @return the parsed TimeOfDay
     * @throws IllegalArgumentException if the input is null, malformed, or out of range
     */
    public static TimeOfDay from(String hms) {
        if (hms == null) {
            throw new IllegalArgumentException("invalid TimeOfDay, make pattern like 15:30:10");
        }
        String[] split = hms.split(":");
        if (split.length != 3) {
            throw new IllegalArgumentException("invalid TimeOfDay, make pattern like 15:30:10");
        }
        try {
            return new TimeOfDay(Integer.parseInt(split[0].trim()), Integer.parseInt(split[1].trim()), Integer.parseInt(split[2].trim()));
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("invalid TimeOfDay: " + hms + ", make pattern like 15:30:10", e);
        }
    }

    @Override
    public String toString() {
        return "TimeOfDay[" + hour + ":" + minute + ":" + second + "]";
    }
}
|
||||
|
||||
@ -0,0 +1,37 @@
|
||||
package tech.powerjob.server.core.scheduler.auxiliary;
|
||||
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2022/2/24
|
||||
*/
|
||||
public interface TimingStrategyHandler {

    /**
     * Validate the time expression.
     *
     * @param timeExpression the time expression to check
     */
    void validate(String timeExpression);

    /**
     * Calculate the next trigger time.
     *
     * @param preTriggerTime previous trigger time (not null)
     * @param timeExpression the time expression
     * @param startTime      lifecycle start time (inclusive), nullable
     * @param endTime        lifecycle end time (inclusive), nullable
     * @return next trigger time
     */
    Long calculateNextTriggerTime(Long preTriggerTime, String timeExpression, Long startTime, Long endTime);

    /**
     * The timing strategy this handler supports.
     *
     * @return TimeExpressionType
     */
    TimeExpressionType supportType();


}
|
||||
@ -0,0 +1,17 @@
|
||||
package tech.powerjob.server.core.scheduler.auxiliary.impl;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.server.core.scheduler.auxiliary.AbstractTimingStrategyHandler;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2022/3/22
|
||||
*/
|
||||
@Component
public class ApiTimingStrategyHandler extends AbstractTimingStrategyHandler {

    /**
     * Handler for the API expression type. Inherits the defaults of
     * {@link AbstractTimingStrategyHandler}: no expression validation and a null
     * (i.e. absent) server-side next trigger time.
     */
    @Override
    public TimeExpressionType supportType() {
        return TimeExpressionType.API;
    }
}
|
||||
@ -0,0 +1,77 @@
|
||||
package tech.powerjob.server.core.scheduler.auxiliary.impl;
|
||||
|
||||
import com.cronutils.model.Cron;
|
||||
import com.cronutils.model.definition.CronDefinition;
|
||||
import com.cronutils.model.definition.CronDefinitionBuilder;
|
||||
import com.cronutils.model.time.ExecutionTime;
|
||||
import com.cronutils.parser.CronParser;
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.server.core.scheduler.auxiliary.TimingStrategyHandler;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.time.ZoneId;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2022/2/24
|
||||
*/
|
||||
@Component
public class CronTimingStrategyHandler implements TimingStrategyHandler {

    // parser built once in the constructor; cron-utils parsers are reused across calls
    private final CronParser cronParser;

    /**
     * Builds an enhanced quartz-style cron definition that supports specifying both a
     * day-of-week and a day-of-month parameter.
     * https://github.com/PowerJob/PowerJob/issues/382
     *
     * @see CronDefinitionBuilder#instanceDefinitionFor
     */
    public CronTimingStrategyHandler() {
        CronDefinition cronDefinition = CronDefinitionBuilder.defineCron()
                .withSeconds().withValidRange(0, 59).and()
                .withMinutes().withValidRange(0, 59).and()
                .withHours().withValidRange(0, 23).and()
                .withDayOfMonth().withValidRange(1, 31).supportsL().supportsW().supportsLW().supportsQuestionMark().and()
                .withMonth().withValidRange(1, 12).and()
                .withDayOfWeek().withValidRange(1, 7).withMondayDoWValue(2).supportsHash().supportsL().supportsQuestionMark().and()
                .withYear().withValidRange(1970, 2099).withStrictRange().optional().and()
                .instance();
        this.cronParser = new CronParser(cronDefinition);
    }


    /**
     * Validate by parsing; an invalid expression makes {@code cronParser.parse} throw.
     */
    @Override
    public void validate(String timeExpression) {
        cronParser.parse(timeExpression);
    }

    /**
     * Compute the next cron fire time after {@code preTriggerTime}, bounded by the lifecycle.
     *
     * <p>Times are computed in the system default time zone, at second precision
     * ({@code toEpochSecond() * 1000} drops milliseconds).
     *
     * @param preTriggerTime previous trigger time (not null)
     * @param timeExpression cron expression
     * @param startTime      lifecycle start (inclusive), nullable
     * @param endTime        lifecycle end (inclusive), nullable
     * @return next trigger time in epoch millis, or null when none exists before endTime
     */
    @Override
    public Long calculateNextTriggerTime(Long preTriggerTime, String timeExpression, Long startTime, Long endTime) {
        Cron cron = cronParser.parse(timeExpression);
        ExecutionTime executionTime = ExecutionTime.forCron(cron);
        if (startTime != null && startTime > System.currentTimeMillis() && preTriggerTime < startTime) {
            // lifecycle hasn't begun yet: rebase onto the last real cron fire time
            // at or before startTime, so the next computed fire falls inside the lifecycle
            Optional<ZonedDateTime> zonedDateTime = executionTime.lastExecution(ZonedDateTime.ofInstant(Instant.ofEpochMilli(startTime), ZoneId.systemDefault()));
            preTriggerTime = zonedDateTime.map(dateTime -> dateTime.toEpochSecond() * 1000).orElse(startTime);
        }
        Instant instant = Instant.ofEpochMilli(preTriggerTime);
        ZonedDateTime preZonedDateTime = ZonedDateTime.ofInstant(instant, ZoneId.systemDefault());
        Optional<ZonedDateTime> opt = executionTime.nextExecution(preZonedDateTime);
        if (opt.isPresent()) {
            long nextTriggerTime = opt.get().toEpochSecond() * 1000;
            if (endTime != null && endTime < nextTriggerTime) {
                // next fire is past the lifecycle end: no more runs
                return null;
            }
            return nextTriggerTime;
        }
        return null;
    }

    @Override
    public TimeExpressionType supportType() {
        return TimeExpressionType.CRON;
    }
}
|
||||
@ -0,0 +1,166 @@
|
||||
package tech.powerjob.server.core.scheduler.auxiliary.impl;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import lombok.Data;
|
||||
import lombok.SneakyThrows;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.time.DateUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.common.serialize.JsonUtils;
|
||||
import tech.powerjob.common.utils.CollectionUtils;
|
||||
import tech.powerjob.common.utils.CommonUtils;
|
||||
import tech.powerjob.server.common.utils.TimeUtils;
|
||||
import tech.powerjob.server.core.scheduler.auxiliary.TimeOfDay;
|
||||
import tech.powerjob.server.core.scheduler.auxiliary.TimingStrategyHandler;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* DailyTimeIntervalStrategyHandler
|
||||
* @author 550w
|
||||
* @date 2022/02/15
|
||||
*/
|
||||
@Component
|
||||
public class DailyTimeIntervalStrategyHandler implements TimingStrategyHandler {
|
||||
|
||||
/**
|
||||
* 使用中国星期!!!
|
||||
*/
|
||||
private static final Set<Integer> ALL_DAY = Sets.newHashSet(1, 2, 3, 4, 5, 6, 7);
|
||||
|
||||
@Override
|
||||
public TimeExpressionType supportType() {
|
||||
return TimeExpressionType.DAILY_TIME_INTERVAL;
|
||||
}
|
||||
|
||||
@Override
|
||||
@SneakyThrows
|
||||
public void validate(String timeExpression) {
|
||||
DailyTimeIntervalExpress ep = JsonUtils.parseObject(timeExpression, DailyTimeIntervalExpress.class);
|
||||
CommonUtils.requireNonNull(ep.interval, "interval can't be null or empty in DailyTimeIntervalExpress");
|
||||
CommonUtils.requireNonNull(ep.startTimeOfDay, "startTimeOfDay can't be null or empty in DailyTimeIntervalExpress");
|
||||
CommonUtils.requireNonNull(ep.endTimeOfDay, "endTimeOfDay can't be null or empty in DailyTimeIntervalExpress");
|
||||
|
||||
TimeOfDay startTime = TimeOfDay.from(ep.startTimeOfDay);
|
||||
TimeOfDay endTime = TimeOfDay.from(ep.endTimeOfDay);
|
||||
|
||||
if (endTime.before(startTime)) {
|
||||
throw new IllegalArgumentException("endTime should after startTime!");
|
||||
}
|
||||
|
||||
if (StringUtils.isNotEmpty(ep.intervalUnit)) {
|
||||
TimeUnit.valueOf(ep.intervalUnit);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@SneakyThrows
|
||||
public Long calculateNextTriggerTime(Long preTriggerTime, String timeExpression, Long startTime, Long endTime) {
|
||||
DailyTimeIntervalExpress ep = JsonUtils.parseObject(timeExpression, DailyTimeIntervalExpress.class);
|
||||
|
||||
// 未开始状态下,用起点算调度时间
|
||||
if (startTime != null && startTime > System.currentTimeMillis() && preTriggerTime < startTime) {
|
||||
return calculateInRangeTime(startTime, ep);
|
||||
}
|
||||
|
||||
// 间隔时间
|
||||
TimeUnit timeUnit = Optional.ofNullable(ep.intervalUnit).map(TimeUnit::valueOf).orElse(TimeUnit.SECONDS);
|
||||
long interval = timeUnit.toMillis(ep.interval);
|
||||
|
||||
Long ret = calculateInRangeTime(preTriggerTime + interval, ep);
|
||||
if (ret == null || ret <= Optional.ofNullable(endTime).orElse(Long.MAX_VALUE)) {
|
||||
return ret;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 计算最近一次在范围中的时间
|
||||
* @param time 当前时间基准,可能直接返回该时间作为结果
|
||||
* @param ep 表达式
|
||||
* @return 最近一次在范围中的时间
|
||||
*/
|
||||
static Long calculateInRangeTime(Long time, DailyTimeIntervalExpress ep) {
|
||||
|
||||
Calendar calendar = Calendar.getInstance();
|
||||
calendar.setTime(new Date(time));
|
||||
|
||||
int year = calendar.get(Calendar.YEAR);
|
||||
// 月份 + 1,转为熟悉的 1~12 月
|
||||
int month = calendar.get(Calendar.MONTH) + 1;
|
||||
int day = calendar.get(Calendar.DAY_OF_MONTH);
|
||||
|
||||
// 判断是否符合"日"的执行条件
|
||||
int week = TimeUtils.calculateWeek(year, month, day);
|
||||
Set<Integer> targetDays = CollectionUtils.isEmpty(ep.daysOfWeek) ? ALL_DAY : ep.daysOfWeek;
|
||||
// 未包含情况下,将时间改写为符合条件日的 00:00 分,重新开始递归(这部分应该有性能更优的写法,不过这个调度模式应该很难触发瓶颈,先简单好用的实现)
|
||||
if (!targetDays.contains(week)) {
|
||||
simpleSetCalendar(calendar, 0, 0, 0);
|
||||
Date tomorrowZero = DateUtils.addDays(calendar.getTime(), 1);
|
||||
return calculateInRangeTime(tomorrowZero.getTime(), ep);
|
||||
}
|
||||
|
||||
// 范围的开始时间
|
||||
TimeOfDay rangeStartTime = TimeOfDay.from(ep.startTimeOfDay);
|
||||
simpleSetCalendar(calendar, rangeStartTime.getHour(), rangeStartTime.getMinute(), rangeStartTime.getSecond());
|
||||
long todayStartTs = calendar.getTimeInMillis();
|
||||
|
||||
// 未开始
|
||||
if (time < todayStartTs) {
|
||||
return todayStartTs;
|
||||
}
|
||||
|
||||
TimeOfDay rangeEndTime = TimeOfDay.from(ep.endTimeOfDay);
|
||||
simpleSetCalendar(calendar, rangeEndTime.getHour(), rangeEndTime.getMinute(), rangeEndTime.getSecond());
|
||||
long todayEndTs = calendar.getTimeInMillis();
|
||||
|
||||
// 范围之间
|
||||
if (time <= todayEndTs) {
|
||||
return time;
|
||||
}
|
||||
|
||||
// 已结束,重新计算第二天时间
|
||||
simpleSetCalendar(calendar, 0, 0, 0);
|
||||
return calculateInRangeTime(DateUtils.addDays(calendar.getTime(), 1).getTime(), ep);
|
||||
}
|
||||
|
||||
private static void simpleSetCalendar(Calendar calendar, int h, int m, int s) {
|
||||
calendar.set(Calendar.SECOND, s);
|
||||
calendar.set(Calendar.MINUTE, m);
|
||||
calendar.set(Calendar.HOUR_OF_DAY, h);
|
||||
calendar.set(Calendar.MILLISECOND, 0);
|
||||
}
|
||||
|
||||
@Data
|
||||
static class DailyTimeIntervalExpress implements Serializable {
|
||||
|
||||
/**
|
||||
* 时间间隔
|
||||
*/
|
||||
private Long interval;
|
||||
/**
|
||||
* 每天激活的时间起点,格式为:18:30:00 代表 18点30分00秒激活
|
||||
*/
|
||||
private String startTimeOfDay;
|
||||
/**
|
||||
* 每日激活的时间终点,格式同上
|
||||
*/
|
||||
private String endTimeOfDay;
|
||||
|
||||
/* ************ 非必填字段 ************ */
|
||||
/**
|
||||
* 时间单位,默认秒
|
||||
*/
|
||||
private String intervalUnit;
|
||||
/**
|
||||
* 每周的哪几天激活,空代表每天都激活
|
||||
*/
|
||||
private Set<Integer> daysOfWeek;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,38 @@
|
||||
package tech.powerjob.server.core.scheduler.auxiliary.impl;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.common.PowerJobDKey;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.server.core.scheduler.auxiliary.AbstractTimingStrategyHandler;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2022/3/22
|
||||
*/
|
||||
@Component
|
||||
public class FixedDelayTimingStrategyHandler extends AbstractTimingStrategyHandler {
|
||||
|
||||
@Override
|
||||
public void validate(String timeExpression) {
|
||||
long delay;
|
||||
try {
|
||||
delay = Long.parseLong(timeExpression);
|
||||
} catch (Exception e) {
|
||||
throw new PowerJobException("invalid timeExpression!");
|
||||
}
|
||||
// 默认 120s ,超过这个限制应该考虑使用其他类型以减少资源占用
|
||||
int maxInterval = Integer.parseInt(System.getProperty(PowerJobDKey.FREQUENCY_JOB_MAX_INTERVAL, "120000"));
|
||||
if (delay > maxInterval) {
|
||||
throw new PowerJobException("the delay must be less than " + maxInterval + "ms");
|
||||
}
|
||||
if (delay <= 0) {
|
||||
throw new PowerJobException("the delay must be greater than 0 ms");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TimeExpressionType supportType() {
|
||||
return TimeExpressionType.FIXED_DELAY;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,46 @@
|
||||
package tech.powerjob.server.core.scheduler.auxiliary.impl;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.common.PowerJobDKey;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.server.core.scheduler.auxiliary.AbstractTimingStrategyHandler;
|
||||
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2022/3/22
|
||||
*/
|
||||
@Component
|
||||
public class FixedRateTimingStrategyHandler extends AbstractTimingStrategyHandler {
|
||||
|
||||
@Override
|
||||
public void validate(String timeExpression) {
|
||||
long delay;
|
||||
try {
|
||||
delay = Long.parseLong(timeExpression);
|
||||
} catch (Exception e) {
|
||||
throw new PowerJobException("invalid timeExpression!");
|
||||
}
|
||||
// 默认 120s ,超过这个限制应该使用考虑使用其他类型以减少资源占用
|
||||
int maxInterval = Integer.parseInt(System.getProperty(PowerJobDKey.FREQUENCY_JOB_MAX_INTERVAL, "120000"));
|
||||
if (delay > maxInterval) {
|
||||
throw new PowerJobException("the rate must be less than " + maxInterval + "ms");
|
||||
}
|
||||
if (delay <= 0) {
|
||||
throw new PowerJobException("the rate must be greater than 0 ms");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long calculateNextTriggerTime(Long preTriggerTime, String timeExpression, Long startTime, Long endTime) {
|
||||
long r = startTime != null && startTime > preTriggerTime
|
||||
? startTime : preTriggerTime + Long.parseLong(timeExpression);
|
||||
return endTime != null && endTime < r ? null : r;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TimeExpressionType supportType() {
|
||||
return TimeExpressionType.FIXED_RATE;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,17 @@
|
||||
package tech.powerjob.server.core.scheduler.auxiliary.impl;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.server.core.scheduler.auxiliary.AbstractTimingStrategyHandler;
|
||||
|
||||
/**
 * Timing strategy for {@code WORKFLOW} jobs. Such jobs are triggered by their enclosing
 * workflow rather than by a time expression, so this handler only declares the type it
 * supports and relies entirely on the defaults inherited from
 * {@link AbstractTimingStrategyHandler}.
 *
 * @author Echo009
 * @since 2022/3/22
 */
@Component
public class WorkflowTimingStrategyHandler extends AbstractTimingStrategyHandler {
    @Override
    public TimeExpressionType supportType() {
        return TimeExpressionType.WORKFLOW;
    }
}
|
||||
@ -0,0 +1,46 @@
|
||||
package tech.powerjob.server.core.service;
|
||||
|
||||
import tech.powerjob.server.persistence.remote.model.AppInfoDO;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
/**
 * AppInfoService
 *
 * Read/write and credential-verification operations for PowerJob applications (apps).
 *
 * @author tjq
 * @since 2023/3/4
 */
public interface AppInfoService {

    /**
     * Finds an app by its unique name.
     * @param appName app name
     * @return the app, or empty when no app with that name exists
     */
    Optional<AppInfoDO> findByAppName(String appName);

    /**
     * Fetches app info by id, optionally through a local cache.
     * @param appId    appId
     * @param useCache when true a cached (possibly slightly stale) value may be returned
     * @return app info, or empty when not found
     */
    Optional<AppInfoDO> findById(Long appId, boolean useCache);

    /**
     * Deletes the app with the given id.
     * @param appId appId
     */
    void deleteById(Long appId);

    /**
     * Saves an app; the stored password may be transformed (e.g. encrypted) by the implementation.
     * @param appInfo app info
     * @return the persisted entity
     */
    AppInfoDO save(AppInfoDO appInfo);

    /**
     * Verifies an app's credentials by name.
     * @param appName     app name
     * @param password    supplied password (encoding per encryptType)
     * @param encryptType how the supplied password is encoded
     * @return appId on success
     */
    Long assertApp(String appName, String password, String encryptType);

    /**
     * Verifies credentials against an already-loaded app record.
     * @param appInfo     app record
     * @param password    supplied password (encoding per encryptType)
     * @param encryptType how the supplied password is encoded
     * @return appId on success
     */
    Long assertApp(AppInfoDO appInfo, String password, String encryptType);

    /**
     * Resolves the original (plaintext) password from the stored, possibly encrypted, value.
     * @param appInfo app record
     * @return original app password
     */
    String fetchOriginAppPassword(AppInfoDO appInfo);
}
|
||||
@ -0,0 +1,138 @@
|
||||
package tech.powerjob.server.core.service;
|
||||
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* 本地缓存常用数据查询操作
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/4/14
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
public class CacheService {
|
||||
|
||||
private final JobInfoRepository jobInfoRepository;
|
||||
|
||||
private final WorkflowInfoRepository workflowInfoRepository;
|
||||
|
||||
private final InstanceInfoRepository instanceInfoRepository;
|
||||
|
||||
private final Cache<Long, String> jobId2JobNameCache;
|
||||
private final Cache<Long, String> workflowId2WorkflowNameCache;
|
||||
private final Cache<Long, Long> instanceId2AppId;
|
||||
private final Cache<Long, Long> jobId2AppId;
|
||||
|
||||
public CacheService(JobInfoRepository jobInfoRepository, WorkflowInfoRepository workflowInfoRepository, InstanceInfoRepository instanceInfoRepository) {
|
||||
|
||||
this.jobInfoRepository = jobInfoRepository;
|
||||
this.workflowInfoRepository = workflowInfoRepository;
|
||||
this.instanceInfoRepository = instanceInfoRepository;
|
||||
|
||||
jobId2JobNameCache = CacheBuilder.newBuilder()
|
||||
.expireAfterWrite(Duration.ofMinutes(1))
|
||||
.maximumSize(512)
|
||||
.softValues()
|
||||
.build();
|
||||
|
||||
workflowId2WorkflowNameCache = CacheBuilder.newBuilder()
|
||||
.expireAfterWrite(Duration.ofMinutes(1))
|
||||
.maximumSize(512)
|
||||
.softValues()
|
||||
.build();
|
||||
|
||||
instanceId2AppId = CacheBuilder.newBuilder()
|
||||
.maximumSize(1024)
|
||||
.softValues()
|
||||
.build();
|
||||
jobId2AppId = CacheBuilder.newBuilder()
|
||||
.maximumSize(1024)
|
||||
.softValues()
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据 jobId 查询 jobName(不保证数据一致性,或者说只要改了数据必不一致hhh)
|
||||
* @param jobId 任务ID
|
||||
* @return 任务名称
|
||||
*/
|
||||
public String getJobName(Long jobId) {
|
||||
try {
|
||||
return jobId2JobNameCache.get(jobId, () -> {
|
||||
Optional<JobInfoDO> jobInfoDOOptional = jobInfoRepository.findById(jobId);
|
||||
// 防止缓存穿透 hhh(但是一开始没有,后来创建的情况下会有问题,不过问题不大,这里就不管了)
|
||||
return jobInfoDOOptional.map(JobInfoDO::getJobName).orElse("");
|
||||
});
|
||||
}catch (Exception e) {
|
||||
log.error("[CacheService] getJobName for {} failed.", jobId, e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据 workflowId 查询 工作流名称
|
||||
* @param workflowId 工作流ID
|
||||
* @return 工作流名称
|
||||
*/
|
||||
public String getWorkflowName(Long workflowId) {
|
||||
try {
|
||||
return workflowId2WorkflowNameCache.get(workflowId, () -> {
|
||||
Optional<WorkflowInfoDO> jobInfoDOOptional = workflowInfoRepository.findById(workflowId);
|
||||
// 防止缓存穿透 hhh(但是一开始没有,后来创建的情况下会有问题,不过问题不大,这里就不管了)
|
||||
return jobInfoDOOptional.map(WorkflowInfoDO::getWfName).orElse("");
|
||||
});
|
||||
}catch (Exception e) {
|
||||
log.error("[CacheService] getWorkflowName for {} failed.", workflowId, e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public Long getAppIdByInstanceId(Long instanceId) {
|
||||
|
||||
try {
|
||||
return instanceId2AppId.get(instanceId, () -> {
|
||||
// 内部记录数据库异常
|
||||
try {
|
||||
InstanceInfoDO instanceLog = instanceInfoRepository.findByInstanceId(instanceId);
|
||||
if (instanceLog != null) {
|
||||
return instanceLog.getAppId();
|
||||
}
|
||||
}catch (Exception e) {
|
||||
log.error("[CacheService] getAppId for instanceId:{} failed.", instanceId, e);
|
||||
}
|
||||
return null;
|
||||
});
|
||||
}catch (Exception ignore) {
|
||||
// 忽略缓存 load 失败的异常
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public Long getAppIdByJobId(Long jobId) {
|
||||
try {
|
||||
return jobId2AppId.get(jobId, () -> {
|
||||
try {
|
||||
Optional<JobInfoDO> jobInfoDOOptional = jobInfoRepository.findById(jobId);
|
||||
return jobInfoDOOptional.map(JobInfoDO::getAppId).orElse(null);
|
||||
}catch (Exception e) {
|
||||
log.error("[CacheService] getAppId for job:{} failed.", jobId, e);
|
||||
}
|
||||
return null;
|
||||
});
|
||||
} catch (Exception ignore) {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,38 @@
|
||||
package tech.powerjob.server.core.service;
|
||||
|
||||
import tech.powerjob.common.PowerQuery;
|
||||
import tech.powerjob.common.request.http.RunJobRequest;
|
||||
import tech.powerjob.common.request.http.SaveJobInfoRequest;
|
||||
import tech.powerjob.common.response.JobInfoDTO;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * JobService
 *
 * CRUD, lifecycle and trigger operations for jobs.
 *
 * @author tjq
 * @since 2023/3/4
 */
public interface JobService {

    /**
     * Creates a new job or updates an existing one.
     * @param request job definition
     * @return jobId of the created/updated job
     */
    Long saveJob(SaveJobInfoRequest request);

    /**
     * Creates a copy of an existing job.
     * @param jobId source job id
     * @return the newly created job entity
     */
    JobInfoDO copyJob(Long jobId);

    /**
     * Fetches a single job.
     * @param jobId job id
     * @return job DTO
     */
    JobInfoDTO fetchJob(Long jobId);

    /**
     * Lists all jobs belonging to an app.
     * @param appId app id
     * @return jobs of the app
     */
    List<JobInfoDTO> fetchAllJob(Long appId);

    /**
     * Queries jobs by a generic query condition.
     * @param powerQuery query condition
     * @return matching jobs
     */
    List<JobInfoDTO> queryJob(PowerQuery powerQuery);

    /**
     * Triggers a job run immediately.
     * @param appId         app id the job belongs to
     * @param runJobRequest run parameters (instance params, delay, ...)
     * @return id of the created job instance
     */
    long runJob(Long appId, RunJobRequest runJobRequest);

    /**
     * Deletes a job.
     * @param jobId job id
     */
    void deleteJob(Long jobId);

    /**
     * Disables a job (stops future scheduling).
     * @param jobId job id
     */
    void disableJob(Long jobId);

    /**
     * Enables a previously disabled job.
     * @param jobId job id
     */
    void enableJob(Long jobId);

    /**
     * Exports a job definition in re-importable form.
     * @param jobId job id
     * @return the job as a save request
     */
    SaveJobInfoRequest exportJob(Long jobId);
}
|
||||
@ -0,0 +1,56 @@
|
||||
package tech.powerjob.server.core.service;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.common.enums.WorkflowNodeType;
|
||||
import tech.powerjob.server.core.validator.NodeValidator;
|
||||
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO;
|
||||
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2021/12/14
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class NodeValidateService {
|
||||
|
||||
private final Map<WorkflowNodeType, NodeValidator> nodeValidatorMap;
|
||||
|
||||
public NodeValidateService(List<NodeValidator> nodeValidators) {
|
||||
nodeValidatorMap = new EnumMap<>(WorkflowNodeType.class);
|
||||
nodeValidators.forEach(e -> nodeValidatorMap.put(e.matchingType(), e));
|
||||
}
|
||||
|
||||
|
||||
public void complexValidate(WorkflowNodeInfoDO node, WorkflowDAG dag) {
|
||||
NodeValidator nodeValidator = getNodeValidator(node);
|
||||
if (nodeValidator == null) {
|
||||
// 默认不需要校验
|
||||
return;
|
||||
}
|
||||
nodeValidator.complexValidate(node, dag);
|
||||
}
|
||||
|
||||
public void simpleValidate(WorkflowNodeInfoDO node) {
|
||||
NodeValidator nodeValidator = getNodeValidator(node);
|
||||
if (nodeValidator == null) {
|
||||
// 默认不需要校验
|
||||
return;
|
||||
}
|
||||
nodeValidator.simpleValidate(node);
|
||||
}
|
||||
|
||||
private NodeValidator getNodeValidator(WorkflowNodeInfoDO node) {
|
||||
Integer nodeTypeCode = node.getType();
|
||||
if (nodeTypeCode == null) {
|
||||
// 前向兼容,默认为 任务节点
|
||||
return nodeValidatorMap.get(WorkflowNodeType.JOB);
|
||||
}
|
||||
return nodeValidatorMap.get(WorkflowNodeType.of(nodeTypeCode));
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,42 @@
|
||||
package tech.powerjob.server.core.service;
|
||||
|
||||
import com.google.common.base.Splitter;
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.server.persistence.remote.model.UserInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.UserInfoRepository;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* 用户服务
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/6/12
|
||||
*/
|
||||
@Service
|
||||
public class UserService {
|
||||
|
||||
@Resource
|
||||
private UserInfoRepository userInfoRepository;
|
||||
|
||||
/**
|
||||
* 根据用户ID字符串获取用户信息详细列表
|
||||
* @param userIds 逗号分割的用户ID信息
|
||||
* @return 用户信息详细列表
|
||||
*/
|
||||
public List<UserInfoDO> fetchNotifyUserList(String userIds) {
|
||||
if (StringUtils.isEmpty(userIds)) {
|
||||
return Lists.newLinkedList();
|
||||
}
|
||||
// 去重
|
||||
Set<Long> userIdList = Splitter.on(",").splitToList(userIds).stream().map(Long::valueOf).collect(Collectors.toSet());
|
||||
List<UserInfoDO> res = userInfoRepository.findByIdIn(Lists.newLinkedList(userIdList));
|
||||
res.forEach(x -> x.setPassword(null));
|
||||
return res;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,101 @@
|
||||
package tech.powerjob.server.core.service;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.common.enums.WorkflowNodeType;
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
import tech.powerjob.common.utils.CommonUtils;
|
||||
import tech.powerjob.server.core.workflow.hanlder.ControlNodeHandler;
|
||||
import tech.powerjob.server.core.workflow.hanlder.TaskNodeHandler;
|
||||
import tech.powerjob.server.core.workflow.hanlder.WorkflowNodeHandlerMarker;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository;
|
||||
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2021/12/9
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
public class WorkflowNodeHandleService {
|
||||
|
||||
private final Map<WorkflowNodeType, ControlNodeHandler> controlNodeHandlerContainer;
|
||||
|
||||
private final Map<WorkflowNodeType, TaskNodeHandler> taskNodeHandlerContainer;
|
||||
|
||||
private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository;
|
||||
|
||||
public WorkflowNodeHandleService(List<ControlNodeHandler> controlNodeHandlerList, List<TaskNodeHandler> taskNodeHandlerList, WorkflowInstanceInfoRepository workflowInstanceInfoRepository) {
|
||||
// init
|
||||
controlNodeHandlerContainer = new EnumMap<>(WorkflowNodeType.class);
|
||||
taskNodeHandlerContainer = new EnumMap<>(WorkflowNodeType.class);
|
||||
controlNodeHandlerList.forEach(controlNodeHandler -> controlNodeHandlerContainer.put(controlNodeHandler.matchingType(), controlNodeHandler));
|
||||
taskNodeHandlerList.forEach(taskNodeHandler -> taskNodeHandlerContainer.put(taskNodeHandler.matchingType(), taskNodeHandler));
|
||||
//
|
||||
this.workflowInstanceInfoRepository = workflowInstanceInfoRepository;
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理任务节点
|
||||
* 注意,上层调用方必须保证这里的 taskNodeList 不能为空
|
||||
*/
|
||||
public void handleTaskNodes(List<PEWorkflowDAG.Node> taskNodeList, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) {
|
||||
|
||||
// 创建任务实例
|
||||
taskNodeList.forEach(taskNode -> {
|
||||
// 注意:这里必须保证任务实例全部创建成功,如果在这里创建实例部分失败,会导致 DAG 信息不会更新,已经生成的实例节点在工作流日志中没法展示
|
||||
TaskNodeHandler taskNodeHandler = (TaskNodeHandler) findMatchingHandler(taskNode);
|
||||
taskNodeHandler.createTaskInstance(taskNode, dag, wfInstanceInfo);
|
||||
log.debug("[Workflow-{}|{}] workflowInstance start to process new node(nodeId={},jobId={})", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), taskNode.getNodeId(), taskNode.getJobId());
|
||||
});
|
||||
// 持久化工作流实例信息
|
||||
wfInstanceInfo.setDag(JSON.toJSONString(dag));
|
||||
workflowInstanceInfoRepository.saveAndFlush(wfInstanceInfo);
|
||||
// 启动
|
||||
taskNodeList.forEach(taskNode -> {
|
||||
TaskNodeHandler taskNodeHandler = (TaskNodeHandler) findMatchingHandler(taskNode);
|
||||
taskNodeHandler.startTaskInstance(taskNode);
|
||||
});
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理控制节点
|
||||
* 注意,上层调用方必须保证这里的 controlNodeList 不能为空
|
||||
*/
|
||||
public void handleControlNodes(List<PEWorkflowDAG.Node> controlNodeList, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) {
|
||||
for (PEWorkflowDAG.Node node : controlNodeList) {
|
||||
handleControlNode(node, dag, wfInstanceInfo);
|
||||
}
|
||||
}
|
||||
|
||||
public void handleControlNode(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) {
|
||||
ControlNodeHandler controlNodeHandler = (ControlNodeHandler) findMatchingHandler(node);
|
||||
node.setStartTime(CommonUtils.formatTime(System.currentTimeMillis()));
|
||||
controlNodeHandler.handle(node, dag, wfInstanceInfo);
|
||||
node.setFinishedTime(CommonUtils.formatTime(System.currentTimeMillis()));
|
||||
}
|
||||
|
||||
|
||||
private WorkflowNodeHandlerMarker findMatchingHandler(PEWorkflowDAG.Node node) {
|
||||
WorkflowNodeType nodeType = WorkflowNodeType.of(node.getNodeType());
|
||||
WorkflowNodeHandlerMarker res;
|
||||
if (!nodeType.isControlNode()) {
|
||||
res = taskNodeHandlerContainer.get(nodeType);
|
||||
} else {
|
||||
res = controlNodeHandlerContainer.get(nodeType);
|
||||
}
|
||||
if (res == null) {
|
||||
// impossible
|
||||
throw new UnsupportedOperationException("unsupported node type : " + nodeType);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,128 @@
|
||||
package tech.powerjob.server.core.service.impl;
|
||||
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.common.enums.EncryptType;
|
||||
import tech.powerjob.common.enums.ErrorCodes;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.common.utils.DigestUtils;
|
||||
import tech.powerjob.server.common.utils.AESUtil;
|
||||
import tech.powerjob.server.core.service.AppInfoService;
|
||||
import tech.powerjob.server.persistence.remote.model.AppInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.AppInfoRepository;
|
||||
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
 * AppInfoServiceImpl
 *
 * App persistence plus credential verification. Passwords are stored AES-encrypted with
 * the {@link #ENCRYPT_PWD_PREFIX} marker; values without the marker are treated as
 * legacy plaintext for backward compatibility.
 *
 * @author tjq
 * @since 2023/3/4
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class AppInfoServiceImpl implements AppInfoService {

    // short-lived local cache backing findById(useCache=true); softValues lets GC reclaim entries under memory pressure
    private final Cache<Long, AppInfoDO> appId2AppInfoDO = CacheBuilder.newBuilder()
            .softValues()
            .expireAfterWrite(3, TimeUnit.MINUTES)
            .maximumSize(1024)
            .build();

    private final AppInfoRepository appInfoRepository;

    // NOTE(review): hard-coded AES key — changing it would break decryption of already-stored
    // passwords; consider externalizing to configuration
    private static final String ENCRYPT_KEY = "ChinaNo.1_ChinaNo.1_ChinaNo.1AAA";

    // marker prefix identifying passwords stored in encrypted form
    private static final String ENCRYPT_PWD_PREFIX = "sys_encrypt_aes:";

    @Override
    public Optional<AppInfoDO> findByAppName(String appName) {
        return appInfoRepository.findByAppName(appName);
    }

    /**
     * Fetches an app by id. With useCache=false the DB is read directly and the cache refreshed;
     * with useCache=true a value up to 3 minutes stale may be returned.
     */
    @Override
    public Optional<AppInfoDO> findById(Long appId, boolean useCache) {
        if (!useCache) {
            Optional<AppInfoDO> appInfoOpt = appInfoRepository.findById(appId);
            appInfoOpt.ifPresent(appInfo -> appId2AppInfoDO.put(appId, appInfo));
            return appInfoOpt;
        }
        try {
            AppInfoDO appInfoDO = appId2AppInfoDO.get(appId, () -> {
                Optional<AppInfoDO> appInfoOpt = appInfoRepository.findById(appId);
                if (appInfoOpt.isPresent()) {
                    return appInfoOpt.get();
                }
                // the loader cannot return null; throwing signals "absent" to the catch below
                throw new IllegalArgumentException("can't find appInfo by appId:" + appId);
            });
            return Optional.of(appInfoDO);
        } catch (Exception e) {
            log.warn("[AppInfoService] findByIdWithCache failed,appId={}", appId, e);
        }
        return Optional.empty();
    }

    @Override
    public void deleteById(Long appId) {
        appInfoRepository.deleteById(appId);
    }

    /**
     * Saves the app with its password AES-encrypted and marked by {@link #ENCRYPT_PWD_PREFIX}.
     */
    @Override
    public AppInfoDO save(AppInfoDO appInfo) {

        String originPassword = appInfo.getPassword();
        String encryptPassword = AESUtil.encrypt(originPassword, ENCRYPT_KEY);
        String finalPassword = ENCRYPT_PWD_PREFIX.concat(encryptPassword);
        appInfo.setPassword(finalPassword);

        return appInfoRepository.saveAndFlush(appInfo);
    }

    @Override
    public Long assertApp(String appName, String password, String encryptType) {
        AppInfoDO appInfo = appInfoRepository.findByAppName(appName).orElseThrow(() -> new PowerJobException(ErrorCodes.INVALID_APP, appName));
        return assertApp(appInfo, password, encryptType);
    }

    @Override
    public Long assertApp(AppInfoDO appInfo, String password, String encryptType) {
        boolean checkPass = checkPassword(appInfo, password, encryptType);
        if (!checkPass) {
            throw new PowerJobException(ErrorCodes.INCORRECT_PASSWORD, null);
        }
        return appInfo.getId();
    }

    /**
     * Compares the supplied password with the stored one according to encryptType
     * (NONE/empty => plaintext compare; MD5 => compare against md5 of the stored password).
     *
     * NOTE(review): assumes {@code password} is non-null (NPE otherwise) and uses
     * non-constant-time String comparison — confirm whether timing-safe comparison is needed here.
     */
    private boolean checkPassword(AppInfoDO appInfo, String password, String encryptType) {
        String originPwd = fetchOriginAppPassword(appInfo);
        if (StringUtils.isEmpty(encryptType) || EncryptType.NONE.getCode().equalsIgnoreCase(encryptType)) {
            return password.equals(originPwd);
        }
        if (EncryptType.MD5.getCode().equalsIgnoreCase(encryptType)) {
            return password.equalsIgnoreCase(DigestUtils.md5(originPwd));
        }
        throw new PowerJobException(ErrorCodes.INVALID_REQUEST, "unknown_encryptType:" + encryptType);
    }

    /**
     * Returns the plaintext password: decrypts values carrying the encryption marker,
     * passes legacy plaintext (and empty values) through unchanged.
     */
    @Override
    public String fetchOriginAppPassword(AppInfoDO appInfo) {
        String dbPwd = appInfo.getPassword();
        if (StringUtils.isEmpty(dbPwd)) {
            return dbPwd;
        }

        if (dbPwd.startsWith(ENCRYPT_PWD_PREFIX)) {
            String encryptPassword = dbPwd.replaceFirst(ENCRYPT_PWD_PREFIX, StringUtils.EMPTY);
            return AESUtil.decrypt(encryptPassword, ENCRYPT_KEY);
        }

        return dbPwd;
    }

}
|
||||
@ -0,0 +1,65 @@
|
||||
package tech.powerjob.server.core.service.impl.job;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import tech.powerjob.common.enums.DispatchStrategy;
|
||||
import tech.powerjob.common.enums.ExecuteType;
|
||||
import tech.powerjob.common.enums.ProcessorType;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.common.model.AlarmConfig;
|
||||
import tech.powerjob.common.model.JobAdvancedRuntimeConfig;
|
||||
import tech.powerjob.common.model.LifeCycle;
|
||||
import tech.powerjob.common.model.LogConfig;
|
||||
import tech.powerjob.common.request.http.SaveJobInfoRequest;
|
||||
import tech.powerjob.common.response.JobInfoDTO;
|
||||
import tech.powerjob.common.serialize.JsonUtils;
|
||||
import tech.powerjob.server.common.SJ;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* JobConverter
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2023/3/4
|
||||
*/
|
||||
public class JobConverter {
|
||||
|
||||
public static SaveJobInfoRequest convertJobInfoDO2SaveJobInfoRequest(JobInfoDO jobInfoDO) {
|
||||
SaveJobInfoRequest saveJobInfoRequest = new SaveJobInfoRequest();
|
||||
BeanUtils.copyProperties(jobInfoDO, saveJobInfoRequest);
|
||||
saveJobInfoRequest.setTimeExpressionType(TimeExpressionType.of(jobInfoDO.getTimeExpressionType()));
|
||||
saveJobInfoRequest.setExecuteType(ExecuteType.of(jobInfoDO.getExecuteType()));
|
||||
saveJobInfoRequest.setProcessorType(ProcessorType.of(jobInfoDO.getProcessorType()));
|
||||
if (StringUtils.isNotEmpty(jobInfoDO.getNotifyUserIds())) {
|
||||
saveJobInfoRequest.setNotifyUserIds(Lists.newArrayList(SJ.COMMA_SPLITTER.split(jobInfoDO.getNotifyUserIds())).stream().map(Long::valueOf).collect(Collectors.toList()));
|
||||
}
|
||||
saveJobInfoRequest.setDispatchStrategy(DispatchStrategy.of(jobInfoDO.getDispatchStrategy()));
|
||||
saveJobInfoRequest.setLifeCycle(LifeCycle.parse(jobInfoDO.getLifecycle()));
|
||||
saveJobInfoRequest.setAlarmConfig(JsonUtils.parseObjectIgnoreException(jobInfoDO.getAlarmConfig(), AlarmConfig.class));
|
||||
saveJobInfoRequest.setLogConfig(JsonUtils.parseObjectIgnoreException(jobInfoDO.getLogConfig(), LogConfig.class));
|
||||
saveJobInfoRequest.setAdvancedRuntimeConfig(JsonUtils.parseObjectIgnoreException(jobInfoDO.getAdvancedRuntimeConfig(), JobAdvancedRuntimeConfig.class));
|
||||
return saveJobInfoRequest;
|
||||
}
|
||||
|
||||
public static JobInfoDTO convertJobInfoDO2JobInfoDTO(JobInfoDO jobInfoDO) {
|
||||
JobInfoDTO jobInfoDTO = new JobInfoDTO();
|
||||
BeanUtils.copyProperties(jobInfoDO, jobInfoDTO);
|
||||
if (jobInfoDO.getAlarmConfig() != null) {
|
||||
jobInfoDTO.setAlarmConfig(JSON.parseObject(jobInfoDO.getAlarmConfig(), AlarmConfig.class));
|
||||
}
|
||||
|
||||
if (StringUtils.isNotEmpty(jobInfoDO.getLogConfig())) {
|
||||
jobInfoDTO.setLogConfig(JSON.parseObject(jobInfoDO.getLogConfig(), LogConfig.class));
|
||||
}
|
||||
|
||||
if (StringUtils.isNotEmpty(jobInfoDO.getAdvancedRuntimeConfig())) {
|
||||
jobInfoDTO.setAdvancedRuntimeConfig(JSON.parseObject(jobInfoDO.getAdvancedRuntimeConfig(), JobAdvancedRuntimeConfig.class));
|
||||
}
|
||||
return jobInfoDTO;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,323 @@
|
||||
package tech.powerjob.server.core.service.impl.job;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.data.jpa.domain.Specification;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import tech.powerjob.common.PowerQuery;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.common.model.AlarmConfig;
|
||||
import tech.powerjob.common.model.LifeCycle;
|
||||
import tech.powerjob.common.request.http.RunJobRequest;
|
||||
import tech.powerjob.common.request.http.SaveJobInfoRequest;
|
||||
import tech.powerjob.common.response.JobInfoDTO;
|
||||
import tech.powerjob.common.serialize.JsonUtils;
|
||||
import tech.powerjob.server.common.SJ;
|
||||
import tech.powerjob.common.enums.SwitchableStatus;
|
||||
import tech.powerjob.server.common.timewheel.holder.InstanceTimeWheelService;
|
||||
import tech.powerjob.server.core.DispatchService;
|
||||
import tech.powerjob.server.core.instance.InstanceService;
|
||||
import tech.powerjob.server.core.scheduler.TimingStrategyService;
|
||||
import tech.powerjob.server.core.service.JobService;
|
||||
import tech.powerjob.server.persistence.QueryConvertUtils;
|
||||
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
|
||||
import tech.powerjob.server.remote.server.redirector.DesignateServer;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* JobServiceImpl
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2023/3/4
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class JobServiceImpl implements JobService {
|
||||
|
||||
private final InstanceService instanceService;
|
||||
|
||||
private final DispatchService dispatchService;
|
||||
|
||||
private final JobInfoRepository jobInfoRepository;
|
||||
|
||||
private final InstanceInfoRepository instanceInfoRepository;
|
||||
|
||||
private final TimingStrategyService timingStrategyService;
|
||||
|
||||
/**
|
||||
* 保存/修改任务
|
||||
*
|
||||
* @param request 任务请求
|
||||
* @return 创建的任务ID(jobId)
|
||||
*/
|
||||
@Override
|
||||
public Long saveJob(SaveJobInfoRequest request) {
|
||||
|
||||
request.valid();
|
||||
|
||||
JobInfoDO jobInfoDO;
|
||||
if (request.getId() != null) {
|
||||
jobInfoDO = jobInfoRepository.findById(request.getId()).orElseThrow(() -> new IllegalArgumentException("can't find job by jobId: " + request.getId()));
|
||||
} else {
|
||||
jobInfoDO = new JobInfoDO();
|
||||
}
|
||||
|
||||
// 值拷贝
|
||||
BeanUtils.copyProperties(request, jobInfoDO);
|
||||
|
||||
// 拷贝枚举值
|
||||
jobInfoDO.setExecuteType(request.getExecuteType().getV());
|
||||
jobInfoDO.setProcessorType(request.getProcessorType().getV());
|
||||
jobInfoDO.setTimeExpressionType(request.getTimeExpressionType().getV());
|
||||
jobInfoDO.setStatus(request.isEnable() ? SwitchableStatus.ENABLE.getV() : SwitchableStatus.DISABLE.getV());
|
||||
jobInfoDO.setDispatchStrategy(request.getDispatchStrategy().getV());
|
||||
|
||||
// 填充默认值,非空保护防止 NPE
|
||||
fillDefaultValue(jobInfoDO);
|
||||
|
||||
// 转化报警用户列表
|
||||
if (request.getNotifyUserIds() != null) {
|
||||
if (request.getNotifyUserIds().size() == 0) {
|
||||
jobInfoDO.setNotifyUserIds(null);
|
||||
} else {
|
||||
jobInfoDO.setNotifyUserIds(SJ.COMMA_JOINER.join(request.getNotifyUserIds()));
|
||||
}
|
||||
}
|
||||
LifeCycle lifecycle = Optional.ofNullable(request.getLifeCycle()).orElse(LifeCycle.EMPTY_LIFE_CYCLE);
|
||||
jobInfoDO.setLifecycle(JSON.toJSONString(lifecycle));
|
||||
// 检查定时策略
|
||||
timingStrategyService.validate(request.getTimeExpressionType(), request.getTimeExpression(), lifecycle.getStart(), lifecycle.getEnd());
|
||||
calculateNextTriggerTime(jobInfoDO);
|
||||
if (request.getId() == null) {
|
||||
jobInfoDO.setGmtCreate(new Date());
|
||||
}
|
||||
// 检查告警配置
|
||||
if (request.getAlarmConfig() != null) {
|
||||
AlarmConfig config = request.getAlarmConfig();
|
||||
if (config.getStatisticWindowLen() == null || config.getAlertThreshold() == null || config.getSilenceWindowLen() == null) {
|
||||
throw new PowerJobException("illegal alarm config!");
|
||||
}
|
||||
jobInfoDO.setAlarmConfig(JSON.toJSONString(request.getAlarmConfig()));
|
||||
}
|
||||
// 日志配置
|
||||
if (request.getLogConfig() != null) {
|
||||
jobInfoDO.setLogConfig(JSONObject.toJSONString(request.getLogConfig()));
|
||||
}
|
||||
// 日志配置
|
||||
if (request.getAdvancedRuntimeConfig() != null) {
|
||||
jobInfoDO.setAdvancedRuntimeConfig(JSONObject.toJSONString(request.getAdvancedRuntimeConfig()));
|
||||
}
|
||||
JobInfoDO res = jobInfoRepository.saveAndFlush(jobInfoDO);
|
||||
return res.getId();
|
||||
}
|
||||
|
||||
/**
|
||||
* 复制任务
|
||||
*
|
||||
* @param jobId 目标任务ID
|
||||
* @return 复制后的任务 ID
|
||||
*/
|
||||
@Override
|
||||
public JobInfoDO copyJob(Long jobId) {
|
||||
|
||||
JobInfoDO origin = jobInfoRepository.findById(jobId).orElseThrow(() -> new IllegalArgumentException("can't find job by jobId: " + jobId));
|
||||
if (origin.getStatus() == SwitchableStatus.DELETED.getV()) {
|
||||
throw new IllegalStateException("can't copy the job which has been deleted!");
|
||||
}
|
||||
JobInfoDO copyJob = new JobInfoDO();
|
||||
// 值拷贝
|
||||
BeanUtils.copyProperties(origin, copyJob);
|
||||
// 填充默认值,理论上应该不需要
|
||||
fillDefaultValue(copyJob);
|
||||
// 修正创建时间以及更新时间
|
||||
copyJob.setId(null);
|
||||
copyJob.setJobName(copyJob.getJobName() + "_COPY");
|
||||
copyJob.setGmtCreate(new Date());
|
||||
copyJob.setGmtModified(new Date());
|
||||
|
||||
copyJob = jobInfoRepository.saveAndFlush(copyJob);
|
||||
return copyJob;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public JobInfoDTO fetchJob(Long jobId) {
|
||||
return JobConverter.convertJobInfoDO2JobInfoDTO(jobInfoRepository.findById(jobId).orElseThrow(() -> new IllegalArgumentException("can't find job by jobId: " + jobId)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<JobInfoDTO> fetchAllJob(Long appId) {
|
||||
return jobInfoRepository.findByAppId(appId).stream().map(JobConverter::convertJobInfoDO2JobInfoDTO).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<JobInfoDTO> queryJob(PowerQuery powerQuery) {
|
||||
Specification<JobInfoDO> specification = QueryConvertUtils.toSpecification(powerQuery);
|
||||
return jobInfoRepository.findAll(specification).stream().map(JobConverter::convertJobInfoDO2JobInfoDTO).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
@DesignateServer
|
||||
public long runJob(Long appId, RunJobRequest runJobRequest) {
|
||||
Long jobId = runJobRequest.getJobId();
|
||||
String instanceParams = runJobRequest.getInstanceParams();
|
||||
String outerKey = runJobRequest.getOuterKey();
|
||||
long delay = runJobRequest.getDelay() == null ? 0 : runJobRequest.getDelay();
|
||||
|
||||
JobInfoDO jobInfo = jobInfoRepository.findById(jobId).orElseThrow(() -> new IllegalArgumentException("can't find job by id:" + jobId));
|
||||
|
||||
log.info("[Job-{}] try to run job in app[{}], instanceParams={},delay={} ms,outerKey={}", jobInfo.getId(), appId, instanceParams, delay, outerKey);
|
||||
|
||||
final InstanceInfoDO instanceInfo = instanceService.create(jobInfo.getId(), jobInfo.getAppId(), jobInfo.getJobParams(),
|
||||
instanceParams, null, System.currentTimeMillis() + Math.max(delay, 0),
|
||||
outerKey, runJobRequest.getExtendValue()
|
||||
);
|
||||
instanceInfoRepository.flush();
|
||||
if (delay <= 0) {
|
||||
dispatchService.dispatch(jobInfo, instanceInfo.getInstanceId(), Optional.of(instanceInfo),Optional.empty());
|
||||
} else {
|
||||
InstanceTimeWheelService.schedule(instanceInfo.getInstanceId(), delay, () -> dispatchService.dispatch(jobInfo, instanceInfo.getInstanceId(), Optional.empty(),Optional.empty()));
|
||||
}
|
||||
log.info("[Job-{}|{}] execute 'runJob' successfully, params={}", jobInfo.getId(), instanceInfo.getInstanceId(), instanceParams);
|
||||
return instanceInfo.getInstanceId();
|
||||
}
|
||||
|
||||
/**
|
||||
* 删除某个任务
|
||||
*
|
||||
* @param jobId 任务ID
|
||||
*/
|
||||
@Override
|
||||
public void deleteJob(Long jobId) {
|
||||
shutdownOrStopJob(jobId, SwitchableStatus.DELETED);
|
||||
}
|
||||
|
||||
/**
|
||||
* 禁用某个任务
|
||||
*/
|
||||
@Override
|
||||
public void disableJob(Long jobId) {
|
||||
shutdownOrStopJob(jobId, SwitchableStatus.DISABLE);
|
||||
}
|
||||
|
||||
/**
|
||||
* 导出某个任务为 JSON
|
||||
* @param jobId jobId
|
||||
* @return 导出结果
|
||||
*/
|
||||
@Override
|
||||
public SaveJobInfoRequest exportJob(Long jobId) {
|
||||
Optional<JobInfoDO> jobInfoOpt = jobInfoRepository.findById(jobId);
|
||||
if (!jobInfoOpt.isPresent()) {
|
||||
throw new IllegalArgumentException("can't find job by jobId: " + jobId);
|
||||
}
|
||||
final JobInfoDO jobInfoDO = jobInfoOpt.get();
|
||||
final SaveJobInfoRequest saveJobInfoRequest = JobConverter.convertJobInfoDO2SaveJobInfoRequest(jobInfoDO);
|
||||
saveJobInfoRequest.setId(null);
|
||||
saveJobInfoRequest.setJobName(saveJobInfoRequest.getJobName() + "_EXPORT_" + System.currentTimeMillis());
|
||||
log.info("[Job-{}] [exportJob] jobInfoDO: {}, saveJobInfoRequest: {}", jobId, JsonUtils.toJSONString(jobInfoDO), JsonUtils.toJSONString(saveJobInfoRequest));
|
||||
return saveJobInfoRequest;
|
||||
}
|
||||
|
||||
/**
|
||||
* 启用某个任务
|
||||
*
|
||||
* @param jobId 任务ID
|
||||
*/
|
||||
@Override
|
||||
public void enableJob(Long jobId) {
|
||||
JobInfoDO jobInfoDO = jobInfoRepository.findById(jobId).orElseThrow(() -> new IllegalArgumentException("can't find job by jobId:" + jobId));
|
||||
|
||||
jobInfoDO.setStatus(SwitchableStatus.ENABLE.getV());
|
||||
calculateNextTriggerTime(jobInfoDO);
|
||||
|
||||
jobInfoRepository.saveAndFlush(jobInfoDO);
|
||||
}
|
||||
|
||||
/**
|
||||
* 停止或删除某个JOB
|
||||
* 秒级任务还要额外停止正在运行的任务实例
|
||||
*/
|
||||
private void shutdownOrStopJob(Long jobId, SwitchableStatus status) {
|
||||
|
||||
// 1. 先更新 job_info 表
|
||||
Optional<JobInfoDO> jobInfoOPT = jobInfoRepository.findById(jobId);
|
||||
if (!jobInfoOPT.isPresent()) {
|
||||
throw new IllegalArgumentException("can't find job by jobId:" + jobId);
|
||||
}
|
||||
JobInfoDO jobInfoDO = jobInfoOPT.get();
|
||||
jobInfoDO.setStatus(status.getV());
|
||||
jobInfoDO.setGmtModified(new Date());
|
||||
jobInfoRepository.saveAndFlush(jobInfoDO);
|
||||
|
||||
// 2. 关闭秒级任务
|
||||
if (!TimeExpressionType.FREQUENT_TYPES.contains(jobInfoDO.getTimeExpressionType())) {
|
||||
return;
|
||||
}
|
||||
List<InstanceInfoDO> executeLogs = instanceInfoRepository.findByJobIdAndStatusIn(jobId, InstanceStatus.GENERALIZED_RUNNING_STATUS);
|
||||
if (CollectionUtils.isEmpty(executeLogs)) {
|
||||
return;
|
||||
}
|
||||
if (executeLogs.size() > 1) {
|
||||
log.warn("[Job-{}] frequent job should just have one running instance, there must have some bug.", jobId);
|
||||
}
|
||||
executeLogs.forEach(instance -> {
|
||||
try {
|
||||
// 重复查询了数据库,不过问题不大,这个调用量很小
|
||||
instanceService.stopInstance(instance.getAppId(), instance.getInstanceId());
|
||||
} catch (Exception ignore) {
|
||||
// ignore exception
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private void calculateNextTriggerTime(JobInfoDO jobInfo) {
|
||||
// 计算下次调度时间
|
||||
if (TimeExpressionType.FREQUENT_TYPES.contains(jobInfo.getTimeExpressionType())) {
|
||||
// 固定频率类型的任务不计算
|
||||
jobInfo.setNextTriggerTime(null);
|
||||
} else {
|
||||
LifeCycle lifeCycle = LifeCycle.parse(jobInfo.getLifecycle());
|
||||
Long nextValidTime = timingStrategyService.calculateNextTriggerTimeWithInspection(TimeExpressionType.of(jobInfo.getTimeExpressionType()), jobInfo.getTimeExpression(), lifeCycle.getStart(), lifeCycle.getEnd());
|
||||
jobInfo.setNextTriggerTime(nextValidTime);
|
||||
}
|
||||
// 重写最后修改时间
|
||||
jobInfo.setGmtModified(new Date());
|
||||
}
|
||||
|
||||
private void fillDefaultValue(JobInfoDO jobInfoDO) {
|
||||
if (jobInfoDO.getMaxWorkerCount() == null) {
|
||||
jobInfoDO.setMaxWorkerCount(0);
|
||||
}
|
||||
if (jobInfoDO.getMaxInstanceNum() == null) {
|
||||
jobInfoDO.setMaxInstanceNum(0);
|
||||
}
|
||||
if (jobInfoDO.getConcurrency() == null) {
|
||||
jobInfoDO.setConcurrency(5);
|
||||
}
|
||||
if (jobInfoDO.getInstanceRetryNum() == null) {
|
||||
jobInfoDO.setInstanceRetryNum(0);
|
||||
}
|
||||
if (jobInfoDO.getTaskRetryNum() == null) {
|
||||
jobInfoDO.setTaskRetryNum(0);
|
||||
}
|
||||
if (jobInfoDO.getInstanceTimeLimit() == null) {
|
||||
jobInfoDO.setInstanceTimeLimit(0L);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,37 @@
|
||||
package tech.powerjob.server.core.uid;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.server.remote.server.self.ServerInfoService;
|
||||
|
||||
/**
|
||||
* 唯一ID生成服务,使用 Twitter snowflake 算法
|
||||
* 机房ID:固定为0,占用2位
|
||||
* 机器ID:由 ServerIdProvider 提供
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/4/6
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
public class IdGenerateService {
|
||||
|
||||
private final SnowFlakeIdGenerator snowFlakeIdGenerator;
|
||||
|
||||
private static final int DATA_CENTER_ID = 0;
|
||||
|
||||
public IdGenerateService(ServerInfoService serverInfoService) {
|
||||
long id = serverInfoService.fetchCurrentServerInfo().getId();
|
||||
snowFlakeIdGenerator = new SnowFlakeIdGenerator(DATA_CENTER_ID, id);
|
||||
log.info("[IdGenerateService] initialize IdGenerateService successfully, ID:{}", id);
|
||||
}
|
||||
|
||||
/**
|
||||
* 分配分布式唯一ID
|
||||
* @return 分布式唯一ID
|
||||
*/
|
||||
public long allocate() {
|
||||
return snowFlakeIdGenerator.nextId();
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,122 @@
|
||||
package tech.powerjob.server.core.uid;
|
||||
|
||||
/**
 * Twitter SnowFlake id generator (ported from Scala to Java).
 * <p>
 * Id layout (low to high): 6-bit sequence | 14-bit machine id | 2-bit data center | timestamp.
 * Thread-safe: {@link #nextId()} is synchronized.
 *
 * @author tjq
 * @since 2020/4/6
 */
public class SnowFlakeIdGenerator {

    /**
     * Custom epoch the timestamp section is measured from (a special day for the author).
     */
    private static final long START_STAMP = 1555776000000L;

    /**
     * Bit width of the per-millisecond sequence section.
     */
    private static final long SEQUENCE_BIT = 6;

    /**
     * Bit width of the machine-id section.
     */
    private static final long MACHINE_BIT = 14;

    /**
     * Bit width of the data-center section.
     */
    private static final long DATA_CENTER_BIT = 2;

    /**
     * Maximum value each section may hold (all-ones masks).
     */
    private static final long MAX_DATA_CENTER_NUM = ~(-1L << DATA_CENTER_BIT);

    private static final long MAX_MACHINE_NUM = ~(-1L << MACHINE_BIT);

    private static final long MAX_SEQUENCE = ~(-1L << SEQUENCE_BIT);

    /**
     * Left shift applied to each section when assembling an id.
     */
    private static final long MACHINE_LEFT = SEQUENCE_BIT;

    private static final long DATA_CENTER_LEFT = SEQUENCE_BIT + MACHINE_BIT;

    private static final long TIMESTAMP_LEFT = DATA_CENTER_LEFT + DATA_CENTER_BIT;

    /**
     * Data-center id baked into every generated id.
     */
    private final long dataCenterId;

    /**
     * Machine id baked into every generated id.
     */
    private final long machineId;

    /**
     * Sequence counter within the current millisecond.
     */
    private long sequence = 0L;

    /**
     * Timestamp used by the most recently generated id.
     */
    private long lastTimestamp = -1L;

    public SnowFlakeIdGenerator(long dataCenterId, long machineId) {
        if (dataCenterId < 0 || dataCenterId > MAX_DATA_CENTER_NUM) {
            throw new IllegalArgumentException("dataCenterId can't be greater than MAX_DATA_CENTER_NUM or less than 0");
        }
        if (machineId < 0 || machineId > MAX_MACHINE_NUM) {
            throw new IllegalArgumentException("machineId can't be greater than MAX_MACHINE_NUM or less than 0");
        }
        this.dataCenterId = dataCenterId;
        this.machineId = machineId;
    }

    /**
     * Generate the next id.
     */
    public synchronized long nextId() {
        long now = getNewStamp();
        if (now < lastTimestamp) {
            // clock moved backwards: borrow future time instead of failing
            return futureId();
        }

        if (now == lastTimestamp) {
            // same millisecond: advance the sequence
            sequence = (sequence + 1) & MAX_SEQUENCE;
            if (sequence == 0L) {
                // sequence exhausted within this millisecond, spin until the next one
                now = getNextMill();
            }
        } else {
            // new millisecond: reset the sequence
            sequence = 0L;
        }

        lastTimestamp = now;

        return assemble(lastTimestamp);
    }

    /**
     * On clock drift, keep issuing ids against a "future" timestamp so scheduling stays
     * available while the clock catches up.
     * Note: this does NOT solve duplicate ids caused by drift across a server restart.
     */
    private long futureId() {
        sequence = (sequence + 1) & MAX_SEQUENCE;
        if (sequence == 0L) {
            lastTimestamp = lastTimestamp + 1;
        }
        return assemble(lastTimestamp);
    }

    /**
     * Pack timestamp, data center, machine and sequence sections into one long.
     */
    private long assemble(long stamp) {
        return (stamp - START_STAMP) << TIMESTAMP_LEFT
                | dataCenterId << DATA_CENTER_LEFT
                | machineId << MACHINE_LEFT
                | sequence;
    }

    /**
     * Busy-wait until the wall clock advances past {@code lastTimestamp}.
     */
    private long getNextMill() {
        long mill = getNewStamp();
        while (mill <= lastTimestamp) {
            mill = getNewStamp();
        }
        return mill;
    }

    private long getNewStamp() {
        return System.currentTimeMillis();
    }
}
|
||||
@ -0,0 +1,70 @@
|
||||
package tech.powerjob.server.core.validator;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.common.enums.WorkflowNodeType;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2021/12/14
|
||||
*/
|
||||
@Component
|
||||
@Slf4j
|
||||
public class DecisionNodeValidator implements NodeValidator {
|
||||
|
||||
|
||||
@Override
|
||||
public void complexValidate(WorkflowNodeInfoDO node, WorkflowDAG dag) {
|
||||
// 出度固定为 2
|
||||
WorkflowDAG.Node nodeWrapper = dag.getNode(node.getId());
|
||||
Collection<PEWorkflowDAG.Edge> edges = nodeWrapper.getSuccessorEdgeMap().values();
|
||||
if (edges.size() != 2) {
|
||||
throw new PowerJobException("DecisionNode‘s out-degree must be 2,node name : " + node.getNodeName());
|
||||
}
|
||||
// 边的属性必须为 ture 或者 false
|
||||
boolean containFalse = false;
|
||||
boolean containTrue = false;
|
||||
for (PEWorkflowDAG.Edge edge : edges) {
|
||||
if (!isValidBooleanStr(edge.getProperty())) {
|
||||
throw new PowerJobException("Illegal property of DecisionNode‘s out-degree edge,node name : " + node.getNodeName());
|
||||
}
|
||||
boolean b = Boolean.parseBoolean(edge.getProperty());
|
||||
if (b) {
|
||||
containTrue = true;
|
||||
} else {
|
||||
containFalse = true;
|
||||
}
|
||||
}
|
||||
if (!containFalse || !containTrue) {
|
||||
throw new PowerJobException("Illegal property of DecisionNode‘s out-degree edge,node name : " + node.getNodeName());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void simpleValidate(WorkflowNodeInfoDO node) {
|
||||
// 简单校验
|
||||
String nodeParams = node.getNodeParams();
|
||||
if (StringUtils.isBlank(nodeParams)) {
|
||||
throw new PowerJobException("DecisionNode‘s param must be not null,node name : " + node.getNodeName());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static boolean isValidBooleanStr(String str) {
|
||||
return StringUtils.equalsIgnoreCase(str.trim(), "true") || StringUtils.equalsIgnoreCase(str.trim(), "false");
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public WorkflowNodeType matchingType() {
|
||||
return WorkflowNodeType.DECISION;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,45 @@
|
||||
package tech.powerjob.server.core.validator;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.common.enums.WorkflowNodeType;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.common.enums.SwitchableStatus;
|
||||
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2021/12/14
|
||||
*/
|
||||
@Component
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
public class JobNodeValidator implements NodeValidator {
|
||||
|
||||
private final JobInfoRepository jobInfoRepository;
|
||||
|
||||
@Override
|
||||
public void complexValidate(WorkflowNodeInfoDO node, WorkflowDAG dag) {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
@Override
|
||||
public void simpleValidate(WorkflowNodeInfoDO node) {
|
||||
// 判断对应的任务是否存在
|
||||
JobInfoDO job = jobInfoRepository.findById(node.getJobId())
|
||||
.orElseThrow(() -> new PowerJobException("Illegal job node,specified job is not exist,node name : " + node.getNodeName()));
|
||||
|
||||
if (job.getStatus() == SwitchableStatus.DELETED.getV()) {
|
||||
throw new PowerJobException("Illegal job node,specified job has been deleted,node name : " + node.getNodeName());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public WorkflowNodeType matchingType() {
|
||||
return WorkflowNodeType.JOB;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,69 @@
|
||||
package tech.powerjob.server.core.validator;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.common.enums.WorkflowNodeType;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
import tech.powerjob.common.enums.SwitchableStatus;
|
||||
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowNodeInfoRepository;
|
||||
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2021/12/14
|
||||
*/
|
||||
@Component
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
public class NestedWorkflowNodeValidator implements NodeValidator {
|
||||
|
||||
private final WorkflowInfoRepository workflowInfoRepository;
|
||||
|
||||
private final WorkflowNodeInfoRepository workflowNodeInfoRepository;
|
||||
|
||||
@Override
|
||||
public void complexValidate(WorkflowNodeInfoDO node, WorkflowDAG dag) {
|
||||
// 这里检查是否循环嵌套(自身引用自身)
|
||||
if (Objects.equals(node.getJobId(), node.getWorkflowId())) {
|
||||
throw new PowerJobException("Illegal nested workflow node,Prohibit circular references!" + node.getNodeName());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void simpleValidate(WorkflowNodeInfoDO node) {
|
||||
// 判断对应工作流是否存在
|
||||
WorkflowInfoDO workflowInfo = workflowInfoRepository.findById(node.getJobId())
|
||||
.orElseThrow(() -> new PowerJobException("Illegal nested workflow node,specified workflow is not exist,node name : " + node.getNodeName()));
|
||||
if (workflowInfo.getStatus() == SwitchableStatus.DELETED.getV()) {
|
||||
throw new PowerJobException("Illegal nested workflow node,specified workflow has been deleted,node name : " + node.getNodeName());
|
||||
}
|
||||
// 不允许多层嵌套,即 嵌套工作流节点引用的工作流中不能包含嵌套节点
|
||||
PEWorkflowDAG peDag = JSON.parseObject(workflowInfo.getPeDAG(), PEWorkflowDAG.class);
|
||||
for (PEWorkflowDAG.Node peDagNode : peDag.getNodes()) {
|
||||
//
|
||||
final Optional<WorkflowNodeInfoDO> nestWfNodeOp = workflowNodeInfoRepository.findById(peDagNode.getNodeId());
|
||||
if (!nestWfNodeOp.isPresent()) {
|
||||
// 嵌套的工作流无效,缺失节点元数据
|
||||
throw new PowerJobException("Illegal nested workflow node,specified workflow is invalidate,node name : " + node.getNodeName());
|
||||
}
|
||||
if (Objects.equals(nestWfNodeOp.get().getType(), WorkflowNodeType.NESTED_WORKFLOW.getCode())) {
|
||||
throw new PowerJobException("Illegal nested workflow node,specified workflow must be a simple workflow,node name : " + node.getNodeName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public WorkflowNodeType matchingType() {
|
||||
return WorkflowNodeType.NESTED_WORKFLOW;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,31 @@
|
||||
package tech.powerjob.server.core.validator;
|
||||
|
||||
import tech.powerjob.common.enums.WorkflowNodeType;
|
||||
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO;
|
||||
|
||||
/**
 * Validation contract for a single workflow node type.
 * Implementations are selected by matching {@link #matchingType()} against the node's type.
 *
 * @author Echo009
 * @since 2021/12/14
 */
public interface NodeValidator {
    /**
     * Validate the node against the whole DAG (topology-level checks, e.g. out-degree constraints).
     *
     * @param node node to validate
     * @param dag  the complete workflow DAG containing the node
     */
    void complexValidate(WorkflowNodeInfoDO node, WorkflowDAG dag);

    /**
     * Validate the node in isolation (field-level checks only, no topology required).
     *
     * @param node node to validate
     */
    void simpleValidate(WorkflowNodeInfoDO node);

    /**
     * The node type this validator handles.
     *
     * @return node type
     */
    WorkflowNodeType matchingType();

}
|
||||
@ -0,0 +1,488 @@
|
||||
package tech.powerjob.server.core.workflow;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.alibaba.fastjson.TypeReference;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Sets;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.common.SystemInstanceResult;
|
||||
import tech.powerjob.common.WorkflowContextConstant;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.enums.WorkflowInstanceStatus;
|
||||
import tech.powerjob.common.enums.WorkflowNodeType;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
import tech.powerjob.common.serialize.JsonUtils;
|
||||
import tech.powerjob.common.utils.CommonUtils;
|
||||
import tech.powerjob.common.enums.SwitchableStatus;
|
||||
import tech.powerjob.server.common.utils.SpringUtils;
|
||||
import tech.powerjob.server.core.alarm.AlarmUtils;
|
||||
import tech.powerjob.server.core.helper.StatusMappingHelper;
|
||||
import tech.powerjob.server.core.lock.UseCacheLock;
|
||||
import tech.powerjob.server.core.service.UserService;
|
||||
import tech.powerjob.server.core.service.WorkflowNodeHandleService;
|
||||
import tech.powerjob.server.core.uid.IdGenerateService;
|
||||
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils;
|
||||
import tech.powerjob.server.core.alarm.AlarmCenter;
|
||||
import tech.powerjob.server.core.alarm.module.WorkflowInstanceAlarm;
|
||||
import tech.powerjob.server.persistence.remote.model.*;
|
||||
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowNodeInfoRepository;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils.isNotAllowSkipWhenFailed;
|
||||
|
||||
/**
|
||||
* 管理运行中的工作流实例
|
||||
*
|
||||
* @author tjq
|
||||
* @author Echo009
|
||||
* @since 2020/5/26
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@SuppressWarnings("squid:S1192")
|
||||
public class WorkflowInstanceManager {
|
||||
|
||||
private final AlarmCenter alarmCenter;
|
||||
|
||||
private final IdGenerateService idGenerateService;
|
||||
|
||||
private final JobInfoRepository jobInfoRepository;
|
||||
|
||||
private final UserService userService;
|
||||
|
||||
private final WorkflowInfoRepository workflowInfoRepository;
|
||||
|
||||
private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository;
|
||||
|
||||
private final WorkflowNodeInfoRepository workflowNodeInfoRepository;
|
||||
|
||||
private final WorkflowNodeHandleService workflowNodeHandleService;
|
||||
|
||||
    /**
     * Create a workflow-instance record for the given workflow (initial status: WAITING).
     * ********************************************
     * 2021-02-03 modify by Echo009
     * the workflow context (wfContext) is initialized from initParams
     * ********************************************
     *
     * @param wfInfo            workflow metadata (definition)
     * @param initParams        startup parameters
     * @param expectTriggerTime expected trigger time
     * @param parentWfInstanceId parent workflow-instance id for sub-workflows, or null
     * @return wfInstanceId (returned even when validation fails; the instance is then
     *         persisted in FAILED state rather than throwing to the caller)
     */
    public Long create(WorkflowInfoDO wfInfo, String initParams, Long expectTriggerTime, Long parentWfInstanceId) {

        Long wfId = wfInfo.getId();
        Long wfInstanceId = idGenerateService.allocate();
        // build the instance entity (base fields only; the DAG is attached below)
        WorkflowInstanceInfoDO newWfInstance = constructWfInstance(wfInfo, initParams, expectTriggerTime, wfId, wfInstanceId);
        if (parentWfInstanceId != null) {
            // sub-workflow: record the parent instance
            newWfInstance.setParentWfInstanceId(parentWfInstanceId);
            // and pass the parent's context straight through as this instance's context
            newWfInstance.setWfContext(initParams);
        }

        PEWorkflowDAG dag = null;
        try {
            dag = JSON.parseObject(wfInfo.getPeDAG(), PEWorkflowDAG.class);
            // validate the DAG structure
            if (!WorkflowDAGUtils.valid(dag)) {
                log.error("[Workflow-{}|{}] DAG of this workflow is illegal! maybe you has modified the DAG info directly in database!", wfId, wfInstanceId);
                throw new PowerJobException(SystemInstanceResult.INVALID_DAG);
            }
            // enrich each DAG node from its stored node metadata
            initNodeInfo(dag);
            // finally, verify every referenced job is still usable (not deleted)
            Set<Long> allJobIds = Sets.newHashSet();
            dag.getNodes().forEach(node -> {
                if (node.getNodeType() == WorkflowNodeType.JOB.getCode()) {
                    allJobIds.add(node.getJobId());
                }
                // every node starts in the waiting-for-dispatch state
                node.setStatus(InstanceStatus.WAITING_DISPATCH.getV());
            });
            int needNum = allJobIds.size();
            long dbNum = jobInfoRepository.countByAppIdAndStatusInAndIdIn(wfInfo.getAppId(), Sets.newHashSet(SwitchableStatus.ENABLE.getV(), SwitchableStatus.DISABLE.getV()), allJobIds);
            log.debug("[Workflow-{}|{}] contains {} jobs, find {} jobs in database.", wfId, wfInstanceId, needNum, dbNum);
            if (dbNum < allJobIds.size()) {
                log.warn("[Workflow-{}|{}] this workflow need {} jobs, but just find {} jobs in database, maybe you delete or disable some job!", wfId, wfInstanceId, needNum, dbNum);
                throw new PowerJobException(SystemInstanceResult.CAN_NOT_FIND_JOB);
            }
            newWfInstance.setDag(JSON.toJSONString(dag));
            workflowInstanceInfoRepository.saveAndFlush(newWfInstance);
        } catch (Exception e) {
            // keep whatever DAG state we reached so the failure is diagnosable
            if (dag != null) {
                newWfInstance.setDag(JSON.toJSONString(dag));
            }
            // persist the instance as FAILED instead of propagating the exception
            handleWfInstanceFinalStatus(newWfInstance, e.getMessage(), WorkflowInstanceStatus.FAILED);
        }
        return wfInstanceId;
    }
|
||||
|
||||
    /**
     * Enrich each DAG node in place with metadata loaded from the node-info table
     * (type, jobId, name, params, enable/skip flags).
     *
     * @param dag the parsed DAG whose nodes are mutated in place
     * @throws PowerJobException when node metadata or a referenced job is missing
     */
    private void initNodeInfo(PEWorkflowDAG dag) {
        for (PEWorkflowDAG.Node node : dag.getNodes()) {
            WorkflowNodeInfoDO workflowNodeInfo = workflowNodeInfoRepository.findById(node.getNodeId()).orElseThrow(() -> new PowerJobException(SystemInstanceResult.CAN_NOT_FIND_NODE));
            if (workflowNodeInfo.getType() == null) {
                // backward compatibility: legacy rows predate the type column, treat them as job nodes
                workflowNodeInfo.setType(WorkflowNodeType.JOB.getCode());
            }
            // copy base fields onto the DAG node
            node.setNodeType(workflowNodeInfo.getType())
                    .setJobId(workflowNodeInfo.getJobId())
                    .setNodeName(workflowNodeInfo.getNodeName())
                    .setNodeParams(workflowNodeInfo.getNodeParams())
                    .setEnable(workflowNodeInfo.getEnable())
                    .setSkipWhenFailed(workflowNodeInfo.getSkipWhenFailed());

            // job nodes need special param handling
            if (node.getNodeType() == WorkflowNodeType.JOB.getCode()) {
                // a job node without a job reference is invalid
                if (workflowNodeInfo.getJobId() == null) {
                    throw new PowerJobException(SystemInstanceResult.ILLEGAL_NODE);
                }
                JobInfoDO jobInfo = jobInfoRepository.findById(workflowNodeInfo.getJobId()).orElseThrow(() -> new PowerJobException(SystemInstanceResult.CAN_NOT_FIND_JOB));
                // node-level params take precedence; fall back to the job's own params
                if (!StringUtils.isBlank(workflowNodeInfo.getNodeParams())) {
                    node.setNodeParams(workflowNodeInfo.getNodeParams());
                } else {
                    node.setNodeParams(jobInfo.getJobParams());
                }
            }
        }
    }
|
||||
|
||||
/**
|
||||
* 构造工作流实例,并初始化基础信息(不包括 DAG )
|
||||
*/
|
||||
private WorkflowInstanceInfoDO constructWfInstance(WorkflowInfoDO wfInfo, String initParams, Long expectTriggerTime, Long wfId, Long wfInstanceId) {
|
||||
|
||||
Date now = new Date();
|
||||
WorkflowInstanceInfoDO newWfInstance = new WorkflowInstanceInfoDO();
|
||||
newWfInstance.setAppId(wfInfo.getAppId());
|
||||
newWfInstance.setWfInstanceId(wfInstanceId);
|
||||
newWfInstance.setWorkflowId(wfId);
|
||||
newWfInstance.setStatus(WorkflowInstanceStatus.WAITING.getV());
|
||||
newWfInstance.setExpectedTriggerTime(expectTriggerTime);
|
||||
newWfInstance.setActualTriggerTime(System.currentTimeMillis());
|
||||
newWfInstance.setWfInitParams(initParams);
|
||||
|
||||
// 如果 initParams 是个合法的 Map<String,String> JSON 串则直接将其注入 wfContext
|
||||
boolean injectDirect = false;
|
||||
try {
|
||||
Map<String, String> parseRes = JSON.parseObject(initParams, new TypeReference<Map<String, String>>() {
|
||||
});
|
||||
if (parseRes != null && !parseRes.isEmpty()) {
|
||||
injectDirect = true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// ignore
|
||||
}
|
||||
if (injectDirect) {
|
||||
newWfInstance.setWfContext(initParams);
|
||||
} else {
|
||||
// 初始化上下文
|
||||
Map<String, String> wfContextMap = Maps.newHashMap();
|
||||
wfContextMap.put(WorkflowContextConstant.CONTEXT_INIT_PARAMS_KEY, initParams);
|
||||
newWfInstance.setWfContext(JsonUtils.toJSONString(wfContextMap));
|
||||
}
|
||||
newWfInstance.setGmtCreate(now);
|
||||
newWfInstance.setGmtModified(now);
|
||||
return newWfInstance;
|
||||
}
|
||||
|
||||
/**
 * Starts a workflow instance.
 * ********************************************
 * 2021-02-03 modify by Echo009
 * 1. Workflows support duplicated job nodes.
 * 2. Removed the initParams parameter; the workflow instance's wfContext is
 *    passed as the initial launch parameters instead.
 * 3. In-place retry is made compatible via {@link WorkflowDAGUtils#listReadyNodes}.
 * ********************************************
 *
 * @param wfInfo       workflow metadata
 * @param wfInstanceId workflow instance ID
 */
@UseCacheLock(type = "processWfInstance", key = "#wfInfo.getMaxWfInstanceNum() > 0 ? #wfInfo.getId() : #wfInstanceId", concurrencyLevel = 1024)
public void start(WorkflowInfoDO wfInfo, Long wfInstanceId) {

    Optional<WorkflowInstanceInfoDO> wfInstanceInfoOpt = workflowInstanceInfoRepository.findByWfInstanceId(wfInstanceId);
    if (!wfInstanceInfoOpt.isPresent()) {
        log.error("[WorkflowInstanceManager] can't find metadata by workflowInstanceId({}).", wfInstanceId);
        return;
    }
    WorkflowInstanceInfoDO wfInstanceInfo = wfInstanceInfoOpt.get();

    // not in WAITING state: stop here (a previous step may already have failed)
    if (wfInstanceInfo.getStatus() != WorkflowInstanceStatus.WAITING.getV()) {
        log.info("[Workflow-{}|{}] workflowInstance({}) needn't running any more.", wfInfo.getId(), wfInstanceId, wfInstanceInfo);
        return;
    }
    // maxWfInstanceNum <= 0 means unlimited
    if (wfInfo.getMaxWfInstanceNum() > 0) {
        // concurrency control: fail fast when too many instances are already running
        int instanceConcurrency = workflowInstanceInfoRepository.countByWorkflowIdAndStatusIn(wfInfo.getId(), WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS);
        if (instanceConcurrency > wfInfo.getMaxWfInstanceNum()) {
            handleWfInstanceFinalStatus(wfInstanceInfo, String.format(SystemInstanceResult.TOO_MANY_INSTANCES, instanceConcurrency, wfInfo.getMaxWfInstanceNum()), WorkflowInstanceStatus.FAILED);
            return;
        }
    }
    try {
        // read the DAG snapshot stored on the instance
        PEWorkflowDAG dag = JSON.parseObject(wfInstanceInfo.getDag(), PEWorkflowDAG.class);
        // root nodes may have been disabled
        List<PEWorkflowDAG.Node> readyNodes = WorkflowDAGUtils.listReadyNodes(dag);
        // handle control nodes first; handling them can unblock further nodes, so loop until none remain
        List<PEWorkflowDAG.Node> controlNodes = findControlNodes(readyNodes);
        while (!controlNodes.isEmpty()) {
            workflowNodeHandleService.handleControlNodes(controlNodes, dag, wfInstanceInfo);
            readyNodes = WorkflowDAGUtils.listReadyNodes(dag);
            controlNodes = findControlNodes(readyNodes);
        }
        if (readyNodes.isEmpty()) {
            // no ready node (all nodes disabled): finish immediately as SUCCEED
            wfInstanceInfo.setFinishedTime(System.currentTimeMillis());
            wfInstanceInfo.setDag(JSON.toJSONString(dag));
            log.warn("[Workflow-{}|{}] workflowInstance({}) needn't running ", wfInfo.getId(), wfInstanceId, wfInstanceInfo);
            handleWfInstanceFinalStatus(wfInstanceInfo, SystemInstanceResult.NO_ENABLED_NODES, WorkflowInstanceStatus.SUCCEED);
            return;
        }
        // mark the workflow instance as RUNNING (persisted inside handleTaskNodes — TODO confirm)
        wfInstanceInfo.setStatus(WorkflowInstanceStatus.RUNNING.getV());
        // dispatch the ready task nodes
        workflowNodeHandleService.handleTaskNodes(readyNodes, dag, wfInstanceInfo);
        log.info("[Workflow-{}|{}] start workflow successfully", wfInfo.getId(), wfInstanceId);
    } catch (Exception e) {
        log.error("[Workflow-{}|{}] start workflow: {} failed.", wfInfo.getId(), wfInstanceId, wfInfo, e);
        handleWfInstanceFinalStatus(wfInstanceInfo, e.getMessage(), WorkflowInstanceStatus.FAILED);
    }
}
|
||||
|
||||
|
||||
/**
 * Moves the workflow forward; called whenever one task of the workflow finishes.
 * ********************************************
 * 2021-02-03 modify by Echo009
 * 1. Workflows support duplicated job nodes.
 * 2. Upstream task results are no longer passed as instance params; the
 *    workflow instance's wfContext is passed instead.
 * 3. Disabled nodes are skipped via {@link WorkflowDAGUtils#listReadyNodes}.
 * ********************************************
 *
 * @param wfInstanceId workflow instance ID
 * @param instanceId   the task instance that just finished
 * @param status       terminal status of that task instance (SUCCEED/FAILED/STOPPED)
 * @param result       result of that task instance
 */
@SuppressWarnings({"squid:S3776", "squid:S2142", "squid:S1141"})
@UseCacheLock(type = "processWfInstance", key = "#wfInstanceId", concurrencyLevel = 1024)
public void move(Long wfInstanceId, Long instanceId, InstanceStatus status, String result) {

    Optional<WorkflowInstanceInfoDO> wfInstanceInfoOpt = workflowInstanceInfoRepository.findByWfInstanceId(wfInstanceId);
    if (!wfInstanceInfoOpt.isPresent()) {
        log.error("[WorkflowInstanceManager] can't find metadata by workflowInstanceId({}).", wfInstanceId);
        return;
    }
    WorkflowInstanceInfoDO wfInstance = wfInstanceInfoOpt.get();
    Long wfId = wfInstance.getWorkflowId();

    // special case: manual STOP while the workflow instance is no longer running
    if (status == InstanceStatus.STOPPED && !WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS.contains(wfInstance.getStatus())) {
        // caused by the user stopping the whole workflow instance; nothing to do
        return;
    }

    try {
        PEWorkflowDAG dag = JSON.parseObject(wfInstance.getDag(), PEWorkflowDAG.class);
        // update the finished node's status inside the DAG
        boolean allFinished = true;
        PEWorkflowDAG.Node instanceNode = null;
        for (PEWorkflowDAG.Node node : dag.getNodes()) {
            if (instanceId.equals(node.getInstanceId())) {
                node.setStatus(status.getV());
                node.setResult(result);
                node.setFinishedTime(CommonUtils.formatTime(System.currentTimeMillis()));
                instanceNode = node;
                log.info("[Workflow-{}|{}] node(nodeId={},jobId={},instanceId={}) finished in workflowInstance, status={},result={}", wfId, wfInstanceId, node.getNodeId(), node.getJobId(), instanceId, status.name(), result);
            }
            if (InstanceStatus.GENERALIZED_RUNNING_STATUS.contains(node.getStatus())) {
                allFinished = false;
            }
        }
        if (instanceNode == null) {
            // the node's instance was replaced in the DAG (in-place retry created a new instance): ignore
            log.warn("[Workflow-{}|{}] current job instance(instanceId={}) is dissociative! it will be ignore! ", wfId, wfInstanceId, instanceId);
            return;
        }

        wfInstance.setGmtModified(new Date());
        wfInstance.setDag(JSON.toJSONString(dag));
        // workflow already finished (e.g. an earlier node failure failed the whole workflow): only persist the latest DAG
        if (!WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS.contains(wfInstance.getStatus())) {
            workflowInstanceInfoRepository.saveAndFlush(wfInstance);
            log.info("[Workflow-{}|{}] workflow already finished(status={}), just update the dag info.", wfId, wfInstanceId, wfInstance.getStatus());
            return;
        }

        // task failed AND failure may not be skipped: the DAG flow is interrupted, fail the whole workflow
        if (status == InstanceStatus.FAILED && isNotAllowSkipWhenFailed(instanceNode)) {
            log.warn("[Workflow-{}|{}] workflow instance process failed because middle task(instanceId={}) failed", wfId, wfInstanceId, instanceId);
            handleWfInstanceFinalStatus(wfInstance, SystemInstanceResult.MIDDLE_JOB_FAILED, WorkflowInstanceStatus.FAILED);
            return;
        }

        // a sub task was stopped manually
        if (status == InstanceStatus.STOPPED) {
            handleWfInstanceFinalStatus(wfInstance, SystemInstanceResult.MIDDLE_JOB_STOPPED, WorkflowInstanceStatus.STOPPED);
            log.warn("[Workflow-{}|{}] workflow instance stopped because middle task(instanceId={}) stopped by user", wfId, wfInstanceId, instanceId);
            return;
        }
        // note: disabled nodes are skipped directly here
        List<PEWorkflowDAG.Node> readyNodes = WorkflowDAGUtils.listReadyNodes(dag);
        // no ready nodes: re-check whether everything has actually completed
        if (readyNodes.isEmpty() && isFinish(dag)) {
            allFinished = true;
        }
        // workflow completed (reaching here implies all sub tasks succeeded)
        if (allFinished) {
            // re-serialize: WorkflowDAGUtils#listReadyNodes may have updated node states
            wfInstance.setDag(JSON.toJSONString(dag));
            // the last task's result becomes the result of the whole workflow
            handleWfInstanceFinalStatus(wfInstance, result, WorkflowInstanceStatus.SUCCEED);
            log.info("[Workflow-{}|{}] process successfully.", wfId, wfInstanceId);
            return;
        }
        // handle control nodes first; each pass may unblock further nodes
        List<PEWorkflowDAG.Node> controlNodes = findControlNodes(readyNodes);
        while (!controlNodes.isEmpty()) {
            workflowNodeHandleService.handleControlNodes(controlNodes, dag, wfInstance);
            readyNodes = WorkflowDAGUtils.listReadyNodes(dag);
            controlNodes = findControlNodes(readyNodes);
        }
        // check completion again (control nodes are allowed at the tail of the DAG)
        if (readyNodes.isEmpty()) {
            if (isFinish(dag)) {
                wfInstance.setDag(JSON.toJSONString(dag));
                handleWfInstanceFinalStatus(wfInstance, result, WorkflowInstanceStatus.SUCCEED);
                log.info("[Workflow-{}|{}] process successfully.", wfId, wfInstanceId);
                return;
            }
            // no ready node but not finished yet: only persist the DAG
            wfInstance.setDag(JSON.toJSONString(dag));
            workflowInstanceInfoRepository.saveAndFlush(wfInstance);
            return;
        }
        // dispatch the ready task nodes
        workflowNodeHandleService.handleTaskNodes(readyNodes, dag, wfInstance);
    } catch (Exception e) {
        handleWfInstanceFinalStatus(wfInstance, "MOVE NEXT STEP FAILED: " + e.getMessage(), WorkflowInstanceStatus.FAILED);
        log.error("[Workflow-{}|{}] update failed.", wfId, wfInstanceId, e);
    }

}
|
||||
|
||||
/**
|
||||
* 更新工作流上下文
|
||||
* fix : 得和其他操作工作流实例的方法用同一把锁才行,不然有并发问题,会导致节点状态被覆盖
|
||||
*
|
||||
* @param wfInstanceId 工作流实例
|
||||
* @param appendedWfContextData 追加的上下文数据
|
||||
* @since 2021/02/05
|
||||
*/
|
||||
@UseCacheLock(type = "processWfInstance", key = "#wfInstanceId", concurrencyLevel = 1024)
|
||||
public void updateWorkflowContext(Long wfInstanceId, Map<String, String> appendedWfContextData) {
|
||||
|
||||
try {
|
||||
Optional<WorkflowInstanceInfoDO> wfInstanceInfoOpt = workflowInstanceInfoRepository.findByWfInstanceId(wfInstanceId);
|
||||
if (!wfInstanceInfoOpt.isPresent()) {
|
||||
log.error("[WorkflowInstanceManager] can't find metadata by workflowInstanceId({}).", wfInstanceId);
|
||||
return;
|
||||
}
|
||||
WorkflowInstanceInfoDO wfInstance = wfInstanceInfoOpt.get();
|
||||
HashMap<String, String> wfContext = JSON.parseObject(wfInstance.getWfContext(), new TypeReference<HashMap<String, String>>() {
|
||||
});
|
||||
for (Map.Entry<String, String> entry : appendedWfContextData.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
String originValue = wfContext.put(key, entry.getValue());
|
||||
log.info("[Workflow-{}|{}] update workflow context {} : {} -> {}", wfInstance.getWorkflowId(), wfInstance.getWfInstanceId(), key, originValue, entry.getValue());
|
||||
}
|
||||
wfInstance.setWfContext(JSON.toJSONString(wfContext));
|
||||
workflowInstanceInfoRepository.saveAndFlush(wfInstance);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("[WorkflowInstanceManager] update workflow(workflowInstanceId={}) context failed.", wfInstanceId, e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Finalizes a workflow instance: persists the terminal status, propagates the
 * outcome to the parent workflow for nested workflows, and raises an alarm on failure.
 *
 * @param wfInstance             workflow instance to finalize
 * @param result                 result string to record
 * @param workflowInstanceStatus terminal status (SUCCEED/FAILED/STOPPED)
 */
private void handleWfInstanceFinalStatus(WorkflowInstanceInfoDO wfInstance, String result, WorkflowInstanceStatus workflowInstanceStatus) {
    wfInstance.setStatus(workflowInstanceStatus.getV());
    wfInstance.setResult(result);
    wfInstance.setFinishedTime(System.currentTimeMillis());
    wfInstance.setGmtModified(new Date());
    workflowInstanceInfoRepository.saveAndFlush(wfInstance);

    // nested workflow: propagate to the parent
    if (wfInstance.getParentWfInstanceId() != null) {
        // on success, merge this instance's context into the parent's context first
        if (workflowInstanceStatus == WorkflowInstanceStatus.SUCCEED){
            HashMap<String, String> wfContext = JSON.parseObject(wfInstance.getWfContext(), new TypeReference<HashMap<String, String>>() {
            });
            // call through the Spring proxy so @UseCacheLock takes effect
            SpringUtils.getBean(this.getClass()).updateWorkflowContext(wfInstance.getParentWfInstanceId(), wfContext);
        }
        // move the parent workflow forward, fix https://github.com/PowerJob/PowerJob/issues/465
        SpringUtils.getBean(this.getClass()).move(wfInstance.getParentWfInstanceId(), wfInstance.getWfInstanceId(), StatusMappingHelper.toInstanceStatus(workflowInstanceStatus), result);
    }

    // raise an alarm on failure
    if (workflowInstanceStatus == WorkflowInstanceStatus.FAILED) {
        try {
            workflowInfoRepository.findById(wfInstance.getWorkflowId()).ifPresent(wfInfo -> {
                WorkflowInstanceAlarm content = new WorkflowInstanceAlarm();

                BeanUtils.copyProperties(wfInfo, content);
                BeanUtils.copyProperties(wfInstance, content);
                content.setResult(result);

                List<UserInfoDO> userList = userService.fetchNotifyUserList(wfInfo.getNotifyUserIds());
                alarmCenter.alarmFailed(content, AlarmUtils.convertUserInfoList2AlarmTargetList(userList));
            });
        } catch (Exception ignore) {
            // alarming is best-effort; never let it break finalization
        }
    }
}
|
||||
|
||||
|
||||
|
||||
private List<PEWorkflowDAG.Node> findControlNodes(List<PEWorkflowDAG.Node> readyNodes) {
|
||||
return readyNodes.stream().filter(node -> {
|
||||
WorkflowNodeType nodeType = WorkflowNodeType.of(node.getNodeType());
|
||||
return nodeType.isControlNode();
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private boolean isFinish(PEWorkflowDAG dag) {
|
||||
for (PEWorkflowDAG.Node node : dag.getNodes()) {
|
||||
if (InstanceStatus.GENERALIZED_RUNNING_STATUS.contains(node.getStatus())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,230 @@
|
||||
package tech.powerjob.server.core.workflow;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
import tech.powerjob.common.SystemInstanceResult;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.enums.WorkflowInstanceStatus;
|
||||
import tech.powerjob.common.enums.WorkflowNodeType;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
import tech.powerjob.common.response.WorkflowInstanceInfoDTO;
|
||||
import tech.powerjob.common.enums.SwitchableStatus;
|
||||
import tech.powerjob.server.common.utils.SpringUtils;
|
||||
import tech.powerjob.server.core.instance.InstanceService;
|
||||
import tech.powerjob.server.core.lock.UseCacheLock;
|
||||
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository;
|
||||
import tech.powerjob.server.remote.server.redirector.DesignateServer;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* 工作流实例服务
|
||||
*
|
||||
* @author tjq
|
||||
* @author Echo009
|
||||
* @since 2020/5/31
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class WorkflowInstanceService {
|
||||
|
||||
private final InstanceService instanceService;
|
||||
|
||||
private final WorkflowInstanceInfoRepository wfInstanceInfoRepository;
|
||||
|
||||
private final WorkflowInstanceManager workflowInstanceManager;
|
||||
|
||||
private final WorkflowInfoRepository workflowInfoRepository;
|
||||
|
||||
/**
|
||||
* 停止工作流实例(入口)
|
||||
*
|
||||
* @param wfInstanceId 工作流实例ID
|
||||
* @param appId 所属应用ID
|
||||
*/
|
||||
public void stopWorkflowInstanceEntrance(Long wfInstanceId, Long appId) {
|
||||
WorkflowInstanceInfoDO wfInstance = fetchWfInstance(wfInstanceId, appId);
|
||||
if (!WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS.contains(wfInstance.getStatus())) {
|
||||
throw new PowerJobException("workflow instance already stopped");
|
||||
}
|
||||
// 如果这是一个被嵌套的工作流,则终止父工作流
|
||||
if (wfInstance.getParentWfInstanceId() != null) {
|
||||
SpringUtils.getBean(this.getClass()).stopWorkflowInstance(wfInstance.getParentWfInstanceId(), appId);
|
||||
return;
|
||||
}
|
||||
SpringUtils.getBean(this.getClass()).stopWorkflowInstance(wfInstanceId, appId);
|
||||
}
|
||||
|
||||
/**
 * Stops a workflow instance: terminates every started-but-unfinished task
 * instance in the DAG, then marks the workflow instance itself as STOPPED.
 *
 * @param wfInstanceId workflow instance ID
 * @param appId        owning application ID (used for the permission check)
 */
@DesignateServer
@UseCacheLock(type = "processWfInstance", key = "#wfInstanceId", concurrencyLevel = 1024)
public void stopWorkflowInstance(Long wfInstanceId, Long appId) {
    WorkflowInstanceInfoDO wfInstance = fetchWfInstance(wfInstanceId, appId);
    if (!WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS.contains(wfInstance.getStatus())) {
        throw new PowerJobException("workflow instance already stopped");
    }
    // stop all started but unfinished services
    PEWorkflowDAG dag = JSON.parseObject(wfInstance.getDag(), PEWorkflowDAG.class);
    // walk every node and terminate the running ones
    dag.getNodes().forEach(node -> {
        try {
            if (node.getInstanceId() != null && InstanceStatus.GENERALIZED_RUNNING_STATUS.contains(node.getStatus())) {
                log.debug("[WfInstance-{}] instance({}) is running, try to stop it now.", wfInstanceId, node.getInstanceId());
                node.setStatus(InstanceStatus.STOPPED.getV());
                node.setResult(SystemInstanceResult.STOPPED_BY_USER);
                // special case for nested workflow nodes: their instanceId is a
                // sub workflow instance ID, so stop it recursively
                if (Objects.equals(node.getNodeType(), WorkflowNodeType.NESTED_WORKFLOW.getCode())) {
                    stopWorkflowInstance(node.getInstanceId(), appId);
                    //
                } else {
                    // note: this does not guarantee the running instance actually terminates
                    instanceService.stopInstance(appId, node.getInstanceId());
                }
            }
        } catch (Exception e) {
            // best effort: keep stopping remaining nodes even if one fails
            log.warn("[WfInstance-{}] stop instance({}) failed.", wfInstanceId, JSON.toJSONString(node), e);
        }
    });

    // persist the final state to the database
    wfInstance.setDag(JSON.toJSONString(dag));
    wfInstance.setStatus(WorkflowInstanceStatus.STOPPED.getV());
    wfInstance.setResult(SystemInstanceResult.STOPPED_BY_USER);
    wfInstance.setGmtModified(new Date());
    wfInstanceInfoRepository.saveAndFlush(wfInstance);

    log.info("[WfInstance-{}] stop workflow instance successfully~", wfInstanceId);
}
|
||||
|
||||
/**
 * Retries a failed workflow instance.
 * Add by Echo009 on 2021/02/07
 *
 * @param wfInstanceId workflow instance ID
 * @param appId        application ID
 */
@DesignateServer
@UseCacheLock(type = "processWfInstance", key = "#wfInstanceId", concurrencyLevel = 1024)
public void retryWorkflowInstance(Long wfInstanceId, Long appId) {
    WorkflowInstanceInfoDO wfInstance = fetchWfInstance(wfInstanceId, appId);
    // only failed (non-running, non-successful) workflows may be retried
    if (WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS.contains(wfInstance.getStatus())) {
        throw new PowerJobException("workflow instance is running");
    }
    if (wfInstance.getStatus() == WorkflowInstanceStatus.SUCCEED.getV()) {
        throw new PowerJobException("workflow instance is already successful");
    }
    // instances that failed because of an invalid DAG or missing job info cannot be retried
    if (SystemInstanceResult.CAN_NOT_FIND_JOB.equals(wfInstance.getResult())) {
        throw new PowerJobException("you can't retry the workflow instance which is missing job info!");
    }
    // validate the DAG stored on the instance
    PEWorkflowDAG dag;
    try {
        dag = JSON.parseObject(wfInstance.getDag(), PEWorkflowDAG.class);
        if (!WorkflowDAGUtils.valid(dag)) {
            throw new PowerJobException(SystemInstanceResult.INVALID_DAG);
        }
    } catch (Exception e) {
        throw new PowerJobException("you can't retry the workflow instance whose DAG is illegal!");
    }
    // the workflow metadata must still exist and be enabled
    Optional<WorkflowInfoDO> workflowInfo = workflowInfoRepository.findById(wfInstance.getWorkflowId());
    if (!workflowInfo.isPresent() || workflowInfo.get().getStatus() == SwitchableStatus.DISABLE.getV()) {
        throw new PowerJobException("you can't retry the workflow instance whose metadata is unavailable!");
    }
    // reset the retryable (failed) nodes in the DAG
    WorkflowDAGUtils.resetRetryableNode(dag);
    wfInstance.setDag(JSON.toJSONString(dag));
    // reset the instance status without overwriting the actual trigger time
    wfInstance.setStatus(WorkflowInstanceStatus.WAITING.getV());
    wfInstance.setGmtModified(new Date());
    wfInstanceInfoRepository.saveAndFlush(wfInstance);
    // start immediately
    workflowInstanceManager.start(workflowInfo.get(), wfInstanceId);
}
|
||||
|
||||
|
||||
public WorkflowInstanceInfoDTO fetchWorkflowInstanceInfo(Long wfInstanceId, Long appId) {
|
||||
WorkflowInstanceInfoDO wfInstance = fetchWfInstance(wfInstanceId, appId);
|
||||
WorkflowInstanceInfoDTO dto = new WorkflowInstanceInfoDTO();
|
||||
BeanUtils.copyProperties(wfInstance, dto);
|
||||
return dto;
|
||||
}
|
||||
|
||||
public WorkflowInstanceInfoDO fetchWfInstance(Long wfInstanceId, Long appId) {
|
||||
WorkflowInstanceInfoDO wfInstance = wfInstanceInfoRepository.findByWfInstanceId(wfInstanceId).orElseThrow(() -> new IllegalArgumentException("can't find workflow instance by wfInstanceId: " + wfInstanceId));
|
||||
if (!Objects.equals(appId, wfInstance.getAppId())) {
|
||||
throw new PowerJobException("Permission Denied!");
|
||||
}
|
||||
return wfInstance;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add by Echo009 on 2021/02/20
|
||||
* 将节点标记成功
|
||||
* 注意:这里仅能标记真正执行失败的且不允许跳过的节点
|
||||
* 即处于 [失败且不允许跳过] 的节点
|
||||
* 而且仅会操作工作流实例 DAG 中的节点信息(状态、result)
|
||||
* 并不会改变对应任务实例中的任何信息
|
||||
* <p>
|
||||
* 还是加把锁保平安 ~
|
||||
*
|
||||
* @param wfInstanceId 工作流实例 ID
|
||||
* @param nodeId 节点 ID
|
||||
*/
|
||||
@DesignateServer
|
||||
@UseCacheLock(type = "processWfInstance", key = "#wfInstanceId", concurrencyLevel = 1024)
|
||||
public void markNodeAsSuccess(Long appId, Long wfInstanceId, Long nodeId) {
|
||||
|
||||
WorkflowInstanceInfoDO wfInstance = fetchWfInstance(wfInstanceId, appId);
|
||||
// 校验工作流实例状态,运行中的不允许处理
|
||||
if (WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS.contains(wfInstance.getStatus())) {
|
||||
throw new PowerJobException("you can't mark the node in a running workflow!");
|
||||
}
|
||||
// 这里一定能反序列化成功
|
||||
PEWorkflowDAG dag = JSON.parseObject(wfInstance.getDag(), PEWorkflowDAG.class);
|
||||
PEWorkflowDAG.Node targetNode = null;
|
||||
for (PEWorkflowDAG.Node node : dag.getNodes()) {
|
||||
if (node.getNodeId().equals(nodeId)) {
|
||||
targetNode = node;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (targetNode == null) {
|
||||
throw new PowerJobException("can't find the node in current DAG!");
|
||||
}
|
||||
boolean allowSkipWhenFailed = targetNode.getSkipWhenFailed() != null && targetNode.getSkipWhenFailed();
|
||||
// 仅允许处理 执行失败的且不允许失败跳过的节点
|
||||
if (targetNode.getInstanceId() != null
|
||||
&& targetNode.getStatus() == InstanceStatus.FAILED.getV()
|
||||
// 不允许失败跳过
|
||||
&& !allowSkipWhenFailed) {
|
||||
// 仅处理工作流实例中的节点信息
|
||||
targetNode.setStatus(InstanceStatus.SUCCEED.getV())
|
||||
.setResult(SystemInstanceResult.MARK_AS_SUCCESSFUL_NODE);
|
||||
|
||||
wfInstance.setDag(JSON.toJSONString(dag));
|
||||
wfInstanceInfoRepository.saveAndFlush(wfInstance);
|
||||
return;
|
||||
}
|
||||
// 其他情况均拒绝处理
|
||||
throw new PowerJobException("you can only mark the node which is failed and not allow to skip!");
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -0,0 +1,365 @@
|
||||
package tech.powerjob.server.core.workflow;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.common.model.LifeCycle;
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
import tech.powerjob.common.request.http.SaveWorkflowNodeRequest;
|
||||
import tech.powerjob.common.request.http.SaveWorkflowRequest;
|
||||
import tech.powerjob.server.common.SJ;
|
||||
import tech.powerjob.common.enums.SwitchableStatus;
|
||||
import tech.powerjob.server.common.timewheel.holder.InstanceTimeWheelService;
|
||||
import tech.powerjob.server.core.scheduler.TimingStrategyService;
|
||||
import tech.powerjob.server.core.service.NodeValidateService;
|
||||
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
|
||||
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowNodeInfoRepository;
|
||||
import tech.powerjob.server.remote.server.redirector.DesignateServer;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import javax.transaction.Transactional;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Workflow 服务
|
||||
*
|
||||
* @author tjq
|
||||
* @author zenggonggu
|
||||
* @author Echo009
|
||||
* @since 2020/5/26
|
||||
*/
|
||||
@Slf4j
|
||||
@Service
|
||||
public class WorkflowService {
|
||||
|
||||
@Resource
|
||||
private WorkflowInstanceManager workflowInstanceManager;
|
||||
@Resource
|
||||
private WorkflowInfoRepository workflowInfoRepository;
|
||||
@Resource
|
||||
private WorkflowNodeInfoRepository workflowNodeInfoRepository;
|
||||
@Resource
|
||||
private NodeValidateService nodeValidateService;
|
||||
@Resource
|
||||
private TimingStrategyService timingStrategyService;
|
||||
|
||||
/**
 * Saves or updates a workflow's metadata.
 * <p>
 * Note: node details are not persisted here; the DAG is validated and reduced
 * to an ID-only form by validateAndConvert2String.
 *
 * @param req save request
 * @return workflow ID
 */
@Transactional(rollbackOn = Exception.class)
public Long saveWorkflow(SaveWorkflowRequest req) {

    req.valid();

    Long wfId = req.getId();
    WorkflowInfoDO wf;
    if (wfId == null) {
        wf = new WorkflowInfoDO();
        wf.setGmtCreate(new Date());
    } else {
        // effectively-final copy required by the lambda below
        Long finalWfId = wfId;
        wf = workflowInfoRepository.findById(wfId).orElseThrow(() -> new IllegalArgumentException("can't find workflow by id:" + finalWfId));
    }

    BeanUtils.copyProperties(req, wf);
    wf.setGmtModified(new Date());
    wf.setStatus(req.isEnable() ? SwitchableStatus.ENABLE.getV() : SwitchableStatus.DISABLE.getV());
    wf.setTimeExpressionType(req.getTimeExpressionType().getV());

    if (req.getNotifyUserIds() != null) {
        wf.setNotifyUserIds(SJ.COMMA_JOINER.join(req.getNotifyUserIds()));
    }
    if (req.getLifeCycle() != null) {
        wf.setLifecycle(JSON.toJSONString(req.getLifeCycle()));
    }
    if (TimeExpressionType.FREQUENT_TYPES.contains(req.getTimeExpressionType().getV())) {
        // fixed-frequency types don't get a precomputed trigger time; the expression is cleared
        wf.setTimeExpression(null);
    } else {
        // compute the next trigger time, bounded by the lifecycle window
        LifeCycle lifeCycle = Optional.ofNullable(req.getLifeCycle()).orElse(LifeCycle.EMPTY_LIFE_CYCLE);
        Long nextValidTime = timingStrategyService.calculateNextTriggerTimeWithInspection(TimeExpressionType.of(wf.getTimeExpressionType()), wf.getTimeExpression(), lifeCycle.getStart(), lifeCycle.getEnd());
        wf.setNextTriggerTime(nextValidTime);
    }
    // a new workflow must be saved first to obtain its generated ID
    if (wfId == null) {
        wf = workflowInfoRepository.saveAndFlush(wf);
        wfId = wf.getId();
    }
    wf.setPeDAG(validateAndConvert2String(wfId, req.getDag()));
    workflowInfoRepository.saveAndFlush(wf);
    return wfId;
}
|
||||
|
||||
/**
 * Validates the DAG, binds its nodes to this workflow and serializes it.
 * Only node IDs are kept in the stored DAG.
 * Dissociative node records (nodes no longer referenced by the DAG) are
 * physically deleted here.
 *
 * @param wfId workflow ID the DAG belongs to
 * @param dag  the DAG sent by the client
 * @return the serialized, ID-only DAG
 * @throws PowerJobException when the DAG is invalid, a node is missing, or a
 *                           node belongs to another workflow
 */
private String validateAndConvert2String(Long wfId, PEWorkflowDAG dag) {
    if (dag == null || !WorkflowDAGUtils.valid(dag)) {
        throw new PowerJobException("illegal DAG");
    }
    // note: only the graph's basic info is kept here: nodeId, jobId, jobName(nodeAlias)
    // jobId / jobName are taken from the node records as the source of truth
    List<Long> nodeIdList = Lists.newArrayList();
    List<PEWorkflowDAG.Node> newNodes = Lists.newArrayList();
    WorkflowDAG complexDag = WorkflowDAGUtils.convert(dag);
    for (PEWorkflowDAG.Node node : dag.getNodes()) {
        WorkflowNodeInfoDO nodeInfo = workflowNodeInfoRepository.findById(node.getNodeId()).orElseThrow(() -> new PowerJobException("can't find node info by id :" + node.getNodeId()));
        // bind a not-yet-attached node to this workflow
        if (nodeInfo.getWorkflowId() == null) {
            nodeInfo.setWorkflowId(wfId);
            nodeInfo.setGmtModified(new Date());
            workflowNodeInfoRepository.saveAndFlush(nodeInfo);
        }
        if (!wfId.equals(nodeInfo.getWorkflowId())) {
            throw new PowerJobException("can't use another workflow's node");
        }
        nodeValidateService.complexValidate(nodeInfo, complexDag);
        // keep only the node's ID, clear everything else
        newNodes.add(new PEWorkflowDAG.Node(node.getNodeId()));
        nodeIdList.add(node.getNodeId());
    }
    dag.setNodes(newNodes);
    // physically delete node records no longer part of the DAG
    int deleteCount = workflowNodeInfoRepository.deleteByWorkflowIdAndIdNotIn(wfId, nodeIdList);
    log.warn("[WorkflowService-{}] delete {} dissociative nodes of workflow", wfId, deleteCount);
    return JSON.toJSONString(dag);
}
|
||||
|
||||
|
||||
/**
 * Deep-copies a workflow: duplicates the workflow record and every node record,
 * then rewrites the DAG's node IDs and edges to point at the copies.
 *
 * @param wfId  source workflow ID
 * @param appId APP ID (for the permission check)
 * @return the ID of the generated workflow
 * @throws IllegalStateException when the source workflow has been deleted
 */
@Transactional(rollbackOn = Exception.class)
public long copyWorkflow(Long wfId, Long appId) {

    WorkflowInfoDO originWorkflow = permissionCheck(wfId, appId);
    if (originWorkflow.getStatus() == SwitchableStatus.DELETED.getV()) {
        throw new IllegalStateException("can't copy the workflow which has been deleted!");
    }
    // copy the basic workflow info
    WorkflowInfoDO copyWorkflow = new WorkflowInfoDO();
    BeanUtils.copyProperties(originWorkflow, copyWorkflow);
    copyWorkflow.setId(null);
    copyWorkflow.setGmtCreate(new Date());
    copyWorkflow.setGmtModified(new Date());
    copyWorkflow.setWfName(copyWorkflow.getWfName() + "_COPY");
    // save first to obtain the generated ID
    copyWorkflow = workflowInfoRepository.saveAndFlush(copyWorkflow);

    // no DAG stored yet: nothing more to copy
    if (StringUtils.isEmpty(copyWorkflow.getPeDAG())) {
        return copyWorkflow.getId();
    }

    PEWorkflowDAG dag = JSON.parseObject(copyWorkflow.getPeDAG(), PEWorkflowDAG.class);

    // copy the node records and remap the DAG's node references
    if (!CollectionUtils.isEmpty(dag.getNodes())) {
        // originNodeId => copyNodeId
        HashMap<Long, Long> nodeIdMap = new HashMap<>(dag.getNodes().size(), 1);
        // fix up node info
        for (PEWorkflowDAG.Node node : dag.getNodes()) {

            WorkflowNodeInfoDO originNode = workflowNodeInfoRepository.findById(node.getNodeId()).orElseThrow(() -> new IllegalArgumentException("can't find workflow Node by id: " + node.getNodeId()));

            WorkflowNodeInfoDO copyNode = new WorkflowNodeInfoDO();
            BeanUtils.copyProperties(originNode, copyNode);
            copyNode.setId(null);
            copyNode.setWorkflowId(copyWorkflow.getId());
            copyNode.setGmtCreate(new Date());
            copyNode.setGmtModified(new Date());

            copyNode = workflowNodeInfoRepository.saveAndFlush(copyNode);
            nodeIdMap.put(originNode.getId(), copyNode.getId());

            node.setNodeId(copyNode.getId());
        }
        // fix up edge info
        // NOTE(review): assumes dag.getEdges() is non-null whenever nodes exist — verify PEWorkflowDAG's contract
        for (PEWorkflowDAG.Edge edge : dag.getEdges()) {
            edge.setFrom(nodeIdMap.get(edge.getFrom()));
            edge.setTo(nodeIdMap.get(edge.getTo()));
        }
    }
    copyWorkflow.setPeDAG(JSON.toJSONString(dag));
    workflowInfoRepository.saveAndFlush(copyWorkflow);
    return copyWorkflow.getId();
}
|
||||
|
||||
|
||||
/**
|
||||
* 获取工作流元信息,这里获取到的 DAG 包含节点的完整信息(是否启用、是否允许失败跳过)
|
||||
*
|
||||
* @param wfId 工作流ID
|
||||
* @param appId 应用ID
|
||||
* @return 对外输出对象
|
||||
*/
|
||||
public WorkflowInfoDO fetchWorkflow(Long wfId, Long appId) {
|
||||
WorkflowInfoDO wfInfo = permissionCheck(wfId, appId);
|
||||
fillWorkflow(wfInfo);
|
||||
return wfInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* 删除工作流(软删除)
|
||||
*
|
||||
* @param wfId 工作流ID
|
||||
* @param appId 所属应用ID
|
||||
*/
|
||||
public void deleteWorkflow(Long wfId, Long appId) {
|
||||
WorkflowInfoDO wfInfo = permissionCheck(wfId, appId);
|
||||
wfInfo.setStatus(SwitchableStatus.DELETED.getV());
|
||||
wfInfo.setGmtModified(new Date());
|
||||
workflowInfoRepository.saveAndFlush(wfInfo);
|
||||
}
|
||||
|
||||
/**
|
||||
* 禁用工作流
|
||||
*
|
||||
* @param wfId 工作流ID
|
||||
* @param appId 所属应用ID
|
||||
*/
|
||||
public void disableWorkflow(Long wfId, Long appId) {
|
||||
WorkflowInfoDO wfInfo = permissionCheck(wfId, appId);
|
||||
wfInfo.setStatus(SwitchableStatus.DISABLE.getV());
|
||||
wfInfo.setGmtModified(new Date());
|
||||
workflowInfoRepository.saveAndFlush(wfInfo);
|
||||
}
|
||||
|
||||
/**
|
||||
* 启用工作流
|
||||
*
|
||||
* @param wfId 工作流ID
|
||||
* @param appId 所属应用ID
|
||||
*/
|
||||
public void enableWorkflow(Long wfId, Long appId) {
|
||||
WorkflowInfoDO wfInfo = permissionCheck(wfId, appId);
|
||||
wfInfo.setStatus(SwitchableStatus.ENABLE.getV());
|
||||
wfInfo.setGmtModified(new Date());
|
||||
workflowInfoRepository.saveAndFlush(wfInfo);
|
||||
}
|
||||
|
||||
/**
 * Runs a workflow immediately, or after an optional delay via the time wheel.
 *
 * @param wfId       workflow ID
 * @param appId      owning app ID
 * @param initParams start parameters handed to the workflow instance
 * @param delay      delay in milliseconds; null is treated as 0
 * @return instanceId (wfInstanceId) of the created workflow instance
 */
@DesignateServer
public Long runWorkflow(Long wfId, Long appId, String initParams, Long delay) {

    delay = delay == null ? 0 : delay;
    WorkflowInfoDO wfInfo = permissionCheck(wfId, appId);

    log.info("[WorkflowService-{}] try to run workflow, initParams={},delay={} ms.", wfInfo.getId(), initParams, delay);
    // create the instance record first; its expected trigger time includes the delay
    Long wfInstanceId = workflowInstanceManager.create(wfInfo, initParams, System.currentTimeMillis() + delay, null);
    if (delay <= 0) {
        // no delay: start right away
        workflowInstanceManager.start(wfInfo, wfInstanceId);
    } else {
        // delayed start: hand the start callback to the time wheel
        InstanceTimeWheelService.schedule(wfInstanceId, delay, () -> workflowInstanceManager.start(wfInfo, wfInstanceId));
    }
    return wfInstanceId;
}
|
||||
|
||||
|
||||
/**
 * Saves workflow nodes (create or update in a single transactional call).
 * Every request in the list must belong to the same app.
 *
 * @param workflowNodeRequestList workflow node save requests
 * @return the created or updated workflow node records
 */
@Transactional(rollbackOn = Exception.class)
public List<WorkflowNodeInfoDO> saveWorkflowNode(List<SaveWorkflowNodeRequest> workflowNodeRequestList) {
    if (CollectionUtils.isEmpty(workflowNodeRequestList)) {
        return Collections.emptyList();
    }
    // app id of the first request is the reference all others must match
    final Long appId = workflowNodeRequestList.get(0).getAppId();
    List<WorkflowNodeInfoDO> res = new ArrayList<>(workflowNodeRequestList.size());
    for (SaveWorkflowNodeRequest req : workflowNodeRequestList) {
        req.valid();
        // all nodes must belong to the same APP
        if (!appId.equals(req.getAppId())) {
            throw new PowerJobException("node list must are in the same app");
        }
        WorkflowNodeInfoDO workflowNodeInfo;
        if (req.getId() != null) {
            // update path: the node must already exist
            workflowNodeInfo = workflowNodeInfoRepository.findById(req.getId()).orElseThrow(() -> new IllegalArgumentException("can't find workflow Node by id: " + req.getId()));
        } else {
            // create path
            workflowNodeInfo = new WorkflowNodeInfoDO();
            workflowNodeInfo.setGmtCreate(new Date());
        }
        BeanUtils.copyProperties(req, workflowNodeInfo);
        workflowNodeInfo.setType(req.getType());
        // basic per-node validation before persisting
        nodeValidateService.simpleValidate(workflowNodeInfo);
        workflowNodeInfo.setGmtModified(new Date());
        workflowNodeInfo = workflowNodeInfoRepository.saveAndFlush(workflowNodeInfo);
        res.add(workflowNodeInfo);
    }
    return res;
}
|
||||
|
||||
|
||||
/**
 * Enriches the workflow's serialized DAG with full node details (type, jobId,
 * enable/skip flags, name, params) loaded from the node table, then writes the
 * enriched DAG back into {@code wfInfo.peDAG}.
 * A DAG that fails to parse is logged and left untouched (best effort).
 *
 * @param wfInfo workflow record whose peDAG field is enriched in place
 */
private void fillWorkflow(WorkflowInfoDO wfInfo) {

    PEWorkflowDAG dagInfo = null;
    try {
        dagInfo = JSON.parseObject(wfInfo.getPeDAG(), PEWorkflowDAG.class);
    } catch (Exception e) {
        // deliberately swallowed: an unparseable DAG just means nothing to fill
        log.warn("[WorkflowService-{}]illegal DAG : {}", wfInfo.getId(), wfInfo.getPeDAG());
    }
    if (dagInfo == null) {
        return;
    }

    // nodeId -> node record, for O(1) lookup while walking the DAG
    Map<Long, WorkflowNodeInfoDO> nodeIdNodInfoMap = Maps.newHashMap();

    workflowNodeInfoRepository.findByWorkflowId(wfInfo.getId()).forEach(
            e -> nodeIdNodInfoMap.put(e.getId(), e)
    );
    // fill in node info
    if (!CollectionUtils.isEmpty(dagInfo.getNodes())) {
        for (PEWorkflowDAG.Node node : dagInfo.getNodes()) {
            WorkflowNodeInfoDO nodeInfo = nodeIdNodInfoMap.get(node.getNodeId());
            // nodes missing from the table are simply left as bare ids
            if (nodeInfo != null) {
                node.setNodeType(nodeInfo.getType())
                        .setJobId(nodeInfo.getJobId())
                        .setEnable(nodeInfo.getEnable())
                        .setSkipWhenFailed(nodeInfo.getSkipWhenFailed())
                        .setNodeName(nodeInfo.getNodeName())
                        .setNodeParams(nodeInfo.getNodeParams());
            }
        }
    }
    wfInfo.setPeDAG(JSON.toJSONString(dagInfo));
}
|
||||
|
||||
|
||||
private WorkflowInfoDO permissionCheck(Long wfId, Long appId) {
|
||||
WorkflowInfoDO wfInfo = workflowInfoRepository.findById(wfId).orElseThrow(() -> new IllegalArgumentException("can't find workflow by id: " + wfId));
|
||||
if (!wfInfo.getAppId().equals(appId)) {
|
||||
throw new PowerJobException("Permission Denied! can't operate other app's workflow!");
|
||||
}
|
||||
return wfInfo;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,81 @@
|
||||
package tech.powerjob.server.core.workflow.algorithm;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import lombok.*;
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * DAG workflow object.
 * Each node records both its upstream and downstream connections, so this
 * structure cannot be serialized to / deserialized from JSON directly
 * (the references are cyclic).
 *
 * @author tjq
 * @author Echo009
 * @since 2020/5/26
 */
@Data
@ToString(exclude = {"nodeMap"})
@NoArgsConstructor
@AllArgsConstructor
public class WorkflowDAG {

    /**
     * A DAG is allowed to have multiple root nodes.
     */
    private List<Node> roots;

    // index: nodeId -> Node, for O(1) lookup via getNode
    private Map<Long, Node> nodeMap;

    /**
     * Looks up a node by id; returns null when the index is absent or the id is unknown.
     */
    public Node getNode(Long nodeId) {
        if (nodeMap == null) {
            return null;
        }
        return nodeMap.get(nodeId);
    }

    @Getter
    @Setter
    @EqualsAndHashCode(exclude = {"dependencies", "dependenceEdgeMap", "successorEdgeMap", "holder","successors"})
    @ToString(exclude = {"dependencies", "dependenceEdgeMap", "successorEdgeMap", "holder"})
    @NoArgsConstructor
    public static final class Node {

        // wraps a point-edge node; connection collections start empty and are filled during conversion
        public Node(PEWorkflowDAG.Node node) {
            this.nodeId = node.getNodeId();
            this.holder = node;
            this.dependencies = Lists.newLinkedList();
            this.dependenceEdgeMap = Maps.newHashMap();
            this.successors = Lists.newLinkedList();
            this.successorEdgeMap = Maps.newHashMap();
        }

        /**
         * node id
         *
         * @since 20210128
         */
        private Long nodeId;

        // the original point-edge node this reference node wraps
        private PEWorkflowDAG.Node holder;
        /**
         * upstream nodes this node depends on
         */
        private List<Node> dependencies;
        /**
         * edges connecting this node to its dependency nodes
         */
        private Map<Node, PEWorkflowDAG.Edge> dependenceEdgeMap;
        /**
         * successors (child nodes)
         */
        private List<Node> successors;
        /**
         * edges connecting this node to its successor nodes
         */
        private Map<Node, PEWorkflowDAG.Edge> successorEdgeMap;

    }
}
|
||||
@ -0,0 +1,340 @@
|
||||
package tech.powerjob.server.core.workflow.algorithm;
|
||||
|
||||
import com.google.common.collect.*;
|
||||
import tech.powerjob.common.SystemInstanceResult;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.enums.WorkflowNodeType;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
import tech.powerjob.common.serialize.JsonUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
 * DAG utility class: validation, root listing, ready-node computation,
 * disabled-edge propagation, and conversion between the point-edge notation
 * ({@link PEWorkflowDAG}) and the reference-linked form ({@link WorkflowDAG}).
 *
 * @author tjq
 * @author Echo009
 * @since 2020/5/26
 */
public class WorkflowDAGUtils {

    // utility class: no instances
    private WorkflowDAGUtils() {

    }

    /**
     * Resets the state of retryable nodes so they can be dispatched again.
     * A node is retryable when it failed and is not allowed to be skipped,
     * or when it was stopped manually.
     *
     * @param dag a valid directed acyclic graph
     */
    public static void resetRetryableNode(PEWorkflowDAG dag){
        // reset nodes that need a retry (failed and non-skippable, or manually stopped)
        for (PEWorkflowDAG.Node node : dag.getNodes()) {
            boolean realFailed = node.getStatus() == InstanceStatus.FAILED.getV() && isNotAllowSkipWhenFailed(node);
            if (realFailed || node.getStatus() == InstanceStatus.STOPPED.getV()) {
                node.setStatus(InstanceStatus.WAITING_DISPATCH.getV());
                // only job nodes (or untyped nodes) get their instance id cleared
                if (node.getNodeType() == null || node.getNodeType() == WorkflowNodeType.JOB.getCode()){
                    node.setInstanceId(null);
                }
            }
        }
    }

    /**
     * Lists all root nodes (nodes with no incoming edge).
     *
     * @param peWorkflowDAG DAG in point-edge notation
     * @return list of root nodes
     */
    public static List<PEWorkflowDAG.Node> listRoots(PEWorkflowDAG peWorkflowDAG) {

        Map<Long, PEWorkflowDAG.Node> nodeId2Node = Maps.newHashMap();
        peWorkflowDAG.getNodes().forEach(node -> nodeId2Node.put(node.getNodeId(), node));
        // any node that is the target of an edge cannot be a root
        peWorkflowDAG.getEdges().forEach(edge -> nodeId2Node.remove(edge.getTo()));

        return Lists.newLinkedList(nodeId2Node.values());
    }

    /**
     * Validates a DAG: non-empty, no duplicated node ids, and no cycles.
     *
     * @param peWorkflowDAG DAG in point-edge notation
     * @return true/false
     */
    public static boolean valid(PEWorkflowDAG peWorkflowDAG) {

        // check for duplicated node IDs
        Set<Long> nodeIds = Sets.newHashSet();
        // an empty node list is invalid
        if (peWorkflowDAG.getNodes() == null || peWorkflowDAG.getNodes().isEmpty()){
            return false;
        }
        for (PEWorkflowDAG.Node n : peWorkflowDAG.getNodes()) {
            if (nodeIds.contains(n.getNodeId())) {
                return false;
            }
            nodeIds.add(n.getNodeId());
        }

        try {
            // records every node ID reached during traversal
            HashSet<Long> traversalNodeIds = Sets.newHashSet();
            WorkflowDAG dag = convert(peWorkflowDAG);
            // walk every path starting from each root
            for (WorkflowDAG.Node root : dag.getRoots()) {
                if (invalidPath(root, Sets.newHashSet(), traversalNodeIds)) {
                    return false;
                }
            }
            // the traversal should have reached every node; a mismatch means
            // there is a cycle unreachable from any root (an isolated ring)
            return traversalNodeIds.size() == nodeIds.size();

        } catch (Exception ignore) {
            // ignore: convert() throws on malformed graphs, which simply means invalid
        }
        return false;
    }


    /**
     * Add by Echo009 on 2021/02/08
     * Lists ready nodes: unfinished nodes whose upstream dependencies are either
     * absent or all finished.
     * Note: disabled nodes (enable = false) encountered here are directly marked
     * as finished, and their ready successors are returned instead.
     *
     * @param dag DAG in point-edge notation
     * @return nodes that can be executed now
     */
    public static List<PEWorkflowDAG.Node> listReadyNodes(PEWorkflowDAG dag) {
        // nodeId -> Node mapping
        Map<Long, PEWorkflowDAG.Node> nodeId2Node = Maps.newHashMap();

        List<PEWorkflowDAG.Node> dagNodes = dag.getNodes();
        for (PEWorkflowDAG.Node node : dagNodes) {
            nodeId2Node.put(node.getNodeId(), node);
        }
        // dependency map (which upstream nodes must finish before a downstream node can run)
        Multimap<Long, Long> relyMap = LinkedListMultimap.create();
        // successor map
        Multimap<Long, Long> successorMap = LinkedListMultimap.create();
        dag.getEdges().forEach(edge -> {
            relyMap.put(edge.getTo(), edge.getFrom());
            successorMap.put(edge.getFrom(), edge.getTo());
        });
        List<PEWorkflowDAG.Node> readyNodes = Lists.newArrayList();
        List<PEWorkflowDAG.Node> skipNodes = Lists.newArrayList();

        for (PEWorkflowDAG.Node currentNode : dagNodes) {
            if (!isReadyNode(currentNode.getNodeId(), nodeId2Node, relyMap)) {
                continue;
            }
            // disabled nodes are skipped outright
            if (currentNode.getEnable() != null && !currentNode.getEnable()) {
                skipNodes.add(currentNode);
            } else {
                readyNodes.add(currentNode);
            }
        }
        // some nodes were skipped: mark them done and pull in their ready successors
        if (!skipNodes.isEmpty()) {
            for (PEWorkflowDAG.Node skipNode : skipNodes) {
                // move
                readyNodes.addAll(moveAndObtainReadySuccessor(skipNode, nodeId2Node, relyMap, successorMap));
            }
        }
        return readyNodes;
    }

    /**
     * Marks a skipped node as finished and collects its ready successors.
     *
     * @param skippedNode  the node being skipped
     * @param nodeId2Node  nodeId -> Node
     * @param relyMap      to-node id -> list of from-node id
     * @param successorMap from-node id -> list of to-node id
     * @return ready successor nodes
     */
    private static List<PEWorkflowDAG.Node> moveAndObtainReadySuccessor(PEWorkflowDAG.Node skippedNode, Map<Long, PEWorkflowDAG.Node> nodeId2Node, Multimap<Long, Long> relyMap, Multimap<Long, Long> successorMap) {

        // mark the skipped node as finished
        skippedNode.setStatus(InstanceStatus.SUCCEED.getV());
        skippedNode.setResult(SystemInstanceResult.DISABLE_NODE);
        // skipping can cascade through several consecutive disabled nodes
        List<PEWorkflowDAG.Node> readyNodes = Lists.newArrayList();
        List<PEWorkflowDAG.Node> skipNodes = Lists.newArrayList();
        // successors of the node being skipped
        Collection<Long> successors = successorMap.get(skippedNode.getNodeId());
        for (Long successor : successors) {
            // a successor is ready when all of its predecessors are finished
            if (isReadyNode(successor, nodeId2Node, relyMap)) {
                PEWorkflowDAG.Node node = nodeId2Node.get(successor);
                if (node.getEnable() != null && !node.getEnable()) {
                    // needs to be skipped as well
                    skipNodes.add(node);
                    continue;
                }
                readyNodes.add(node);
            }
        }
        // depth-first: keep cascading through skipped successors
        if (!skipNodes.isEmpty()) {
            for (PEWorkflowDAG.Node node : skipNodes) {
                readyNodes.addAll(moveAndObtainReadySuccessor(node, nodeId2Node, relyMap, successorMap));
            }
        }
        return readyNodes;
    }

    /**
     * Checks whether a node is ready to run.
     *
     * @param nodeId      Node id
     * @param nodeId2Node Node id -> Node
     * @param relyMap     to-node id -> list of from-node id
     * @return true if current node is ready
     */
    private static boolean isReadyNode(long nodeId, Map<Long, PEWorkflowDAG.Node> nodeId2Node, Multimap<Long, Long> relyMap) {
        PEWorkflowDAG.Node currentNode = nodeId2Node.get(nodeId);
        // a null status is treated as "waiting to be dispatched"
        int currentNodeStatus = currentNode.getStatus() == null ? InstanceStatus.WAITING_DISPATCH.getV() : currentNode.getStatus();
        // skip finished nodes (succeeded or failed) and already-dispatched (running) nodes
        if (InstanceStatus.FINISHED_STATUS.contains(currentNodeStatus)
                || currentNodeStatus == InstanceStatus.RUNNING.getV()) {
            return false;
        }
        Collection<Long> relyNodeIds = relyMap.get(nodeId);
        for (Long relyNodeId : relyNodeIds) {
            PEWorkflowDAG.Node relyNode = nodeId2Node.get(relyNodeId);
            int relyNodeStatus = relyNode.getStatus() == null ? InstanceStatus.WAITING_DISPATCH.getV() : relyNode.getStatus();
            // a single unfinished dependency means this node is not ready.
            // note: FAILED dependencies are acceptable here because skippable-on-failure
            // nodes exist; a non-skippable failure never reaches this point (the workflow is interrupted)
            if (InstanceStatus.GENERALIZED_RUNNING_STATUS.contains(relyNodeStatus)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Whether the node may NOT be skipped when it fails; skipping is disallowed by default.
     */
    public static boolean isNotAllowSkipWhenFailed(PEWorkflowDAG.Node node) {
        // skipping on failure is disallowed unless explicitly enabled
        return node.getSkipWhenFailed() == null || !node.getSkipWhenFailed();
    }

    /**
     * Handles disabled edges:
     * 1. Nodes reachable ONLY through disabled edges are marked disabled
     *    (disableByControlNode) and their status is set to CANCELED.
     * 2. All outgoing edges of those newly disabled nodes are disabled too.
     * 3. Recurse on the newly disabled edges.
     */
    @SuppressWarnings("squid:S3776")
    public static void handleDisableEdges(List<PEWorkflowDAG.Edge> disableEdges, WorkflowDAG dag) {
        if (disableEdges.isEmpty()) {
            return;
        }
        List<PEWorkflowDAG.Node> disableNodes = Lists.newArrayList();
        // for each disabled edge's target node: disable it when it is reachable only through disabled edges
        for (PEWorkflowDAG.Edge disableEdge : disableEdges) {
            WorkflowDAG.Node toNode = dag.getNode(disableEdge.getTo());
            // check whether every incoming edge is disabled
            Collection<PEWorkflowDAG.Edge> dependenceEdges = toNode.getDependenceEdgeMap().values();
            boolean shouldBeDisable = true;
            for (PEWorkflowDAG.Edge dependenceEdge : dependenceEdges) {
                if (dependenceEdge.getEnable() == null || dependenceEdge.getEnable()) {
                    shouldBeDisable = false;
                    break;
                }
            }
            if (shouldBeDisable) {
                // disable
                PEWorkflowDAG.Node node = toNode.getHolder();
                node.setEnable(false)
                        .setDisableByControlNode(true)
                        .setStatus(InstanceStatus.CANCELED.getV());
                disableNodes.add(node);
            }
        }
        if (!disableNodes.isEmpty()) {
            // every outgoing edge of a disabled node becomes disabled
            List<PEWorkflowDAG.Edge> targetEdges = Lists.newArrayList();
            for (PEWorkflowDAG.Node disableNode : disableNodes) {
                WorkflowDAG.Node node = dag.getNode(disableNode.getNodeId());
                Collection<PEWorkflowDAG.Edge> edges = node.getSuccessorEdgeMap().values();
                for (PEWorkflowDAG.Edge edge : edges) {
                    edge.setEnable(false);
                    targetEdges.add(edge);
                }
            }
            // breadth-first: keep processing the newly disabled edges
            handleDisableEdges(targetEdges, dag);
        }
    }

    /**
     * Converts a DAG in point-edge notation into a reference-linked DAG.
     *
     * @param peWorkflowDAG DAG in point-edge notation
     * @return reference-linked DAG
     */
    public static WorkflowDAG convert(PEWorkflowDAG peWorkflowDAG) {
        Set<Long> rootIds = Sets.newHashSet();
        Map<Long, WorkflowDAG.Node> id2Node = Maps.newHashMap();

        if (peWorkflowDAG.getNodes() == null || peWorkflowDAG.getNodes().isEmpty()) {
            throw new PowerJobException("empty graph");
        }

        // create the nodes
        peWorkflowDAG.getNodes().forEach(node -> {
            Long nodeId = node.getNodeId();
            WorkflowDAG.Node n = new WorkflowDAG.Node(node);
            id2Node.put(nodeId, n);
            // initially every node is treated as a root
            rootIds.add(nodeId);
        });

        // wire up the graph
        peWorkflowDAG.getEdges().forEach(edge -> {
            WorkflowDAG.Node from = id2Node.get(edge.getFrom());
            WorkflowDAG.Node to = id2Node.get(edge.getTo());

            if (from == null || to == null) {
                throw new PowerJobException("Illegal Edge: " + JsonUtils.toJSONString(edge));
            }

            from.getSuccessors().add(to);
            from.getSuccessorEdgeMap().put(to, edge);
            to.getDependencies().add(from);
            to.getDependenceEdgeMap().put(from, edge);
            // a node with an incoming edge cannot be a root: remove it
            rootIds.remove(to.getNodeId());
        });

        // sanity check: at least one root must exist
        if (rootIds.isEmpty()) {
            throw new PowerJobException("Illegal DAG: " + JsonUtils.toJSONString(peWorkflowDAG));
        }

        List<WorkflowDAG.Node> roots = Lists.newLinkedList();
        rootIds.forEach(id -> roots.add(id2Node.get(id)));
        return new WorkflowDAG(roots, id2Node);
    }


    private static boolean invalidPath(WorkflowDAG.Node root, Set<Long> ids, Set<Long> nodeIdContainer) {

        // recursion exits: revisiting a node on the current path means a cycle (invalid);
        // reaching a node without successors means this path is fine
        if (ids.contains(root.getNodeId())) {
            return true;
        }
        nodeIdContainer.add(root.getNodeId());
        if (root.getSuccessors().isEmpty()) {
            return false;
        }
        ids.add(root.getNodeId());
        for (WorkflowDAG.Node node : root.getSuccessors()) {
            // the path set is copied per branch so sibling branches don't pollute each other
            if (invalidPath(node, Sets.newHashSet(ids), nodeIdContainer)) {
                return true;
            }
        }
        return false;
    }
}
|
||||
@ -0,0 +1,22 @@
|
||||
package tech.powerjob.server.core.workflow.hanlder;
|
||||
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
|
||||
|
||||
/**
 * Handler for control nodes in a workflow DAG (nodes that steer execution
 * rather than spawn job instances).
 *
 * @author Echo009
 * @since 2021/12/9
 */
public interface ControlNodeHandler extends WorkflowNodeHandlerMarker {

    /**
     * Handles a control node.
     *
     * @param node           target node to handle
     * @param dag            DAG the node belongs to
     * @param wfInstanceInfo workflow instance the node belongs to
     */
    void handle(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo);


}
|
||||
@ -0,0 +1,29 @@
|
||||
package tech.powerjob.server.core.workflow.hanlder;
|
||||
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
|
||||
|
||||
/**
 * Handler for task nodes (nodes that spawn an executable instance).
 * Creation and start are separate steps so the DAG state can be persisted in between.
 *
 * @author Echo009
 * @since 2021/12/9
 */
public interface TaskNodeHandler extends WorkflowNodeHandlerMarker {

    /**
     * Creates the task instance for the node.
     *
     * @param node           target node
     * @param dag            DAG
     * @param wfInstanceInfo workflow instance
     */
    void createTaskInstance(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo);

    /**
     * Starts the previously created task instance.
     *
     * @param node target node
     */
    void startTaskInstance(PEWorkflowDAG.Node node);


}
|
||||
@ -0,0 +1,20 @@
|
||||
package tech.powerjob.server.core.workflow.hanlder;
|
||||
|
||||
import tech.powerjob.common.enums.WorkflowNodeType;
|
||||
|
||||
/**
 * Marker interface for workflow node handlers; each handler declares which
 * node type it is able to process.
 *
 * @author Echo009
 * @since 2021/12/9
 */
public interface WorkflowNodeHandlerMarker {


    /**
     * Returns the node type this handler can process.
     * @return matching node type
     */
    WorkflowNodeType matchingType();



}
|
||||
@ -0,0 +1,98 @@
|
||||
package tech.powerjob.server.core.workflow.hanlder.impl;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.alibaba.fastjson.TypeReference;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.enums.WorkflowNodeType;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
import tech.powerjob.common.serialize.JsonUtils;
|
||||
import tech.powerjob.server.core.evaluator.GroovyEvaluator;
|
||||
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
|
||||
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils;
|
||||
import tech.powerjob.server.core.workflow.hanlder.ControlNodeHandler;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
 * Handles decision (control) nodes: evaluates a Groovy script against the
 * workflow context and disables the outgoing edges whose property does not
 * match the boolean result.
 *
 * @author Echo009
 * @since 2021/12/9
 */
@Slf4j
@Component
public class DecisionNodeHandler implements ControlNodeHandler {

    // evaluator for the decision script; a single shared instance is reused for all nodes
    private final GroovyEvaluator groovyEvaluator = new GroovyEvaluator();

    /**
     * Handles a decision node:
     * 1. run the script
     * 2. based on the result, disable the non-matching edges and the nodes reachable only through them
     */
    @Override
    public void handle(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) {
        String script = node.getNodeParams();
        if (StringUtils.isBlank(script)) {
            log.error("[Workflow-{}|{}]decision node's param is blank! nodeId:{}", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId());
            throw new PowerJobException("decision node's param is blank!");
        }
        // wfContext must be a map
        HashMap<String, String> wfContext = JSON.parseObject(wfInstanceInfo.getWfContext(), new TypeReference<HashMap<String, String>>() {
        });
        Object result;
        try {
            result = groovyEvaluator.evaluate(script, wfContext);
        } catch (Exception e) {
            log.error("[Workflow-{}|{}]failed to evaluate decision node,nodeId:{}", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), e);
            throw new PowerJobException("can't evaluate decision node!");
        }
        boolean finalRes;
        // booleans are taken directly; numbers count as true when strictly positive
        if (result instanceof Boolean) {
            finalRes = ((Boolean) result);
        } else if (result instanceof Number) {
            finalRes = ((Number) result).doubleValue() > 0;
        } else {
            log.error("[Workflow-{}|{}]decision node's return value is illegal,nodeId:{},result:{}", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), JsonUtils.toJSONString(result));
            throw new PowerJobException("decision node's return value is illegal!");
        }
        handleDag(finalRes, node, dag);
    }


    /**
     * Marks the decision node finished and disables the outgoing edges whose
     * "property" does not match the evaluated result, then cascades via
     * {@link WorkflowDAGUtils#handleDisableEdges}.
     */
    private void handleDag(boolean res, PEWorkflowDAG.Node node, PEWorkflowDAG peDag) {
        // mark the decision node as succeeded
        node.setResult(String.valueOf(res));
        node.setStatus(InstanceStatus.SUCCEED.getV());
        WorkflowDAG dag = WorkflowDAGUtils.convert(peDag);
        // disable the edges that don't match the evaluated result
        WorkflowDAG.Node targetNode = dag.getNode(node.getNodeId());
        Collection<PEWorkflowDAG.Edge> edges = targetNode.getSuccessorEdgeMap().values();
        if (edges.isEmpty()) {
            return;
        }
        List<PEWorkflowDAG.Edge> disableEdges = new ArrayList<>(edges.size());
        for (PEWorkflowDAG.Edge edge : edges) {
            // Boolean.parseBoolean never throws, so no exception can occur here
            boolean property = Boolean.parseBoolean(edge.getProperty());
            if (res != property) {
                // disable
                edge.setEnable(false);
                disableEdges.add(edge);
            }
        }
        WorkflowDAGUtils.handleDisableEdges(disableEdges,dag);
    }






    @Override
    public WorkflowNodeType matchingType() {
        return WorkflowNodeType.DECISION;
    }
}
|
||||
@ -0,0 +1,52 @@
|
||||
package tech.powerjob.server.core.workflow.hanlder.impl;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.enums.TimeExpressionType;
|
||||
import tech.powerjob.common.enums.WorkflowNodeType;
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
import tech.powerjob.server.common.utils.SpringUtils;
|
||||
import tech.powerjob.server.core.DispatchService;
|
||||
import tech.powerjob.server.core.instance.InstanceService;
|
||||
import tech.powerjob.server.core.workflow.hanlder.TaskNodeHandler;
|
||||
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
/**
 * Handles JOB task nodes: creates a job instance for the node, then dispatches it.
 *
 * @author Echo009
 * @since 2021/12/9
 */
@Slf4j
@Component
@RequiredArgsConstructor
public class JobNodeHandler implements TaskNodeHandler {

    private final JobInfoRepository jobInfoRepository;

    @Override
    public void createTaskInstance(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) {
        // instanceParam carries the workflow instance's wfContext
        Long instanceId = SpringUtils.getBean(InstanceService.class).create(node.getJobId(), wfInstanceInfo.getAppId(), node.getNodeParams(), wfInstanceInfo.getWfContext(), wfInstanceInfo.getWfInstanceId(), System.currentTimeMillis(), null, null).getInstanceId();
        node.setInstanceId(instanceId);
        node.setStatus(InstanceStatus.RUNNING.getV());
        log.info("[Workflow-{}|{}] create readyNode(JOB) instance(nodeId={},jobId={},instanceId={}) successfully~", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), node.getJobId(), instanceId);
    }

    @Override
    public void startTaskInstance(PEWorkflowDAG.Node node) {
        // NOTE(review): silently falls back to an empty JobInfoDO when the job record
        // is missing, and dispatch proceeds with defaults — confirm this is intended
        JobInfoDO jobInfo = jobInfoRepository.findById(node.getJobId()).orElseGet(JobInfoDO::new);
        // override the time expression type so the dispatcher treats this as a workflow-triggered run
        jobInfo.setTimeExpressionType(TimeExpressionType.WORKFLOW.getV());
        SpringUtils.getBean(DispatchService.class).dispatch(jobInfo, node.getInstanceId(), Optional.empty(), Optional.empty());
    }

    @Override
    public WorkflowNodeType matchingType() {
        return WorkflowNodeType.JOB;
    }
}
|
||||
@ -0,0 +1,96 @@
|
||||
package tech.powerjob.server.core.workflow.hanlder.impl;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
import tech.powerjob.common.SystemInstanceResult;
|
||||
import tech.powerjob.common.enums.InstanceStatus;
|
||||
import tech.powerjob.common.enums.WorkflowInstanceStatus;
|
||||
import tech.powerjob.common.enums.WorkflowNodeType;
|
||||
import tech.powerjob.common.exception.PowerJobException;
|
||||
import tech.powerjob.common.model.PEWorkflowDAG;
|
||||
import tech.powerjob.common.utils.CommonUtils;
|
||||
import tech.powerjob.common.enums.SwitchableStatus;
|
||||
import tech.powerjob.server.common.utils.SpringUtils;
|
||||
import tech.powerjob.server.core.workflow.WorkflowInstanceManager;
|
||||
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils;
|
||||
import tech.powerjob.server.core.workflow.hanlder.TaskNodeHandler;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
|
||||
import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
/**
|
||||
* @author Echo009
|
||||
* @since 2021/12/13
|
||||
*/
|
||||
@Component
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
public class NestedWorkflowNodeHandler implements TaskNodeHandler {
|
||||
|
||||
private final WorkflowInfoRepository workflowInfoRepository;
|
||||
|
||||
private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository;
|
||||
|
||||
@Override
|
||||
public void createTaskInstance(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) {
|
||||
// check
|
||||
Long wfId = node.getJobId();
|
||||
WorkflowInfoDO targetWf = workflowInfoRepository.findById(wfId).orElse(null);
|
||||
if (targetWf == null || targetWf.getStatus() == SwitchableStatus.DELETED.getV()) {
|
||||
if (targetWf == null) {
|
||||
log.error("[Workflow-{}|{}] invalid nested workflow node({}),target workflow({}) is not exist!", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), node.getJobId());
|
||||
} else {
|
||||
log.error("[Workflow-{}|{}] invalid nested workflow node({}),target workflow({}) has been deleted!", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), node.getJobId());
|
||||
}
|
||||
throw new PowerJobException("invalid nested workflow node," + node.getNodeId());
|
||||
}
|
||||
if (node.getInstanceId() != null) {
|
||||
// 处理重试的情形,不需要创建实例,仅需要更改对应实例的状态,以及相应的节点状态
|
||||
WorkflowInstanceInfoDO wfInstance = workflowInstanceInfoRepository.findByWfInstanceId(node.getInstanceId()).orElse(null);
|
||||
if (wfInstance == null) {
|
||||
log.error("[Workflow-{}|{}] invalid nested workflow node({}),target workflow instance({}) is not exist!", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), node.getInstanceId());
|
||||
throw new PowerJobException("invalid nested workflow instance id " + node.getInstanceId());
|
||||
}
|
||||
// 不用考虑状态,只有失败的工作流嵌套节点状态会被重置
|
||||
// 需要将子工作流中失败的节点状态重置为 等待 派发
|
||||
try {
|
||||
PEWorkflowDAG nodeDag = JSON.parseObject(wfInstance.getDag(), PEWorkflowDAG.class);
|
||||
if (!WorkflowDAGUtils.valid(nodeDag)) {
|
||||
throw new PowerJobException(SystemInstanceResult.INVALID_DAG);
|
||||
}
|
||||
WorkflowDAGUtils.resetRetryableNode(nodeDag);
|
||||
wfInstance.setDag(JSON.toJSONString(nodeDag));
|
||||
wfInstance.setStatus(WorkflowInstanceStatus.WAITING.getV());
|
||||
wfInstance.setGmtModified(new Date());
|
||||
workflowInstanceInfoRepository.saveAndFlush(wfInstance);
|
||||
} catch (Exception e) {
|
||||
log.error("[Workflow-{}|{}] invalid nested workflow node({}),target workflow instance({})'s DAG is illegal!", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), node.getInstanceId(),e);
|
||||
throw new PowerJobException("illegal nested workflow instance, id : "+ node.getInstanceId());
|
||||
}
|
||||
} else {
|
||||
// 透传当前的上下文创建新的工作流实例
|
||||
String wfContext = wfInstanceInfo.getWfContext();
|
||||
Long instanceId = SpringUtils.getBean(WorkflowInstanceManager.class).create(targetWf, wfContext, System.currentTimeMillis(), wfInstanceInfo.getWfInstanceId());
|
||||
node.setInstanceId(instanceId);
|
||||
}
|
||||
node.setStartTime(CommonUtils.formatTime(System.currentTimeMillis()));
|
||||
node.setStatus(InstanceStatus.RUNNING.getV());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startTaskInstance(PEWorkflowDAG.Node node) {
|
||||
Long wfId = node.getJobId();
|
||||
WorkflowInfoDO targetWf = workflowInfoRepository.findById(wfId).orElse(null);
|
||||
SpringUtils.getBean(WorkflowInstanceManager.class).start(targetWf, node.getInstanceId());
|
||||
}
|
||||
|
||||
@Override
|
||||
public WorkflowNodeType matchingType() {
|
||||
return WorkflowNodeType.NESTED_WORKFLOW;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user