---初始化项目

This commit is contained in:
2025-09-19 16:14:08 +08:00
parent 902d3d7e3b
commit afee7c03ac
767 changed files with 75809 additions and 82 deletions

View File

@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module descriptor for powerjob-server-core: the server-side core
     scheduling / dispatch / alarm logic. Inherits its version and dependency
     management from the powerjob-server parent POM. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>powerjob-server</artifactId>
        <groupId>tech.powerjob</groupId>
        <version>5.1.2</version>
        <relativePath>../pom.xml</relativePath>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>powerjob-server-core</artifactId>
    <version>${project.parent.version}</version>
    <properties>
        <!-- Java 8 source/target level -->
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>
    <!-- Sibling server modules; versions are managed by the parent POM. -->
    <dependencies>
        <dependency>
            <groupId>tech.powerjob</groupId>
            <artifactId>powerjob-server-extension</artifactId>
        </dependency>
        <dependency>
            <groupId>tech.powerjob</groupId>
            <artifactId>powerjob-server-remote</artifactId>
        </dependency>
        <dependency>
            <groupId>tech.powerjob</groupId>
            <artifactId>powerjob-server-common</artifactId>
        </dependency>
        <dependency>
            <groupId>tech.powerjob</groupId>
            <artifactId>powerjob-server-persistence</artifactId>
        </dependency>
    </dependencies>
</project>

View File

@ -0,0 +1,235 @@
package tech.powerjob.server.core;
import com.google.common.collect.Lists;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import tech.powerjob.common.RemoteConstant;
import tech.powerjob.common.SystemInstanceResult;
import tech.powerjob.common.enums.ExecuteType;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.enums.ProcessorType;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.common.request.ServerScheduleJobReq;
import tech.powerjob.remote.framework.base.URL;
import tech.powerjob.server.common.Holder;
import tech.powerjob.server.common.module.WorkerInfo;
import tech.powerjob.server.core.instance.InstanceManager;
import tech.powerjob.server.core.instance.InstanceMetadataService;
import tech.powerjob.server.core.lock.UseCacheLock;
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
import tech.powerjob.server.remote.transporter.TransportService;
import tech.powerjob.server.remote.transporter.impl.ServerURLFactory;
import tech.powerjob.server.remote.worker.WorkerClusterQueryService;
import tech.powerjob.server.remote.worker.selector.TaskTrackerSelectorService;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import static tech.powerjob.common.enums.InstanceStatus.*;
/**
 * Dispatch service: sends job instances from the Server to a Worker (TaskTracker).
 *
 * @author tjq
 * @author Echo009
 * @since 2020/4/5
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class DispatchService {

    private final TransportService transportService;

    private final WorkerClusterQueryService workerClusterQueryService;

    private final InstanceManager instanceManager;

    private final InstanceMetadataService instanceMetadataService;

    private final InstanceInfoRepository instanceInfoRepository;

    private final TaskTrackerSelectorService taskTrackerSelectorService;

    /**
     * Asynchronous re-dispatch of a single instance.
     *
     * @param instanceId   instance ID
     * @param originStatus status the DB row is expected to be in; the update only
     *                     applies when the row still carries this status (CAS-style guard)
     */
    @UseCacheLock(type = "processJobInstance", key = "#instanceId", concurrencyLevel = 1024)
    public void redispatchAsync(Long instanceId, int originStatus) {
        // reset the status to WAITING_DISPATCH so the scheduler picks it up again
        instanceInfoRepository.updateStatusAndGmtModifiedByInstanceIdAndOriginStatus(instanceId, originStatus, InstanceStatus.WAITING_DISPATCH.getV(), new Date());
    }

    /**
     * Asynchronous batch re-dispatch, without taking the per-instance cache lock.
     */
    public void redispatchBatchAsyncLockFree(List<Long> instanceIdList, int originStatus) {
        // reset the status to WAITING_DISPATCH so the scheduler picks them up again
        instanceInfoRepository.updateStatusAndGmtModifiedByInstanceIdListAndOriginStatus(instanceIdList, originStatus, InstanceStatus.WAITING_DISPATCH.getV(), new Date());
    }

    /**
     * Dispatches a job instance from the Server to a Worker (TaskTracker).
     * Only instances whose current status is WAITING_DISPATCH are dispatched.
     * **************************************************
     * 2021-02-03 modify by Echo009
     * 1. Removed the parameters "current run count", "workflow instance ID" and
     *    "instance params"; these are now read from the instance record itself.
     * 2. Removed the runningTimes handling logic;
     *    it was moved into {@link InstanceManager#updateStatus}.
     * **************************************************
     *
     * @param jobInfo              job metadata
     * @param instanceId           instance ID
     * @param instanceInfoOptional optional instance record, pass it in to save a DB read
     * @param overloadOptional     optional holder set to true when all workers are overloaded
     */
    @UseCacheLock(type = "processJobInstance", key = "#jobInfo.getMaxInstanceNum() > 0 || T(tech.powerjob.common.enums.TimeExpressionType).FREQUENT_TYPES.contains(#jobInfo.getTimeExpressionType()) ? #jobInfo.getId() : #instanceId", concurrencyLevel = 1024)
    public void dispatch(JobInfoDO jobInfo, Long instanceId, Optional<InstanceInfoDO> instanceInfoOptional, Optional<Holder<Boolean>> overloadOptional) {
        // The caller may pass in the instance record to reduce IO.
        // First check whether the instance has been canceled.
        InstanceInfoDO instanceInfo = instanceInfoOptional.orElseGet(() -> instanceInfoRepository.findByInstanceId(instanceId));
        Long jobId = instanceInfo.getJobId();
        if (CANCELED.getV() == instanceInfo.getStatus()) {
            log.info("[Dispatcher-{}|{}] cancel dispatch due to instance has been canceled", jobId, instanceId);
            return;
        }
        // An instance that was already dispatched must not be dispatched again.
        // fix: duplicate dispatch under concurrent access
        if (instanceInfo.getStatus() != WAITING_DISPATCH.getV()) {
            log.info("[Dispatcher-{}|{}] cancel dispatch due to instance has been dispatched", jobId, instanceId);
            return;
        }
        // The job record has been deleted.
        if (jobInfo.getId() == null) {
            log.warn("[Dispatcher-{}|{}] cancel dispatch due to job(id={}) has been deleted!", jobId, instanceId, jobId);
            instanceManager.processFinishedInstance(instanceId, instanceInfo.getWfInstanceId(), FAILED, "can't find job by id " + jobId);
            return;
        }
        Date now = new Date();
        String dbInstanceParams = instanceInfo.getInstanceParams() == null ? "" : instanceInfo.getInstanceParams();
        log.info("[Dispatcher-{}|{}] start to dispatch job: {};instancePrams: {}.", jobId, instanceId, jobInfo, dbInstanceParams);
        // Count the currently running instances.
        long current = System.currentTimeMillis();
        Integer maxInstanceNum = jobInfo.getMaxInstanceNum();
        // Second-level (frequent) jobs are dispatched to a single machine; the actual
        // maxInstanceNum is enforced by the TaskTracker.
        if (TimeExpressionType.FREQUENT_TYPES.contains(jobInfo.getTimeExpressionType())) {
            maxInstanceNum = 1;
        }
        // 0 means "no limit on concurrent instances", which also saves a DB query.
        if (maxInstanceNum > 0) {
            // WAITING_DISPATCH is intentionally NOT counted: delayed instances triggered
            // via the OpenAPI (e.g. a 1-day delay) must not count against the limit.
            // Because WAITING_DISPATCH is excluded, runningInstanceCount does not include this instance itself.
            long runningInstanceCount = instanceInfoRepository.countByJobIdAndStatusIn(jobId, Lists.newArrayList(WAITING_WORKER_RECEIVE.getV(), RUNNING.getV()));
            // Over the concurrent-run limit: do not dispatch.
            if (runningInstanceCount >= maxInstanceNum) {
                String result = String.format(SystemInstanceResult.TOO_MANY_INSTANCES, runningInstanceCount, maxInstanceNum);
                log.warn("[Dispatcher-{}|{}] cancel dispatch job due to too much instance is running ({} > {}).", jobId, instanceId, runningInstanceCount, maxInstanceNum);
                instanceInfoRepository.update4TriggerFailed(instanceId, FAILED.getV(), current, current, RemoteConstant.EMPTY_ADDRESS, result, now);
                instanceManager.processFinishedInstance(instanceId, instanceInfo.getWfInstanceId(), FAILED, result);
                return;
            }
        }
        // Fetch the most suitable worker list right now.
        List<WorkerInfo> suitableWorkers = workerClusterQueryService.geAvailableWorkers(jobInfo);
        if (CollectionUtils.isEmpty(suitableWorkers)) {
            log.warn("[Dispatcher-{}|{}] cancel dispatch job due to no worker available", jobId, instanceId);
            instanceInfoRepository.update4TriggerFailed(instanceId, FAILED.getV(), current, current, RemoteConstant.EMPTY_ADDRESS, SystemInstanceResult.NO_WORKER_AVAILABLE, now);
            instanceManager.processFinishedInstance(instanceId, instanceInfo.getWfInstanceId(), FAILED, SystemInstanceResult.NO_WORKER_AVAILABLE);
            return;
        }
        // Overload check: when every available worker is overloaded, skip this round entirely.
        suitableWorkers = filterOverloadWorker(suitableWorkers);
        if (suitableWorkers.isEmpty()) {
            // Cancel the dispatch directly, saving one DB write.
            overloadOptional.ifPresent(booleanHolder -> booleanHolder.set(true));
            log.warn("[Dispatcher-{}|{}] cancel to dispatch job due to all worker is overload", jobId, instanceId);
            return;
        }
        List<String> workerIpList = suitableWorkers.stream().map(WorkerInfo::getAddress).collect(Collectors.toList());
        // Build the schedule request.
        ServerScheduleJobReq req = constructServerScheduleJobReq(jobInfo, instanceInfo, workerIpList);
        // Send the request (unreliable: a background thread polls instance status periodically).
        WorkerInfo taskTracker = taskTrackerSelectorService.select(jobInfo, instanceInfo, suitableWorkers);
        String taskTrackerAddress = taskTracker.getAddress();
        URL workerUrl = ServerURLFactory.dispatchJob2Worker(taskTrackerAddress);
        transportService.tell(taskTracker.getProtocol(), workerUrl, req);
        log.info("[Dispatcher-{}|{}] send schedule request to TaskTracker[protocol:{},address:{}] successfully: {}.", jobId, instanceId, taskTracker.getProtocol(), taskTrackerAddress, req);
        // Update the instance status (guarded by the status read above).
        instanceInfoRepository.update4TriggerSucceed(instanceId, WAITING_WORKER_RECEIVE.getV(), current, taskTrackerAddress, now, instanceInfo.getStatus());
        // Warm the instance-metadata cache.
        instanceMetadataService.loadJobInfo(instanceId, jobInfo);
    }

    /**
     * Returns the subset of workers that are not overloaded.
     */
    private List<WorkerInfo> filterOverloadWorker(List<WorkerInfo> suitableWorkers) {
        List<WorkerInfo> res = new ArrayList<>(suitableWorkers.size());
        for (WorkerInfo suitableWorker : suitableWorkers) {
            if (suitableWorker.overload()) {
                continue;
            }
            res.add(suitableWorker);
        }
        return res;
    }

    /**
     * Builds the {@link ServerScheduleJobReq} sent to the TaskTracker.
     */
    private ServerScheduleJobReq constructServerScheduleJobReq(JobInfoDO jobInfo, InstanceInfoDO instanceInfo, List<String> finalWorkersIpList) {
        // Copy the shared fields from the job record.
        ServerScheduleJobReq req = new ServerScheduleJobReq();
        BeanUtils.copyProperties(jobInfo, req);
        // JobId
        req.setJobId(jobInfo.getId());
        // InstanceParams (null when absent, never empty string)
        if (StringUtils.isEmpty(instanceInfo.getInstanceParams())) {
            req.setInstanceParams(null);
        } else {
            req.setInstanceParams(instanceInfo.getInstanceParams());
        }
        // Instance-level job params override the static job params.
        if (!StringUtils.isEmpty(instanceInfo.getJobParams())) {
            req.setJobParams(instanceInfo.getJobParams());
        }
        req.setInstanceId(instanceInfo.getInstanceId());
        req.setAllWorkerAddress(finalWorkersIpList);
        req.setMaxWorkerCount(jobInfo.getMaxWorkerCount());
        // Workflow instance ID (null for standalone jobs)
        req.setWfInstanceId(instanceInfo.getWfInstanceId());
        req.setExecuteType(ExecuteType.of(jobInfo.getExecuteType()).name());
        req.setProcessorType(ProcessorType.of(jobInfo.getProcessorType()).name());
        req.setTimeExpressionType(TimeExpressionType.of(jobInfo.getTimeExpressionType()).name());
        if (jobInfo.getInstanceTimeLimit() != null) {
            req.setInstanceTimeoutMS(jobInfo.getInstanceTimeLimit());
        }
        req.setThreadConcurrency(jobInfo.getConcurrency());
        req.setMeta(instanceInfo.getMeta());
        return req;
    }
}

View File

@ -0,0 +1,53 @@
package tech.powerjob.server.core.alarm;
import com.google.common.collect.Lists;
import com.google.common.collect.Queues;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import tech.powerjob.common.utils.SysUtils;
import tech.powerjob.server.extension.alarm.Alarm;
import tech.powerjob.server.extension.alarm.AlarmTarget;
import tech.powerjob.server.extension.alarm.Alarmable;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
/**
 * Alarm center: fans a failure alarm out to every registered {@link Alarmable}
 * implementation, asynchronously on a dedicated thread pool so callers never block.
 *
 * @author tjq
 * @since 2020/4/19
 */
@Slf4j
@Component
public class AlarmCenter {

    /** Executes alarm callbacks asynchronously; fixed size = number of CPU cores. */
    private final ExecutorService pool;

    /** All {@link Alarmable} beans discovered in the Spring context. */
    private final List<Alarmable> alarmables = Lists.newLinkedList();

    public AlarmCenter(List<Alarmable> alarmables) {
        int cores = SysUtils.availableProcessors();
        ThreadFactory factory = new ThreadFactoryBuilder().setNameFormat("AlarmPool-%d").build();
        pool = new ThreadPoolExecutor(cores, cores, 5, TimeUnit.MINUTES, Queues.newLinkedBlockingQueue(), factory);
        alarmables.forEach(bean -> {
            this.alarmables.add(bean);
            log.info("[AlarmCenter] bean(className={},obj={}) register to AlarmCenter successfully!", bean.getClass().getName(), bean);
        });
    }

    /**
     * Asynchronously notifies every registered {@link Alarmable} of a failure.
     * A failure in one alarm channel is logged and does not prevent the
     * remaining channels from being invoked.
     *
     * @param alarm        the alarm payload
     * @param alarmTargets the users to notify
     */
    public void alarmFailed(Alarm alarm, List<AlarmTarget> alarmTargets) {
        pool.execute(() -> alarmables.forEach(alarmable -> {
            try {
                alarmable.onFailed(alarm, alarmTargets);
            } catch (Exception e) {
                log.warn("[AlarmCenter] alarm failed.", e);
            }
        }));
    }
}

View File

@ -0,0 +1,35 @@
package tech.powerjob.server.core.alarm;
import org.springframework.beans.BeanUtils;
import tech.powerjob.common.utils.CollectionUtils;
import tech.powerjob.server.extension.alarm.AlarmTarget;
import tech.powerjob.server.persistence.remote.model.UserInfoDO;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Static helpers for converting persisted user records into alarm targets.
 *
 * @author tjq
 * @since 2023/7/31
 */
public class AlarmUtils {

    private AlarmUtils() {
        // utility class: prevent instantiation
    }

    /**
     * Converts a list of user records into alarm targets.
     *
     * @param userInfoDOS user records, may be null or empty
     * @return alarm targets; an empty list when the input is null or empty
     */
    public static List<AlarmTarget> convertUserInfoList2AlarmTargetList(List<UserInfoDO> userInfoDOS) {
        if (CollectionUtils.isEmpty(userInfoDOS)) {
            return Collections.emptyList();
        }
        return userInfoDOS.stream().map(AlarmUtils::convertUserInfo2AlarmTarget).collect(Collectors.toList());
    }

    /**
     * Converts a single user record into an alarm target; matching bean
     * properties are copied, and the target name is taken from the username.
     */
    public static AlarmTarget convertUserInfo2AlarmTarget(UserInfoDO userInfoDO) {
        AlarmTarget alarmTarget = new AlarmTarget();
        BeanUtils.copyProperties(userInfoDO, alarmTarget);
        alarmTarget.setName(userInfoDO.getUsername());
        return alarmTarget;
    }
}

View File

@ -0,0 +1,114 @@
package tech.powerjob.server.core.alarm.impl;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.core.env.Environment;
import org.springframework.stereotype.Service;
import tech.powerjob.common.OmsConstant;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.common.utils.NetUtils;
import tech.powerjob.server.common.PowerJobServerConfigKey;
import tech.powerjob.server.common.SJ;
import tech.powerjob.server.extension.alarm.AlarmTarget;
import tech.powerjob.server.extension.alarm.Alarmable;
import tech.powerjob.server.extension.alarm.Alarm;
import javax.annotation.PostConstruct;
import java.util.List;
import java.util.Set;
/**
 * DingTalk alarm channel: resolves target users' DingTalk IDs by mobile number
 * and pushes a markdown work notification.
 *
 * @author tjq
 * @since 2020/8/6
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class DingTalkAlarmService implements Alarmable {

    private final Environment environment;

    private Long agentId;

    /** Null when the service is not configured; onFailed becomes a no-op then. */
    private DingTalkUtils dingTalkUtils;

    /** mobile -> DingTalk userId cache, avoids calling the DingTalk API on every alarm. */
    private Cache<String, String> mobile2UserIdCache;

    private static final int CACHE_SIZE = 8192;

    /**
     * Sentinel cached for mobiles that have no DingTalk account, so repeated
     * misses do not keep hitting the remote API (cache-penetration guard).
     */
    private static final String EMPTY_TAG = "EMPTY";

    @Override
    public void onFailed(Alarm alarm, List<AlarmTarget> targetUserList) {
        if (dingTalkUtils == null) {
            return;
        }
        Set<String> userIds = Sets.newHashSet();
        targetUserList.forEach(user -> {
            String phone = user.getPhone();
            if (StringUtils.isEmpty(phone)) {
                return;
            }
            try {
                String userId = mobile2UserIdCache.get(phone, () -> {
                    try {
                        return dingTalkUtils.fetchUserIdByMobile(phone);
                    } catch (PowerJobException ignore) {
                        // business failure (no such user): cache the sentinel
                        return EMPTY_TAG;
                    } catch (Exception ignore) {
                        // transient failure: Guava refuses null loads, nothing is cached
                        return null;
                    }
                });
                if (!EMPTY_TAG.equals(userId)) {
                    userIds.add(userId);
                }
            } catch (Exception ignore) {
            }
        });
        // defensive: drop any null that slipped into the set
        userIds.remove(null);
        if (!userIds.isEmpty()) {
            String userListStr = SJ.COMMA_JOINER.skipNulls().join(userIds);
            List<DingTalkUtils.MarkdownEntity> markdownEntities = Lists.newLinkedList();
            markdownEntities.add(new DingTalkUtils.MarkdownEntity("server", NetUtils.getLocalHost()));
            // flatten multi-line content: DingTalk markdown renders better without raw newlines
            String content = alarm.fetchContent().replaceAll(OmsConstant.LINE_SEPARATOR, OmsConstant.COMMA);
            markdownEntities.add(new DingTalkUtils.MarkdownEntity("content", content));
            try {
                dingTalkUtils.sendMarkdownAsync(alarm.fetchTitle(), markdownEntities, userListStr, agentId);
            } catch (Exception e) {
                log.error("[DingTalkAlarmService] send ding message failed, reason is {}", e.getMessage());
            }
        }
    }

    /**
     * Reads the DingTalk configuration and initializes the client; the service
     * stays disabled (dingTalkUtils == null) when any of the keys is missing.
     */
    @PostConstruct
    public void init() {
        String agentId = environment.getProperty(PowerJobServerConfigKey.DING_AGENT_ID);
        String appKey = environment.getProperty(PowerJobServerConfigKey.DING_APP_KEY);
        String appSecret = environment.getProperty(PowerJobServerConfigKey.DING_APP_SECRET);
        // SECURITY: never write the raw appSecret into the logs, only whether it is present
        log.info("[DingTalkAlarmService] init with appKey:{},appSecret:{},agentId:{}", appKey, appSecret == null ? null : "******", agentId);
        if (StringUtils.isAnyBlank(agentId, appKey, appSecret)) {
            log.warn("[DingTalkAlarmService] cannot get agentId, appKey, appSecret at the same time, this service is unavailable");
            return;
        }
        if (!StringUtils.isNumeric(agentId)) {
            log.warn("[DingTalkAlarmService] DingTalkAlarmService is unavailable due to invalid agentId: {}", agentId);
            return;
        }
        this.agentId = Long.valueOf(agentId);
        dingTalkUtils = new DingTalkUtils(appKey, appSecret);
        mobile2UserIdCache = CacheBuilder.newBuilder().maximumSize(CACHE_SIZE).softValues().build();
        log.info("[DingTalkAlarmService] init DingTalkAlarmService successfully!");
    }
}

View File

@ -0,0 +1,131 @@
package tech.powerjob.server.core.alarm.impl;
import com.dingtalk.api.DefaultDingTalkClient;
import com.dingtalk.api.DingTalkClient;
import com.dingtalk.api.request.OapiGettokenRequest;
import com.dingtalk.api.request.OapiMessageCorpconversationAsyncsendV2Request;
import com.dingtalk.api.request.OapiUserGetByMobileRequest;
import com.dingtalk.api.response.OapiGettokenResponse;
import com.dingtalk.api.response.OapiUserGetByMobileResponse;
import tech.powerjob.common.exception.PowerJobException;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.http.HttpMethod;
import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
/**
 * DingTalk client helper.
 * Work notification API: https://ding-doc.dingtalk.com/doc#/serverapi2/pgoxpy
 *
 * @author tjq
 * @since 2020/8/8
 */
@Slf4j
public class DingTalkUtils implements Closeable {

    /**
     * Volatile: written by the background refresh thread and read by caller
     * threads (fetchUserIdByMobile / sendMarkdownAsync); volatile guarantees
     * readers observe the latest token.
     */
    private volatile String accessToken;

    private final DingTalkClient sendMsgClient;

    private final DingTalkClient accessTokenClient;

    private final DingTalkClient userIdClient;

    /** Single-threaded scheduler that refreshes the access token periodically. */
    private final ScheduledExecutorService scheduledPool;

    /** Refresh every 6000 s: the token is valid for 7200 s, so this leaves headroom. */
    private static final long FLUSH_ACCESS_TOKEN_RATE = 6000;

    private static final String GET_TOKEN_URL = "https://oapi.dingtalk.com/gettoken";

    private static final String SEND_URL = "https://oapi.dingtalk.com/topapi/message/corpconversation/asyncsend_v2";

    private static final String GET_USER_ID_URL = "https://oapi.dingtalk.com/user/get_by_mobile";

    public DingTalkUtils(String appKey, String appSecret) {
        this.sendMsgClient = new DefaultDingTalkClient(SEND_URL);
        this.accessTokenClient = new DefaultDingTalkClient(GET_TOKEN_URL);
        this.userIdClient = new DefaultDingTalkClient(GET_USER_ID_URL);
        refreshAccessToken(appKey, appSecret);
        if (StringUtils.isEmpty(accessToken)) {
            throw new PowerJobException("fetch AccessToken failed, please check your appKey & appSecret");
        }
        scheduledPool = Executors.newSingleThreadScheduledExecutor();
        scheduledPool.scheduleAtFixedRate(() -> refreshAccessToken(appKey, appSecret), FLUSH_ACCESS_TOKEN_RATE, FLUSH_ACCESS_TOKEN_RATE, TimeUnit.SECONDS);
    }

    /**
     * Fetches the AccessToken, which every other API call depends on. It is
     * valid for 7200 seconds and therefore refreshed continuously.
     * All exceptions are swallowed and logged so the periodic task keeps running.
     *
     * @param appKey    application appKey
     * @param appSecret application appSecret
     */
    private void refreshAccessToken(String appKey, String appSecret) {
        try {
            OapiGettokenRequest req = new OapiGettokenRequest();
            req.setAppkey(appKey);
            req.setAppsecret(appSecret);
            req.setHttpMethod(HttpMethod.GET.name());
            OapiGettokenResponse rsp = accessTokenClient.execute(req);
            if (rsp.isSuccess()) {
                accessToken = rsp.getAccessToken();
            } else {
                log.warn("[DingTalkUtils] flush accessToken failed with req({}),code={},msg={}.", req.getTextParams(), rsp.getErrcode(), rsp.getErrmsg());
            }
        } catch (Exception e) {
            log.warn("[DingTalkUtils] flush accessToken failed.", e);
        }
    }

    /**
     * Resolves a DingTalk userId by mobile number.
     *
     * @throws PowerJobException when the API reports a failure (e.g. unknown mobile)
     */
    public String fetchUserIdByMobile(String mobile) throws Exception {
        OapiUserGetByMobileRequest request = new OapiUserGetByMobileRequest();
        request.setMobile(mobile);
        OapiUserGetByMobileResponse execute = userIdClient.execute(request, accessToken);
        if (execute.isSuccess()) {
            return execute.getUserid();
        }
        log.info("[DingTalkUtils] fetch userId by mobile({}) failed,reason is {}.", mobile, execute.getErrmsg());
        throw new PowerJobException("fetch userId by phone number failed, reason is " + execute.getErrmsg());
    }

    /**
     * Sends a markdown work notification to the given users asynchronously.
     *
     * @param title    message title
     * @param entities sections rendered as "#### title / > detail"
     * @param userList comma-separated DingTalk userIds
     * @param agentId  DingTalk micro-application agentId
     */
    public void sendMarkdownAsync(String title, List<MarkdownEntity> entities, String userList, Long agentId) throws Exception {
        OapiMessageCorpconversationAsyncsendV2Request request = new OapiMessageCorpconversationAsyncsendV2Request();
        request.setUseridList(userList);
        request.setAgentId(agentId);
        request.setToAllUser(false);
        OapiMessageCorpconversationAsyncsendV2Request.Msg msg = new OapiMessageCorpconversationAsyncsendV2Request.Msg();
        StringBuilder mdBuilder = new StringBuilder();
        mdBuilder.append("## ").append(title).append("\n");
        for (MarkdownEntity entity : entities) {
            mdBuilder.append("#### ").append(entity.title).append("\n");
            mdBuilder.append("> ").append(entity.detail).append("\n\n");
        }
        msg.setMsgtype("markdown");
        msg.setMarkdown(new OapiMessageCorpconversationAsyncsendV2Request.Markdown());
        msg.getMarkdown().setTitle(title);
        msg.getMarkdown().setText(mdBuilder.toString());
        request.setMsg(msg);
        sendMsgClient.execute(request, accessToken);
    }

    @Override
    public void close() throws IOException {
        scheduledPool.shutdownNow();
    }

    /** A (title, detail) pair rendered as one markdown section. */
    @AllArgsConstructor
    public static final class MarkdownEntity {
        private final String title;
        private final String detail;
    }
}

View File

@ -0,0 +1,62 @@
package tech.powerjob.server.core.alarm.impl;
import org.springframework.beans.factory.annotation.Value;
import org.apache.commons.lang3.StringUtils;
import tech.powerjob.server.extension.alarm.AlarmTarget;
import tech.powerjob.server.extension.alarm.Alarm;
import tech.powerjob.server.extension.alarm.Alarmable;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.env.Environment;
import org.springframework.mail.SimpleMailMessage;
import org.springframework.mail.javamail.JavaMailSender;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import javax.annotation.Resource;
import java.util.List;
import java.util.Objects;
/**
 * Email alarm channel.
 *
 * @author tjq
 * @since 2020/4/30
 */
@Slf4j
@Service
public class MailAlarmService implements Alarmable {

    @Resource
    private Environment environment;

    /** Null when no mail starter is configured; onFailed becomes a no-op then. */
    private JavaMailSender javaMailSender;

    /**
     * Sender address. NOTE: the default must be the empty placeholder
     * ("${...:}"); the previous "${spring.mail.username:''}" injected the
     * literal two-character string '' when the property was absent, which
     * defeated the isEmpty guard below and produced a bogus From address.
     */
    @Value("${spring.mail.username:}")
    private String from;

    @Override
    public void onFailed(Alarm alarm, List<AlarmTarget> targetUserList) {
        // silently skip when there is nobody to mail or mail is not configured
        if (CollectionUtils.isEmpty(targetUserList) || javaMailSender == null || StringUtils.isEmpty(from)) {
            return;
        }
        SimpleMailMessage sm = new SimpleMailMessage();
        try {
            sm.setFrom(from);
            sm.setTo(targetUserList.stream().map(AlarmTarget::getEmail).filter(Objects::nonNull).filter(email -> !email.isEmpty()).toArray(String[]::new));
            sm.setSubject(alarm.fetchTitle());
            sm.setText(alarm.fetchContent());
            javaMailSender.send(sm);
        } catch (Exception e) {
            log.warn("[MailAlarmService] send mail failed, reason is {}", e.getMessage());
        }
    }

    /** Optional injection: absent when spring-boot-starter-mail is not on the classpath. */
    @Autowired(required = false)
    public void setJavaMailSender(JavaMailSender javaMailSender) {
        this.javaMailSender = javaMailSender;
    }
}

View File

@ -0,0 +1,58 @@
package tech.powerjob.server.core.alarm.impl;
import com.alibaba.fastjson.JSONObject;
import tech.powerjob.common.OmsConstant;
import tech.powerjob.common.utils.HttpUtils;
import tech.powerjob.server.extension.alarm.AlarmTarget;
import tech.powerjob.server.extension.alarm.Alarm;
import tech.powerjob.server.extension.alarm.Alarmable;
import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType;
import okhttp3.RequestBody;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import java.util.List;
/**
 * HTTP webhook alarm channel: POSTs the alarm (serialized as JSON) to each
 * target user's configured webhook URL.
 *
 * @author tjq
 * @since 11/14/20
 */
@Slf4j
@Service
public class WebHookAlarmService implements Alarmable {

    private static final String HTTP_PROTOCOL_PREFIX = "http://";

    private static final String HTTPS_PROTOCOL_PREFIX = "https://";

    @Override
    public void onFailed(Alarm alarm, List<AlarmTarget> targetUserList) {
        if (CollectionUtils.isEmpty(targetUserList)) {
            return;
        }
        for (AlarmTarget target : targetUserList) {
            String webHook = target.getWebHook();
            if (StringUtils.isEmpty(webHook)) {
                continue;
            }
            // prepend a default protocol when the configured hook omits one
            if (!webHook.startsWith(HTTP_PROTOCOL_PREFIX) && !webHook.startsWith(HTTPS_PROTOCOL_PREFIX)) {
                webHook = HTTP_PROTOCOL_PREFIX + webHook;
            }
            MediaType jsonType = MediaType.parse(OmsConstant.JSON_MEDIA_TYPE);
            RequestBody requestBody = RequestBody.create(jsonType, JSONObject.toJSONString(alarm));
            try {
                String response = HttpUtils.post(webHook, requestBody);
                log.info("[WebHookAlarmService] invoke webhook[url={}] successfully, response is {}", webHook, response);
            } catch (Exception e) {
                log.warn("[WebHookAlarmService] invoke webhook[url={}] failed!", webHook, e);
            }
        }
    }
}

View File

@ -0,0 +1,86 @@
package tech.powerjob.server.core.alarm.module;
import lombok.Data;
import lombok.experimental.Accessors;
import tech.powerjob.server.extension.alarm.Alarm;
/**
 * Alarm payload for a failed job instance run.
 *
 * @author tjq
 * @since 2020/4/30
 */
@Data
@Accessors(chain = true)
public class JobInstanceAlarm implements Alarm {
    // Application ID
    private long appId;
    // Job ID
    private long jobId;
    // Job instance ID
    private long instanceId;
    // Job name
    private String jobName;
    // Parameters configured on the job itself
    private String jobParams;
    // Time expression type (CRON/API/FIX_RATE/FIX_DELAY)
    private Integer timeExpressionType;
    // Time expression (CRON string / null / long / long, matching the type above)
    private String timeExpression;
    // Execute type (standalone / broadcast / MapReduce)
    private Integer executeType;
    // Processor type (e.g. Java / Shell)
    private Integer processorType;
    // Processor info (implementation reference)
    private String processorInfo;
    // Instance-level parameters
    private String instanceParams;
    // Execution result
    private String result;
    // Expected trigger time (epoch millis)
    private Long expectedTriggerTime;
    // Actual trigger time (epoch millis)
    private Long actualTriggerTime;
    // Finish time (epoch millis)
    private Long finishedTime;
    // TaskTracker address — presumably the worker that ran the instance; confirm with callers
    private String taskTrackerAddress;
    @Override
    public String fetchTitle() {
        return "PowerJob AlarmService: Job Running Failed";
    }
}

View File

@ -0,0 +1,57 @@
package tech.powerjob.server.core.alarm.module;
import tech.powerjob.common.model.PEWorkflowDAG;
import lombok.Data;
import tech.powerjob.server.extension.alarm.Alarm;
/**
 * Alarm payload for a failed workflow instance run.
 *
 * @author tjq
 * @since 2020/6/12
 */
@Data
public class WorkflowInstanceAlarm implements Alarm {
    private String workflowName;
    // Owning application's ID (denormalized for query efficiency)
    private Long appId;
    private Long workflowId;
    // workflowInstanceId: instance tables use their own primary key to allow future sharding
    private Long wfInstanceId;
    // Workflow status (WorkflowInstanceStatus)
    private Integer status;
    private PEWorkflowDAG peWorkflowDAG;
    private String result;
    // Actual trigger time (epoch millis)
    private Long actualTriggerTime;
    // Finish time (epoch millis)
    private Long finishedTime;
    // Time expression type (CRON/API/FIX_RATE/FIX_DELAY)
    private Integer timeExpressionType;
    // Time expression (CRON string / null / long / long, matching the type above)
    private String timeExpression;
    @Override
    public String fetchTitle() {
        return "PowerJob AlarmService: Workflow Running Failed";
    }
}

View File

@ -0,0 +1,514 @@
package tech.powerjob.server.core.container;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.io.filefilter.IOFileFilter;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.maven.shared.invoker.DefaultInvocationRequest;
import org.apache.maven.shared.invoker.DefaultInvoker;
import org.apache.maven.shared.invoker.InvocationRequest;
import org.apache.maven.shared.invoker.Invoker;
import org.eclipse.jgit.api.CloneCommand;
import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.transport.CredentialsProvider;
import org.eclipse.jgit.transport.UsernamePasswordCredentialsProvider;
import org.springframework.core.env.Environment;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import org.springframework.web.multipart.MultipartFile;
import tech.powerjob.common.OmsConstant;
import tech.powerjob.common.exception.ImpossibleException;
import tech.powerjob.common.model.DeployedContainerInfo;
import tech.powerjob.common.model.GitRepoInfo;
import tech.powerjob.common.request.ServerDeployContainerRequest;
import tech.powerjob.common.request.ServerDestroyContainerRequest;
import tech.powerjob.common.serialize.JsonUtils;
import tech.powerjob.common.utils.CommonUtils;
import tech.powerjob.common.utils.NetUtils;
import tech.powerjob.common.utils.SegmentLock;
import tech.powerjob.remote.framework.base.URL;
import tech.powerjob.server.common.constants.ContainerSourceType;
import tech.powerjob.common.enums.SwitchableStatus;
import tech.powerjob.server.common.module.WorkerInfo;
import tech.powerjob.server.common.utils.OmsFileUtils;
import tech.powerjob.server.extension.LockService;
import tech.powerjob.server.extension.dfs.*;
import tech.powerjob.server.persistence.remote.model.ContainerInfoDO;
import tech.powerjob.server.persistence.remote.repository.ContainerInfoRepository;
import tech.powerjob.server.persistence.storage.Constants;
import tech.powerjob.server.remote.server.redirector.DesignateServer;
import tech.powerjob.server.remote.transporter.impl.ServerURLFactory;
import tech.powerjob.server.remote.transporter.TransportService;
import tech.powerjob.server.remote.worker.WorkerClusterQueryService;
import javax.annotation.Resource;
import javax.websocket.RemoteEndpoint;
import javax.websocket.Session;
import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
/**
* 容器服务
*
* @author tjq
* @since 2020/5/16
*/
@Slf4j
@Service
public class ContainerService {
@Resource
private Environment environment;
@Resource
private LockService lockService;
@Resource
private ContainerInfoRepository containerInfoRepository;
@Resource
private DFsService dFsService;
@Resource
private TransportService transportService;
@Resource
private WorkerClusterQueryService workerClusterQueryService;
// 下载用的分段锁
private final SegmentLock segmentLock = new SegmentLock(4);
// 并发部署的机器数量
private static final int DEPLOY_BATCH_NUM = 50;
// 部署间隔
private static final long DEPLOY_MIN_INTERVAL = 10 * 60 * 1000L;
// 最长部署时间
private static final long DEPLOY_MAX_COST_TIME = 10 * 60 * 1000L;
/**
 * Creates or updates a container record.
 *
 * @param container the container to persist; an existing id means update
 * @throws IllegalArgumentException when an id is given but no such container exists
 */
public void save(ContainerInfoDO container) {
    Long originId = container.getId();
    if (originId != null) {
        // just validate that the record exists; all fields come from the request
        containerInfoRepository.findById(originId).orElseThrow(() -> new IllegalArgumentException("can't find container by id: " + originId));
    } else {
        container.setGmtCreate(new Date());
    }
    container.setGmtModified(new Date());
    // For FatJar uploads the sourceInfo is the file's md5; for Git sources the md5 is produced at deploy time.
    if (container.getSourceType() == ContainerSourceType.FatJar.getV()) {
        container.setVersion(container.getSourceInfo());
    } else {
        container.setVersion("init");
    }
    containerInfoRepository.saveAndFlush(container);
}
/**
 * Deletes a container: tells every alive worker of the app to destroy it,
 * then soft-deletes the DB record.
 *
 * @param appId       application ID, used as a permission guard
 * @param containerId container ID
 * @throws IllegalArgumentException when the container does not exist
 * @throws RuntimeException         when the container belongs to a different app
 */
public void delete(Long appId, Long containerId) {
    ContainerInfoDO container = containerInfoRepository.findById(containerId).orElseThrow(() -> new IllegalArgumentException("can't find container by id: " + containerId));
    // protective check: the caller must own the container
    if (!Objects.equals(appId, container.getAppId())) {
        throw new RuntimeException("Permission Denied!");
    }
    ServerDestroyContainerRequest destroyRequest = new ServerDestroyContainerRequest(container.getId());
    workerClusterQueryService.getAllAliveWorkers(container.getAppId()).forEach(workerInfo -> {
        final URL url = ServerURLFactory.destroyContainer2Worker(workerInfo.getAddress());
        transportService.tell(workerInfo.getProtocol(), url, destroyRequest);
    });
    log.info("[ContainerService] delete container: {}.", container);
    // soft delete: flip the status instead of removing the row
    container.setStatus(SwitchableStatus.DELETED.getV());
    container.setGmtModified(new Date());
    containerInfoRepository.saveAndFlush(container);
}
/**
 * Upload the jar file of a container for later deployment.
 *
 * The upload is written to a temporary work directory, its md5 is computed
 * (used as the container version), the file is stored in the distributed file
 * system and finally moved to the local container-jar directory.
 *
 * @param file the uploaded multipart file
 * @return the md5 of the uploaded file
 * @throws IOException on any file-system failure
 */
public String uploadContainerJarFile(MultipartFile file) throws IOException {
    // FIX: getName() only returns the form-field name; getOriginalFilename() is the actual uploaded file name
    log.info("[ContainerService] start to uploadContainerJarFile, fileName={},size={}", file.getOriginalFilename(), file.getSize());
    String workerDirStr = OmsFileUtils.genTemporaryWorkPath();
    String tmpFileStr = workerDirStr + "tmp.jar";
    File workerDir = new File(workerDirStr);
    File tmpFile = new File(tmpFileStr);
    try {
        // persist the upload to the local temporary file
        FileUtils.forceMkdirParent(tmpFile);
        file.transferTo(tmpFile);
        // compute md5 (rather expensive); it doubles as the container version
        String md5 = OmsFileUtils.md5(tmpFile);
        String fileName = genContainerJarName(md5);
        // store in the DFS (also slow, making this endpoint slow overall; not worth an extra thread though)
        FileLocation fl = new FileLocation().setBucket(Constants.CONTAINER_BUCKET).setName(fileName);
        StoreRequest storeRequest = new StoreRequest().setLocalFile(tmpFile).setFileLocation(fl);
        dFsService.store(storeRequest);
        // move the file to its final local location
        String finalFileStr = OmsFileUtils.genContainerJarPath() + fileName;
        File finalFile = new File(finalFileStr);
        if (finalFile.exists()) {
            FileUtils.forceDelete(finalFile);
        }
        FileUtils.moveFile(tmpFile, finalFile);
        log.info("[ContainerService] uploadContainerJarFile successfully,md5={}", md5);
        return md5;
    } catch (Throwable t) {
        log.error("[ContainerService] uploadContainerJarFile failed!", t);
        ExceptionUtils.rethrow(t);
        // unreachable: rethrow always throws, but the compiler needs a terminating statement
        throw new ImpossibleException();
    } finally {
        // best-effort cleanup of the temporary work directory
        CommonUtils.executeIgnoreException(() -> FileUtils.forceDelete(workerDir));
    }
}
/**
 * Fetch the jar file needed to build/run a container.
 *
 * Looks in the local container-jar directory first; on a miss it tries to
 * download the jar from the DFS. NOTE(review): the returned File may not
 * exist on disk when the jar is missing both locally and remotely, or when
 * the download fails — callers appear to be expected to check File#exists,
 * confirm against call sites.
 *
 * @param version container version (the md5 / git commit id)
 * @return the local jar file (possibly non-existent, see note above)
 */
public File fetchContainerJarFile(String version) {
    String fileName = genContainerJarName(version);
    String filePath = OmsFileUtils.genContainerJarPath() + fileName;
    File localFile = new File(filePath);
    if (localFile.exists()) {
        return localFile;
    }
    FileLocation fileLocation = new FileLocation().setBucket(Constants.CONTAINER_BUCKET).setName(fileName);
    try {
        // only download when the remote metadata confirms the file exists
        Optional<FileMeta> fileMetaOpt = dFsService.fetchFileMeta(fileLocation);
        if (fileMetaOpt.isPresent()) {
            dFsService.download(new DownloadRequest().setFileLocation(fileLocation).setTarget(localFile));
        }
    } catch (Exception e) {
        // best-effort: a DFS failure degrades to "file not found" for the caller
        log.warn("[ContainerService] fetchContainerJarFile from dsf failed, version: {}", version, e);
    }
    return localFile;
}
/**
 * Deploy a container to all alive workers of its application.
 *
 * Progress and diagnostics are streamed back to the caller over the WebSocket
 * session; a distributed lock ensures at most one concurrent deploy per container.
 *
 * @param containerId container id
 * @param session     WebSocket session used for progress feedback
 * @throws Exception on any failure during deployment
 */
public void deploy(Long containerId, Session session) throws Exception {
    String deployLock = "containerDeployLock-" + containerId;
    RemoteEndpoint.Async remote = session.getAsyncRemote();
    // the lock TTL doubles as the maximum deploy duration (10 minutes)
    boolean lock = lockService.tryLock(deployLock, DEPLOY_MAX_COST_TIME);
    if (!lock) {
        remote.sendText("SYSTEM: acquire deploy lock failed, maybe other user is deploying, please wait until the running deploy task finished.");
        return;
    }
    try {
        Optional<ContainerInfoDO> containerInfoOpt = containerInfoRepository.findById(containerId);
        if (!containerInfoOpt.isPresent()) {
            remote.sendText("SYSTEM: can't find container by id: " + containerId);
            return;
        }
        ContainerInfoDO container = containerInfoOpt.get();
        Date lastDeployTime = container.getLastDeployTime();
        if (lastDeployTime != null) {
            // a deploy within DEPLOY_MIN_INTERVAL only produces a warning, it is not rejected
            if ((System.currentTimeMillis() - lastDeployTime.getTime()) < DEPLOY_MIN_INTERVAL) {
                remote.sendText("SYSTEM: [warn] deploy too frequent, last deploy time is: " + DateFormatUtils.format(lastDeployTime, OmsConstant.TIME_PATTERN));
            }
        }
        // prepare the jar file (git clone + maven build for Git sources, lookup/download for FatJar sources)
        File jarFile = prepareJarFile(container, session);
        if (jarFile == null) {
            return;
        }
        double sizeMB = 1.0 * jarFile.length() / FileUtils.ONE_MB;
        remote.sendText(String.format("SYSTEM: the jarFile(size=%fMB) is prepared and ready to be deployed to the worker.", sizeMB));
        // persist the new version (md5 / commit id) and the deploy timestamp
        Date now = new Date();
        container.setGmtModified(now);
        container.setLastDeployTime(now);
        containerInfoRepository.saveAndFlush(container);
        remote.sendText(String.format("SYSTEM: update current container version=%s successfully!", container.getVersion()));
        // start the deployment, in batches so workers fetch the jar in waves
        final List<WorkerInfo> allAliveWorkers = workerClusterQueryService.getAllAliveWorkers(container.getAppId());
        if (allAliveWorkers.isEmpty()) {
            remote.sendText("SYSTEM: there is no worker available now, deploy failed!");
            return;
        }
        String port = environment.getProperty("local.server.port");
        String downloadURL = String.format("http://%s:%s/container/downloadJar?version=%s", NetUtils.getLocalHost(), port, container.getVersion());
        ServerDeployContainerRequest req = new ServerDeployContainerRequest(containerId, container.getContainerName(), container.getVersion(), downloadURL);
        long sleepTime = calculateSleepTime(jarFile.length());
        AtomicInteger count = new AtomicInteger();
        allAliveWorkers.forEach(workerInfo -> {
            final URL url = ServerURLFactory.deployContainer2Worker(workerInfo.getAddress());
            transportService.tell(workerInfo.getProtocol(), url, req);
            remote.sendText("SYSTEM: send deploy request to " + url.getAddress());
            // pause after each full batch to avoid all workers downloading at once
            if (count.incrementAndGet() % DEPLOY_BATCH_NUM == 0) {
                CommonUtils.executeIgnoreException(() -> Thread.sleep(sleepTime));
            }
        });
        remote.sendText("SYSTEM: deploy finished, congratulations!");
    }finally {
        lockService.unlock(deployLock);
    }
}
/**
 * Build a human-readable report of where a container is deployed.
 *
 * @param appId       application id owning the container
 * @param containerId container id
 * @return a formatted, multi-line report string
 */
@DesignateServer
public String fetchDeployedInfo(Long appId, Long containerId) {
    List<DeployedContainerInfo> infoList = workerClusterQueryService.getDeployedContainerInfos(appId, containerId);
    Set<String> aliveWorkers = workerClusterQueryService.getAllAliveWorkers(appId)
            .stream()
            .map(WorkerInfo::getAddress)
            .collect(Collectors.toSet());
    // workers that are alive AND have this container deployed (insertion order preserved)
    Set<String> deployedList = Sets.newLinkedHashSet();
    // version -> deployed-info entries, used to detect a split-brain cluster
    Multimap<String, DeployedContainerInfo> version2DeployedContainerInfoList = ArrayListMultimap.create();
    for (DeployedContainerInfo info : infoList) {
        String targetWorkerAddress = info.getWorkerAddress();
        if (!aliveWorkers.contains(targetWorkerAddress)) {
            continue;
        }
        deployedList.add(targetWorkerAddress);
        version2DeployedContainerInfoList.put(info.getVersion(), info);
    }
    Set<String> unDeployedList = Sets.newHashSet(aliveWorkers);
    unDeployedList.removeAll(deployedList);
    StringBuilder sb = new StringBuilder("========== DeployedInfo ==========").append(System.lineSeparator());
    // more than one live version means the cluster has split, which is a serious problem
    if (version2DeployedContainerInfoList.keySet().size() > 1) {
        sb.append("WARN: there exists multi version container now, please redeploy to fix this problem").append(System.lineSeparator());
    }
    version2DeployedContainerInfoList.asMap().forEach((version, deployedContainerInfos) -> {
        sb.append("[version] ").append(version).append(System.lineSeparator());
        for (DeployedContainerInfo deployedContainerInfo : deployedContainerInfos) {
            sb.append(String.format("Address: %s, DeployedTime: %s", deployedContainerInfo.getWorkerAddress(), CommonUtils.formatTime(deployedContainerInfo.getDeployedTime()))).append(System.lineSeparator());
        }
    });
    // alive workers that do not have the container yet
    if (!CollectionUtils.isEmpty(unDeployedList)) {
        sb.append("WARN: there exists unDeployed worker(PowerJob will auto fix when some job need to process)").append(System.lineSeparator());
        sb.append("unDeployed worker list ==> ").append(unDeployedList).append(System.lineSeparator());
    }
    if (CollectionUtils.isEmpty(deployedList)) {
        sb.append("no worker deployed this container now~");
    }
    return sb.toString();
}
/**
 * Prepare the container jar locally.
 *
 * Git sources are cloned and built with Maven (version = HEAD commit id) and the
 * resulting jar is uploaded to the remote store; FatJar sources are looked up on
 * the local disk and, failing that, downloaded from the remote store.
 *
 * NOTE(review): when the Git clone/build throws, the catch block does not return,
 * so execution falls through to the local-disk / remote lookup below — confirm
 * this fallback is intentional rather than an accidental code path.
 *
 * @param container container record (its version may be updated for Git sources)
 * @param session   WebSocket session for progress feedback
 * @return the local jar file, or null when the Maven build produced no jar
 * @throws Exception on failure
 */
private File prepareJarFile(ContainerInfoDO container, Session session) throws Exception {
    RemoteEndpoint.Async remote = session.getAsyncRemote();
    // obtain the jar: Git sources need clone + build + version calculation; FatJar sources are fetched directly
    ContainerSourceType sourceType = ContainerSourceType.of(container.getSourceType());
    if (sourceType == ContainerSourceType.Git) {
        String workerDirStr = OmsFileUtils.genTemporaryWorkPath();
        File workerDir = new File(workerDirStr);
        FileUtils.forceMkdir(workerDir);
        try {
            // git clone
            remote.sendText("SYSTEM: start to git clone the code repo, using config: " + container.getSourceInfo());
            GitRepoInfo gitRepoInfo = JsonUtils.parseObject(container.getSourceInfo(), GitRepoInfo.class);
            CloneCommand cloneCommand = Git.cloneRepository()
                    .setDirectory(workerDir)
                    .setURI(gitRepoInfo.getRepo())
                    .setBranch(gitRepoInfo.getBranch());
            if (!StringUtils.isEmpty(gitRepoInfo.getUsername())) {
                CredentialsProvider credentialsProvider = new UsernamePasswordCredentialsProvider(gitRepoInfo.getUsername(), gitRepoInfo.getPassword());
                cloneCommand.setCredentialsProvider(credentialsProvider);
            }
            cloneCommand.call();
            // use the latest commit id as the container version
            String oldVersion = container.getVersion();
            try (Repository repository = Git.open(workerDir).getRepository()) {
                Ref head = repository.getRefDatabase().findRef("HEAD");
                container.setVersion(head.getObjectId().getName());
            }
            if (container.getVersion().equals(oldVersion)) {
                remote.sendText(String.format("SYSTEM: this commitId(%s) is the same as the last.", oldVersion));
            }else {
                remote.sendText(String.format("SYSTEM: new version detected, from %s to %s.", oldVersion, container.getVersion()));
            }
            remote.sendText("SYSTEM: git clone successfully, star to compile the project.");
            // mvn clean package -DskipTests -U
            Invoker mvnInvoker = new DefaultInvoker();
            InvocationRequest ivkReq = new DefaultInvocationRequest();
            // -U: force Maven to re-check SNAPSHOT dependencies so the build uses the latest state
            // -e: print the full stack trace when the build fails
            // -B: batch mode, avoids hanging on interactive prompts
            ivkReq.setGoals(Lists.newArrayList("clean", "package", "-DskipTests", "-U", "-e", "-B"));
            ivkReq.setBaseDirectory(workerDir);
            ivkReq.setOutputHandler(remote::sendText);
            ivkReq.setBatchMode(true);
            mvnInvoker.execute(ivkReq);
            String targetDirStr = workerDirStr + "/target";
            File targetDir = new File(targetDirStr);
            // the build is expected to produce an assembly jar ("jar-with-dependencies")
            IOFileFilter fileFilter = FileFilterUtils.asFileFilter((dir, name) -> name.endsWith("jar-with-dependencies.jar"));
            Collection<File> jarFile = FileUtils.listFiles(targetDir, fileFilter, null);
            if (CollectionUtils.isEmpty(jarFile)) {
                remote.sendText("SYSTEM: can't find packaged jar(maybe maven build failed), so deploy failed.");
                return null;
            }
            File jarWithDependency = jarFile.iterator().next();
            String jarFileName = genContainerJarName(container.getVersion());
            FileLocation dfsFL = new FileLocation().setBucket(Constants.CONTAINER_BUCKET).setName(jarFileName);
            // only upload when the remote store does not already have this version
            Optional<FileMeta> dfsMetaOpt = dFsService.fetchFileMeta(dfsFL);
            if (dfsMetaOpt.isPresent()) {
                remote.sendText("SYSTEM: find the jar resource in remote successfully, so it's no need to upload anymore.");
            } else {
                remote.sendText("SYSTEM: can't find the jar resource in remote, maybe this is a new version, start to upload new version.");
                dFsService.store(new StoreRequest().setFileLocation(dfsFL).setLocalFile(jarWithDependency));
                remote.sendText("SYSTEM: upload to GridFS successfully~");
            }
            // move the built jar from the temporary workspace to the official jar directory
            String localFileStr = OmsFileUtils.genContainerJarPath() + jarFileName;
            File localFile = new File(localFileStr);
            if (localFile.exists()) {
                FileUtils.forceDelete(localFile);
            }
            FileUtils.copyFile(jarWithDependency, localFile);
            return localFile;
        } catch (Throwable t) {
            log.error("[ContainerService] prepareJarFile failed for container: {}", container, t);
            remote.sendText("SYSTEM: [ERROR] prepare jar file failed: " + ExceptionUtils.getStackTrace(t));
        } finally {
            // wipe the temporary workspace
            FileUtils.forceDelete(workerDir);
        }
    }
    // first, look for the target jar on the local disk
    String jarFileName = genContainerJarName(container.getVersion());
    String localFileStr = OmsFileUtils.genContainerJarPath() + jarFileName;
    File localFile = new File(localFileStr);
    if (localFile.exists()) {
        remote.sendText("SYSTEM: find the jar file in local disk.");
        return localFile;
    }
    // otherwise download it from the remote file store (GridFS)
    remote.sendText(String.format("SYSTEM: try to find the jarFile(%s) in GridFS", jarFileName));
    downloadJarFromGridFS(jarFileName, localFile);
    remote.sendText("SYSTEM: download jar file from GridFS successfully~");
    return localFile;
}
/**
 * Download a container jar from the remote file store to the local disk.
 *
 * A per-file-name segment lock prevents concurrent downloads of the same jar;
 * a failed download removes the partial file and rethrows.
 *
 * @param mongoFileName file name in the remote store
 * @param targetFile    local destination file
 */
private void downloadJarFromGridFS(String mongoFileName, File targetFile) {
    int lockId = mongoFileName.hashCode();
    try {
        segmentLock.lockInterruptibleSafe(lockId);
        // another thread may have completed the download while we waited for the lock
        if (targetFile.exists()) {
            return;
        }
        try {
            FileLocation dfsFL = new FileLocation().setBucket(Constants.CONTAINER_BUCKET).setName(mongoFileName);
            Optional<FileMeta> dfsMetaOpt = dFsService.fetchFileMeta(dfsFL);
            if (!dfsMetaOpt.isPresent()) {
                log.warn("[ContainerService] can't find container's jar file({}) in gridFS.", mongoFileName);
                return;
            }
            FileUtils.forceMkdirParent(targetFile);
            dFsService.download(new DownloadRequest().setTarget(targetFile).setFileLocation(dfsFL));
        }catch (Exception e) {
            // remove any partially-downloaded file before propagating the failure
            CommonUtils.executeIgnoreException(() -> FileUtils.forceDelete(targetFile));
            ExceptionUtils.rethrow(e);
        }
    }finally {
        segmentLock.unlock(lockId);
    }
}
/**
 * Resolve the canonical jar file name for a container version.
 *
 * @param version container version (md5 or git commit id)
 * @return file name of the form oms-container-&lt;version&gt;.jar
 */
private static String genContainerJarName(String version) {
    return "oms-container-" + version + ".jar";
}
/**
 * Compute the inter-batch sleep time: 1 second per 10 MB of jar size, plus 1 second.
 *
 * @param fileLength jar size in bytes
 * @return sleep time in milliseconds
 */
private long calculateSleepTime(long fileLength) {
    long tenMbChunks = fileLength / (10 * FileUtils.ONE_MB);
    return (tenMbChunks + 1) * 1000;
}
}

View File

@ -0,0 +1,112 @@
package tech.powerjob.server.core.container;
import tech.powerjob.common.ContainerConstant;
import net.lingala.zip4j.ZipFile;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import tech.powerjob.server.common.utils.OmsFileUtils;
import java.io.*;
import java.util.Objects;
/**
 * Generator of oms-worker container template projects.
 *
 * @author tjq
 * @since 2020/5/15
 */
public class ContainerTemplateGenerator {

    private static final String ORIGIN_FILE_NAME = "oms-template-origin";

    /**
     * Utility class, not meant to be instantiated.
     */
    private ContainerTemplateGenerator() {
    }

    /**
     * Generate the container template project as a zip archive.
     *
     * @param group       pom groupId
     * @param artifact    pom artifactId
     * @param name        pom name
     * @param packageName java package for user code
     * @param javaVersion value for maven.compiler.source / maven.compiler.target
     * @return the generated zip file
     * @throws IOException on any file-system failure
     */
    public static File generate(String group, String artifact, String name, String packageName, Integer javaVersion) throws IOException {
        String workerDir = OmsFileUtils.genTemporaryWorkPath();
        File originJar = new File(workerDir + "tmp.jar");
        String tmpPath = workerDir + "/unzip/";
        // on CentOS 7 getResource throws FileNotFoundException for unknown reasons, so read the template from the classpath as a stream
        try (InputStream is = ContainerTemplateGenerator.class.getClassLoader().getResourceAsStream(ORIGIN_FILE_NAME + ".zip")) {
            Objects.requireNonNull(is, "generate container template failed, can't find zip file in classpath.");
            FileUtils.copyInputStreamToFile(is, originJar);
        }
        ZipFile zipFile = new ZipFile(originJar);
        zipFile.extractAll(tmpPath);
        String rootPath = tmpPath + ORIGIN_FILE_NAME;
        // 1. rewrite pom.xml: read line by line, patch the placeholder lines, then write back
        String pomPath = rootPath + "/pom.xml";
        String line;
        StringBuilder buffer = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new FileReader(pomPath))) {
            while ((line = br.readLine()) != null) {
                if (line.contains("<groupId>groupId</groupId>")) {
                    buffer.append("    <groupId>").append(group).append("</groupId>");
                }else if (line.contains("<artifactId>artifactId</artifactId>")) {
                    buffer.append("    <artifactId>").append(artifact).append("</artifactId>");
                }else if (line.contains("<name>name</name>")) {
                    buffer.append("    <name>").append(name).append("</name>");
                }else if (line.contains("<maven.compiler.source>")) {
                    buffer.append("        <maven.compiler.source>").append(javaVersion).append("</maven.compiler.source>");
                }else if (line.contains("<maven.compiler.target>")) {
                    buffer.append("        <maven.compiler.target>").append(javaVersion).append("</maven.compiler.target>");
                } else {
                    buffer.append(line);
                }
                buffer.append(System.lineSeparator());
            }
        }
        OmsFileUtils.string2File(buffer.toString(), new File(pomPath));
        // 2. create the source directory for the user package
        String packagePath = StringUtils.replace(packageName, ".", "/");
        String absPath = rootPath + "/src/main/java/" + packagePath;
        FileUtils.forceMkdir(new File(absPath));
        // 3. rewrite the Spring context file so component-scan targets the user package
        String resourcePath = rootPath + "/src/main/resources/";
        String springXMLPath = resourcePath + ContainerConstant.SPRING_CONTEXT_FILE_NAME;
        buffer.setLength(0);
        try (BufferedReader br = new BufferedReader(new FileReader(springXMLPath))) {
            while ((line = br.readLine()) != null) {
                if (line.contains("<context:component-scan base-package=\"")) {
                    buffer.append("    <context:component-scan base-package=\"").append(packageName).append("\"/>");
                }else {
                    buffer.append(line);
                }
                buffer.append(System.lineSeparator());
            }
        }
        OmsFileUtils.string2File(buffer.toString(), new File(springXMLPath));
        // 4. write packageName into the container properties so the container can load user classes
        String propertiesPath = resourcePath + ContainerConstant.CONTAINER_PROPERTIES_FILE_NAME;
        String properties = ContainerConstant.CONTAINER_PACKAGE_NAME_KEY + "=" + packageName;
        OmsFileUtils.string2File(properties, new File(propertiesPath));
        // 5. re-zip the patched template
        String finPath = tmpPath + "template.zip";
        ZipFile finZip = new ZipFile(finPath);
        finZip.addFolder(new File(rootPath));
        // 6. delete the original template archive
        FileUtils.forceDelete(originJar);
        return finZip.getFile();
    }
}

View File

@ -0,0 +1,18 @@
package tech.powerjob.server.core.evaluator;
/**
 * Expression evaluator abstraction.
 *
 * @author Echo009
 * @since 2021/12/10
 */
public interface Evaluator {

    /**
     * Evaluate the expression against the given input.
     *
     * @param expression an executable expression
     * @param input      input value made available to the expression
     * @return the evaluation result
     */
    Object evaluate(String expression, Object input);
}

View File

@ -0,0 +1,30 @@
package tech.powerjob.server.core.evaluator;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import javax.script.Bindings;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
/**
 * Groovy-based {@link Evaluator} implementation using the JSR-223 scripting API.
 *
 * @author Echo009
 * @since 2021/12/10
 */
@Slf4j
@Component
public class GroovyEvaluator implements Evaluator {

    // shared engine instance; NOTE(review): getEngineByName returns null when the Groovy
    // engine is absent from the classpath, which would surface as an NPE in evaluate — confirm
    private static final ScriptEngine ENGINE = new ScriptEngineManager().getEngineByName("groovy");

    @Override
    @SneakyThrows
    public Object evaluate(String expression, Object input) {
        // expose the input to the script as the variable "context"
        Bindings bindings = ENGINE.createBindings();
        bindings.put("context", input);
        return ENGINE.eval(expression, bindings);
    }
}

View File

@ -0,0 +1,171 @@
package tech.powerjob.server.core.handler;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.springframework.beans.BeanUtils;
import org.springframework.core.env.Environment;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.request.*;
import tech.powerjob.common.response.AskResponse;
import tech.powerjob.common.serialize.JsonUtils;
import tech.powerjob.common.utils.NetUtils;
import tech.powerjob.remote.framework.actor.Handler;
import tech.powerjob.remote.framework.actor.ProcessType;
import tech.powerjob.common.enums.SwitchableStatus;
import tech.powerjob.server.common.module.WorkerInfo;
import tech.powerjob.server.common.utils.SpringUtils;
import tech.powerjob.server.monitor.MonitorService;
import tech.powerjob.server.monitor.events.w2s.TtReportInstanceStatusEvent;
import tech.powerjob.server.monitor.events.w2s.WorkerHeartbeatEvent;
import tech.powerjob.server.monitor.events.w2s.WorkerLogReportEvent;
import tech.powerjob.server.persistence.remote.model.ContainerInfoDO;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import tech.powerjob.server.persistence.remote.repository.ContainerInfoRepository;
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
import tech.powerjob.server.remote.worker.WorkerClusterQueryService;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.RejectedExecutionException;
import java.util.stream.Collectors;
import static tech.powerjob.common.RemoteConstant.*;
/**
 * wrapper monitor for IWorkerRequestHandler
 *
 * Template base class: builds and records a monitor event around each worker
 * request, delegating the business handling to the abstract *0 methods.
 *
 * @author tjq
 * @since 2022/9/11
 */
@RequiredArgsConstructor
@Slf4j
public abstract class AbWorkerRequestHandler implements IWorkerRequestHandler {

    protected final MonitorService monitorService;

    protected final Environment environment;

    protected final ContainerInfoRepository containerInfoRepository;

    private final WorkerClusterQueryService workerClusterQueryService;

    /**
     * Business handling of a worker heartbeat; the event is recorded by the caller.
     */
    protected abstract void processWorkerHeartbeat0(WorkerHeartbeat heartbeat, WorkerHeartbeatEvent event);

    /**
     * Business handling of a TaskTracker status report; the event is recorded by the caller.
     */
    protected abstract AskResponse processTaskTrackerReportInstanceStatus0(TaskTrackerReportInstanceStatusReq req, TtReportInstanceStatusEvent event) throws Exception;

    /**
     * Business handling of a worker log report; the event is recorded by the caller.
     */
    protected abstract void processWorkerLogReport0(WorkerLogReportReq req, WorkerLogReportEvent event);

    @Override
    @Handler(path = S4W_HANDLER_WORKER_HEARTBEAT, processType = ProcessType.NO_BLOCKING)
    public void processWorkerHeartbeat(WorkerHeartbeat heartbeat) {
        long startMs = System.currentTimeMillis();
        // delayMs measures transport + queueing delay between worker send-time and server receive-time
        WorkerHeartbeatEvent event = new WorkerHeartbeatEvent()
                .setAppName(heartbeat.getAppName())
                .setAppId(heartbeat.getAppId())
                .setVersion(heartbeat.getVersion())
                .setProtocol(heartbeat.getProtocol())
                .setTag(heartbeat.getTag())
                .setWorkerAddress(heartbeat.getWorkerAddress())
                .setDelayMs(startMs - heartbeat.getHeartbeatTime())
                .setScore(heartbeat.getSystemMetrics().getScore());
        processWorkerHeartbeat0(heartbeat, event);
        monitorService.monitor(event);
    }

    @Override
    @Handler(path = S4W_HANDLER_REPORT_INSTANCE_STATUS, processType = ProcessType.BLOCKING)
    public AskResponse processTaskTrackerReportInstanceStatus(TaskTrackerReportInstanceStatusReq req) {
        long startMs = System.currentTimeMillis();
        TtReportInstanceStatusEvent event = new TtReportInstanceStatusEvent()
                .setAppId(req.getAppId())
                .setJobId(req.getJobId())
                .setInstanceId(req.getInstanceId())
                .setWfInstanceId(req.getWfInstanceId())
                .setInstanceStatus(InstanceStatus.of(req.getInstanceStatus()))
                .setDelayMs(startMs - req.getReportTime())
                .setServerProcessStatus(TtReportInstanceStatusEvent.Status.SUCCESS);
        try {
            return processTaskTrackerReportInstanceStatus0(req, event);
        } catch (Exception e) {
            // flip the event status so the monitor records the failure, then report it to the worker
            event.setServerProcessStatus(TtReportInstanceStatusEvent.Status.FAILED);
            log.error("[WorkerRequestHandler] processTaskTrackerReportInstanceStatus failed for request: {}", req, e);
            return AskResponse.failed(ExceptionUtils.getMessage(e));
        } finally {
            event.setServerProcessCost(System.currentTimeMillis() - startMs);
            monitorService.monitor(event);
        }
    }

    @Override
    @Handler(path = S4W_HANDLER_REPORT_LOG, processType = ProcessType.NO_BLOCKING)
    public void processWorkerLogReport(WorkerLogReportReq req) {
        WorkerLogReportEvent event = new WorkerLogReportEvent()
                .setWorkerAddress(req.getWorkerAddress())
                .setLogNum(req.getInstanceLogContents().size());
        try {
            processWorkerLogReport0(req, event);
            event.setStatus(WorkerLogReportEvent.Status.SUCCESS);
        } catch (RejectedExecutionException re) {
            // log pool saturated: mark rejected, logs are dropped
            event.setStatus(WorkerLogReportEvent.Status.REJECTED);
        } catch (Throwable t) {
            event.setStatus(WorkerLogReportEvent.Status.EXCEPTION);
            log.warn("[WorkerRequestHandler] process worker report failed!", t);
        } finally {
            monitorService.monitor(event);
        }
    }

    @Override
    @Handler(path = S4W_HANDLER_QUERY_JOB_CLUSTER, processType = ProcessType.BLOCKING)
    public AskResponse processWorkerQueryExecutorCluster(WorkerQueryExecutorClusterReq req) {
        AskResponse askResponse;
        Long jobId = req.getJobId();
        Long appId = req.getAppId();
        JobInfoRepository jobInfoRepository = SpringUtils.getBean(JobInfoRepository.class);
        Optional<JobInfoDO> jobInfoOpt = jobInfoRepository.findById(jobId);
        if (jobInfoOpt.isPresent()) {
            JobInfoDO jobInfo = jobInfoOpt.get();
            // guard: the job must belong to the requesting app
            if (!jobInfo.getAppId().equals(appId)) {
                askResponse = AskResponse.failed("Permission Denied!");
            }else {
                List<String> sortedAvailableWorker = workerClusterQueryService.geAvailableWorkers(jobInfo)
                        .stream().map(WorkerInfo::getAddress).collect(Collectors.toList());
                askResponse = AskResponse.succeed(sortedAvailableWorker);
            }
        }else {
            askResponse = AskResponse.failed("can't find jobInfo by jobId: " + jobId);
        }
        return askResponse;
    }

    @Override
    @Handler(path = S4W_HANDLER_WORKER_NEED_DEPLOY_CONTAINER, processType = ProcessType.BLOCKING)
    public AskResponse processWorkerNeedDeployContainer(WorkerNeedDeployContainerRequest req) {
        String port = environment.getProperty("local.server.port");
        Optional<ContainerInfoDO> containerInfoOpt = containerInfoRepository.findById(req.getContainerId());
        AskResponse askResponse = new AskResponse();
        // only enabled containers may be deployed
        if (!containerInfoOpt.isPresent() || containerInfoOpt.get().getStatus() != SwitchableStatus.ENABLE.getV()) {
            askResponse.setSuccess(false);
            askResponse.setMessage("can't find container by id: " + req.getContainerId());
        }else {
            ContainerInfoDO containerInfo = containerInfoOpt.get();
            askResponse.setSuccess(true);
            ServerDeployContainerRequest dpReq = new ServerDeployContainerRequest();
            BeanUtils.copyProperties(containerInfo, dpReq);
            dpReq.setContainerId(containerInfo.getId());
            // tell the worker where to download the jar from (this server)
            String downloadURL = String.format("http://%s:%s/container/downloadJar?version=%s", NetUtils.getLocalHost(), port, containerInfo.getVersion());
            dpReq.setDownloadURL(downloadURL);
            askResponse.setData(JsonUtils.toBytes(dpReq));
        }
        return askResponse;
    }
}

View File

@ -0,0 +1,46 @@
package tech.powerjob.server.core.handler;
import tech.powerjob.common.request.*;
import tech.powerjob.common.response.AskResponse;
/**
 * Defines the protocol handled between server and worker.
 *
 * @author tjq
 * @since 2022/9/10
 */
public interface IWorkerRequestHandler {

    /**
     * Process a heartbeat reported by a worker.
     * @param heartbeat heartbeat payload
     */
    void processWorkerHeartbeat(WorkerHeartbeat heartbeat);

    /**
     * Process a task-instance status report from a TaskTracker.
     * @param req report request
     * @return response message
     */
    AskResponse processTaskTrackerReportInstanceStatus(TaskTrackerReportInstanceStatusReq req);

    /**
     * Process a worker's query for its executor cluster.
     * @param req request
     * @return cluster info
     */
    AskResponse processWorkerQueryExecutorCluster(WorkerQueryExecutorClusterReq req);

    /**
     * Process a worker's log-push request (handled asynchronously on an internal thread pool, non-blocking).
     * @param req request
     */
    void processWorkerLogReport(WorkerLogReportReq req);

    /**
     * Process a worker's container-deploy request.
     * @param request request
     * @return container deployment info
     */
    AskResponse processWorkerNeedDeployContainer(WorkerNeedDeployContainerRequest request);
}

View File

@ -0,0 +1,78 @@
package tech.powerjob.server.core.handler;
import lombok.extern.slf4j.Slf4j;
import org.springframework.core.env.Environment;
import org.springframework.stereotype.Component;
import org.springframework.util.CollectionUtils;
import tech.powerjob.common.RemoteConstant;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.request.TaskTrackerReportInstanceStatusReq;
import tech.powerjob.common.request.WorkerHeartbeat;
import tech.powerjob.common.request.WorkerLogReportReq;
import tech.powerjob.common.response.AskResponse;
import tech.powerjob.remote.framework.actor.Actor;
import tech.powerjob.server.core.instance.InstanceLogService;
import tech.powerjob.server.core.instance.InstanceManager;
import tech.powerjob.server.core.workflow.WorkflowInstanceManager;
import tech.powerjob.server.monitor.MonitorService;
import tech.powerjob.server.monitor.events.w2s.TtReportInstanceStatusEvent;
import tech.powerjob.server.monitor.events.w2s.WorkerHeartbeatEvent;
import tech.powerjob.server.monitor.events.w2s.WorkerLogReportEvent;
import tech.powerjob.server.persistence.remote.repository.ContainerInfoRepository;
import tech.powerjob.server.remote.worker.WorkerClusterManagerService;
import tech.powerjob.server.remote.worker.WorkerClusterQueryService;
/**
 * receive and process worker's request
 *
 * @author tjq
 * @since 2022/9/11
 */
@Slf4j
@Component
@Actor(path = RemoteConstant.S4W_PATH)
public class WorkerRequestHandlerImpl extends AbWorkerRequestHandler {

    private final InstanceManager instanceManager;

    private final WorkflowInstanceManager workflowInstanceManager;

    private final InstanceLogService instanceLogService;

    public WorkerRequestHandlerImpl(InstanceManager instanceManager, WorkflowInstanceManager workflowInstanceManager, InstanceLogService instanceLogService,
                                    MonitorService monitorService, Environment environment, ContainerInfoRepository containerInfoRepository, WorkerClusterQueryService workerClusterQueryService) {
        super(monitorService, environment, containerInfoRepository, workerClusterQueryService);
        this.instanceManager = instanceManager;
        this.workflowInstanceManager = workflowInstanceManager;
        this.instanceLogService = instanceLogService;
    }

    @Override
    protected void processWorkerHeartbeat0(WorkerHeartbeat heartbeat, WorkerHeartbeatEvent event) {
        WorkerClusterManagerService.updateStatus(heartbeat);
    }

    @Override
    protected AskResponse processTaskTrackerReportInstanceStatus0(TaskTrackerReportInstanceStatusReq req, TtReportInstanceStatusEvent event) throws Exception {
        // 2021/02/05: for workflow instances, update the workflow context first, then the instance status; this step must not throw here
        if (req.getWfInstanceId() != null && !CollectionUtils.isEmpty(req.getAppendedWfContext())) {
            // merge the appended workflow context
            workflowInstanceManager.updateWorkflowContext(req.getWfInstanceId(),req.getAppendedWfContext());
        }
        instanceManager.updateStatus(req);
        // terminal states (success/failure) must be acknowledged so the worker stops reporting
        if (InstanceStatus.FINISHED_STATUS.contains(req.getInstanceStatus())) {
            return AskResponse.succeed(null);
        }
        return null;
    }

    @Override
    protected void processWorkerLogReport0(WorkerLogReportReq req, WorkerLogReportEvent event) {
        // cheap enough to run inline: a few checks plus Map#get
        instanceLogService.submitLogs(req.getWorkerAddress(), req.getInstanceLogContents());
    }
}

View File

@ -0,0 +1,35 @@
package tech.powerjob.server.core.helper;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.enums.WorkflowInstanceStatus;
/**
 * Maps workflow-level statuses onto task-instance statuses.
 *
 * @author Echo009
 * @since 2021/12/13
 */
public class StatusMappingHelper {

    private StatusMappingHelper(){
    }

    /**
     * Convert a workflow instance status to the corresponding task instance status.
     *
     * @param workflowInstanceStatus workflow instance status (must not be null)
     * @return mapped instance status, or null when there is no counterpart
     */
    public static InstanceStatus toInstanceStatus(WorkflowInstanceStatus workflowInstanceStatus) {
        InstanceStatus mapped;
        switch (workflowInstanceStatus) {
            case SUCCEED:
                mapped = InstanceStatus.SUCCEED;
                break;
            case FAILED:
                mapped = InstanceStatus.FAILED;
                break;
            case STOPPED:
                mapped = InstanceStatus.STOPPED;
                break;
            case RUNNING:
                mapped = InstanceStatus.RUNNING;
                break;
            default:
                // statuses with no task-instance counterpart map to null
                mapped = null;
        }
        return mapped;
    }
}

View File

@ -0,0 +1,440 @@
package tech.powerjob.server.core.instance;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.lang3.time.FastDateFormat;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.task.AsyncTaskExecutor;
import org.springframework.scheduling.annotation.Async;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.transaction.support.TransactionTemplate;
import org.springframework.util.CollectionUtils;
import tech.powerjob.common.OmsConstant;
import tech.powerjob.common.enums.LogLevel;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.common.model.InstanceLogContent;
import tech.powerjob.common.utils.CommonUtils;
import tech.powerjob.common.utils.NetUtils;
import tech.powerjob.common.utils.SegmentLock;
import tech.powerjob.server.common.constants.PJThreadPool;
import tech.powerjob.server.common.utils.OmsFileUtils;
import tech.powerjob.server.extension.dfs.*;
import tech.powerjob.server.persistence.StringPage;
import tech.powerjob.server.persistence.local.LocalInstanceLogDO;
import tech.powerjob.server.persistence.local.LocalInstanceLogRepository;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import tech.powerjob.server.persistence.storage.Constants;
import tech.powerjob.server.remote.server.redirector.DesignateServer;
import javax.annotation.Resource;
import java.io.*;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* 任务实例运行时日志服务
*
* @author tjq
* @since 2020/4/27
*/
@Slf4j
@Service
public class InstanceLogService {
@Value("${server.port}")
private int port;
@Resource
private InstanceMetadataService instanceMetadataService;
@Resource
private DFsService dFsService;
/**
 * Transaction template bound to the local (embedded) database
 */
@Resource(name = "localTransactionTemplate")
private TransactionTemplate localTransactionTemplate;
@Resource
private LocalInstanceLogRepository localInstanceLogRepository;
/**
 * instanceId -> last report time, for instances whose online logs are held locally
 */
private final Map<Long, Long> instanceId2LastReportTime = Maps.newConcurrentMap();
@Resource(name = PJThreadPool.BACKGROUND_POOL)
private AsyncTaskExecutor powerJobBackgroundPool;
@Value("${server.servlet.context-path:#{null}}")
private String servletContextPath;
private static final String DOWNLOAD_URL_PATTERN = "http://%s:%d%s/instance/downloadLog?instanceId=%d";
/**
 * Segment lock guarding per-instance log file preparation
 */
private final SegmentLock segmentLock = new SegmentLock(8);
/**
 * Timestamp formatter for rendered log lines
 */
private static final FastDateFormat DATE_FORMAT = FastDateFormat.getInstance(OmsConstant.TIME_PATTERN_PLUS);
/**
 * Number of log lines per displayed page
 */
private static final int MAX_LINE_COUNT = 100;
/**
 * Cache duration for logs that are still being updated
 */
private static final long LOG_CACHE_TIME = 10000;
/**
* 提交日志记录,持久化到本地数据库中
* @param workerAddress 上报机器地址
* @param logs 任务实例运行时日志
*/
@Async(value = PJThreadPool.LOCAL_DB_POOL)
public void submitLogs(String workerAddress, List<InstanceLogContent> logs) {
List<LocalInstanceLogDO> logList = logs.stream().map(x -> {
instanceId2LastReportTime.put(x.getInstanceId(), System.currentTimeMillis());
LocalInstanceLogDO y = new LocalInstanceLogDO();
BeanUtils.copyProperties(x, y);
y.setWorkerAddress(workerAddress);
return y;
}).collect(Collectors.toList());
try {
CommonUtils.executeWithRetry0(() -> localInstanceLogRepository.saveAll(logList));
}catch (Exception e) {
log.warn("[InstanceLogService] persistent instance logs failed, these logs will be dropped: {}.", logs, e);
}
}
/**
* 获取任务实例运行日志(默认存在本地数据,需要由生成完成请求的路由与转发)
* @param appId appIdAOP 专用
* @param instanceId 任务实例ID
* @param index 页码从0开始
* @return 文本字符串
*/
@DesignateServer
public StringPage fetchInstanceLog(Long appId, Long instanceId, Long index) {
try {
Future<File> fileFuture = prepareLogFile(instanceId);
// 超时并不会打断正在执行的任务
File logFile = fileFuture.get(5, TimeUnit.SECONDS);
// 分页展示数据
long lines = 0;
StringBuilder sb = new StringBuilder();
String lineStr;
long left = index * MAX_LINE_COUNT;
long right = left + MAX_LINE_COUNT;
try (LineNumberReader lr = new LineNumberReader(new FileReader(logFile))) {
while ((lineStr = lr.readLine()) != null) {
// 指定范围内,读出
if (lines >= left && lines < right) {
sb.append(lineStr).append(System.lineSeparator());
}
++lines;
}
}catch (Exception e) {
log.warn("[InstanceLog-{}] read logFile from disk failed for app: {}.", instanceId, appId, e);
return StringPage.simple("oms-server execution exception, caused by " + ExceptionUtils.getRootCauseMessage(e));
}
double totalPage = Math.ceil(1.0 * lines / MAX_LINE_COUNT);
return new StringPage(index, (long) totalPage, sb.toString());
}catch (TimeoutException te) {
return StringPage.simple("log file is being prepared, please try again later.");
}catch (Exception e) {
log.warn("[InstanceLog-{}] fetch instance log failed.", instanceId, e);
return StringPage.simple("oms-server execution exception, caused by " + ExceptionUtils.getRootCauseMessage(e));
}
}
/**
* 获取日志的下载链接
* @param appId AOP 专用
* @param instanceId 任务实例 ID
* @return 下载链接
*/
@DesignateServer
public String fetchDownloadUrl(Long appId, Long instanceId) {
String path = Optional.ofNullable(servletContextPath).orElse(StringUtils.EMPTY);
String url = String.format(DOWNLOAD_URL_PATTERN, NetUtils.getLocalHost(), port, path, instanceId);
log.info("[InstanceLog-{}] downloadURL for appId[{}]: {}", instanceId, appId, url);
return url;
}
/**
* 下载全部的任务日志文件
* @param instanceId 任务实例ID
* @return 日志文件
* @throws Exception 异常
*/
public File downloadInstanceLog(long instanceId) throws Exception {
Future<File> fileFuture = prepareLogFile(instanceId);
return fileFuture.get(1, TimeUnit.MINUTES);
}
/**
* 异步准备日志文件
* @param instanceId 任务实例ID
* @return 异步结果
*/
private Future<File> prepareLogFile(long instanceId) {
return powerJobBackgroundPool.submit(() -> {
// 在线日志还在不断更新,需要使用本地数据库中的数据
if (instanceId2LastReportTime.containsKey(instanceId)) {
return genTemporaryLogFile(instanceId);
}
return genStableLogFile(instanceId);
});
}
/**
* 将本地的任务实例运行日志同步到 mongoDB 存储,在任务执行结束后异步执行
* @param instanceId 任务实例ID
*/
@Async(PJThreadPool.BACKGROUND_POOL)
public void sync(Long instanceId) {
Stopwatch sw = Stopwatch.createStarted();
try {
// 先持久化到本地文件
File stableLogFile = genStableLogFile(instanceId);
// 将文件推送到 MongoDB
FileLocation dfsFL = new FileLocation().setBucket(Constants.LOG_BUCKET).setName(genMongoFileName(instanceId));
try {
dFsService.store(new StoreRequest().setLocalFile(stableLogFile).setFileLocation(dfsFL));
log.info("[InstanceLog-{}] push local instanceLogs to mongoDB succeed, using: {}.", instanceId, sw.stop());
}catch (Exception e) {
log.warn("[InstanceLog-{}] push local instanceLogs to mongoDB failed.", instanceId, e);
}
}catch (Exception e) {
log.warn("[InstanceLog-{}] sync local instanceLogs failed.", instanceId, e);
}
// 删除本地数据库数据
try {
instanceId2LastReportTime.remove(instanceId);
CommonUtils.executeWithRetry0(() -> localInstanceLogRepository.deleteByInstanceId(instanceId));
log.info("[InstanceLog-{}] delete local instanceLog successfully.", instanceId);
}catch (Exception e) {
log.warn("[InstanceLog-{}] delete local instanceLog failed.", instanceId, e);
}
}
private File genTemporaryLogFile(long instanceId) {
String path = genLogFilePath(instanceId, false);
int lockId = ("tpFileLock-" + instanceId).hashCode();
try {
segmentLock.lockInterruptibleSafe(lockId);
// Stream 需要在事务的包裹之下使用
return localTransactionTemplate.execute(status -> {
File f = new File(path);
// 如果文件存在且有效,则不再重新构建日志文件(这个判断也需要放在锁内,否则构建到一半的文件会被返回)
if (f.exists() && (System.currentTimeMillis() - f.lastModified()) < LOG_CACHE_TIME) {
return f;
}
try {
// 创建父文件夹(文件在开流时自动会被创建)
FileUtils.forceMkdirParent(f);
// 重新构建文件
try (Stream<LocalInstanceLogDO> allLogStream = localInstanceLogRepository.findByInstanceIdOrderByLogTime(instanceId)) {
stream2File(allLogStream, f);
}
return f;
}catch (Exception e) {
CommonUtils.executeIgnoreException(() -> FileUtils.forceDelete(f));
throw new RuntimeException(e);
}
});
}finally {
segmentLock.unlock(lockId);
}
}
private File genStableLogFile(long instanceId) {
String path = genLogFilePath(instanceId, true);
int lockId = ("stFileLock-" + instanceId).hashCode();
try {
segmentLock.lockInterruptibleSafe(lockId);
return localTransactionTemplate.execute(status -> {
File f = new File(path);
if (f.exists()) {
return f;
}
try {
// 创建父文件夹(文件在开流时自动会被创建)
FileUtils.forceMkdirParent(f);
// 本地存在数据,从本地持久化(对应 SYNC 的情况)
if (instanceId2LastReportTime.containsKey(instanceId)) {
try (Stream<LocalInstanceLogDO> allLogStream = localInstanceLogRepository.findByInstanceIdOrderByLogTime(instanceId)) {
stream2File(allLogStream, f);
}
}else {
FileLocation dfl = new FileLocation().setBucket(Constants.LOG_BUCKET).setName(genMongoFileName(instanceId));
Optional<FileMeta> dflMetaOpt = dFsService.fetchFileMeta(dfl);
if (!dflMetaOpt.isPresent()) {
OmsFileUtils.string2File("SYSTEM: There is no online log for this job instance.", f);
return f;
}
dFsService.download(new DownloadRequest().setTarget(f).setFileLocation(dfl));
}
return f;
}catch (Exception e) {
CommonUtils.executeIgnoreException(() -> FileUtils.forceDelete(f));
throw new RuntimeException(e);
}
});
}finally {
segmentLock.unlock(lockId);
}
}
/**
* 将数据库中存储的日志流转化为磁盘日志文件
* @param stream 流
* @param logFile 目标日志文件
*/
private void stream2File(Stream<LocalInstanceLogDO> stream, File logFile) {
try (FileWriter fw = new FileWriter(logFile); BufferedWriter bfw = new BufferedWriter(fw)) {
stream.forEach(instanceLog -> {
try {
bfw.write(convertLog(instanceLog) + System.lineSeparator());
}catch (Exception ignore) {
}
});
}catch (IOException ie) {
ExceptionUtils.rethrow(ie);
}
}
/**
* 拼接日志 -> 2020-04-29 22:07:10.059 [192.168.1.1:2777] INFO XXX
* @param instanceLog 日志对象
* @return 字符串
*/
private static String convertLog(LocalInstanceLogDO instanceLog) {
return String.format("%s [%s] %s %s",
DATE_FORMAT.format(instanceLog.getLogTime()),
instanceLog.getWorkerAddress(),
LogLevel.genLogLevelString(instanceLog.getLogLevel()),
instanceLog.getLogContent());
}
@Async(PJThreadPool.TIMING_POOL)
@Scheduled(fixedDelay = 120000)
public void timingCheck() {
// 定时删除秒级任务的日志
List<Long> frequentInstanceIds = Lists.newLinkedList();
instanceId2LastReportTime.keySet().forEach(instanceId -> {
try {
JobInfoDO jobInfo = instanceMetadataService.fetchJobInfoByInstanceId(instanceId);
if (TimeExpressionType.FREQUENT_TYPES.contains(jobInfo.getTimeExpressionType())) {
frequentInstanceIds.add(instanceId);
}
}catch (Exception ignore) {
}
});
if (!CollectionUtils.isEmpty(frequentInstanceIds)) {
// 只保留最近10分钟的日志
long time = System.currentTimeMillis() - 10 * 60 * 1000;
Lists.partition(frequentInstanceIds, 100).forEach(p -> {
try {
localInstanceLogRepository.deleteByInstanceIdInAndLogTimeLessThan(p, time);
}catch (Exception e) {
log.warn("[InstanceLogService] delete expired logs for instance: {} failed.", p, e);
}
});
}
// 删除长时间未 REPORT 的日志(必要性考证中......
}
private static String genLogFilePath(long instanceId, boolean stable) {
if (stable) {
return OmsFileUtils.genLogDirPath() + String.format("%d-stable.log", instanceId);
}else {
return OmsFileUtils.genLogDirPath() + String.format("%d-temporary.log", instanceId);
}
}
private static String genMongoFileName(long instanceId) {
return String.format("oms-%d.log", instanceId);
}
/**
* description 在重跑之前移除老的文件,避免重跑后还看到的是之前的示例日志。
* 因为当重跑完读取稳定日志的时候 目前逻辑会先判断本地文件是否存在 若存在则直接返回了 故需要先移除掉
* 参考tech.powerjob.server.core.instance.InstanceLogService#genStableLogFile(long)
* @author jian chen jiang
* date 2024/2/5 17:01
* @param instanceId
* @return void
*/
public void removeOldFile(Long instanceId) {
// 库中的数据不删,删了就会丢失全部的历史日志
try {
//删除本地缓存
String s = genLogFilePath(instanceId, true);
File file = new File(s);
if(!file.exists()){
return;
}
boolean delete = file.delete();
if(!delete){
log.warn("[InstanceLogService] delete old logs{} for instance: {} failed.", s,instanceId);
}
//删除临时文件
String tempFilePath = genLogFilePath(instanceId, false);
File tempFile = new File(tempFilePath);
if(!tempFile.exists()){
return;
}
delete = tempFile.delete();
if(!delete){
log.warn("[InstanceLogService] delete old temp logs{} for instance: {} failed.", s,instanceId);
}
} catch (Throwable t) {
log.error("[InstanceLogService] delete old logs for instance[{}] failed.", instanceId, t);
}
}
}

View File

@ -0,0 +1,250 @@
package tech.powerjob.server.core.instance;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Service;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.common.model.LifeCycle;
import tech.powerjob.common.request.ServerStopInstanceReq;
import tech.powerjob.common.request.TaskTrackerReportInstanceStatusReq;
import tech.powerjob.common.utils.CommonUtils;
import tech.powerjob.remote.framework.base.URL;
import tech.powerjob.server.common.module.WorkerInfo;
import tech.powerjob.server.common.timewheel.holder.HashedWheelTimerHolder;
import tech.powerjob.server.common.utils.SpringUtils;
import tech.powerjob.server.core.alarm.AlarmUtils;
import tech.powerjob.server.core.service.UserService;
import tech.powerjob.server.core.workflow.WorkflowInstanceManager;
import tech.powerjob.server.core.alarm.AlarmCenter;
import tech.powerjob.server.core.alarm.module.JobInstanceAlarm;
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import tech.powerjob.server.persistence.remote.model.UserInfoDO;
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
import tech.powerjob.server.remote.aware.TransportServiceAware;
import tech.powerjob.server.remote.transporter.impl.ServerURLFactory;
import tech.powerjob.server.remote.transporter.TransportService;
import tech.powerjob.server.remote.worker.WorkerClusterQueryService;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
/**
 * Manages dispatched job instances (status-update handling).
 *
 * @author tjq
 * @since 2020/4/7
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class InstanceManager implements TransportServiceAware {

    private final AlarmCenter alarmCenter;

    private final InstanceLogService instanceLogService;

    private final InstanceMetadataService instanceMetadataService;

    private final InstanceInfoRepository instanceInfoRepository;

    private final WorkflowInstanceManager workflowInstanceManager;

    private final WorkerClusterQueryService workerClusterQueryService;

    /**
     * Injected via the aware callback (see {@link #setTransportService}) to avoid a circular dependency.
     */
    private TransportService transportService;

    /**
     * Update an instance's status from a TaskTracker report.
     * ********************************************
     * 2021-02-03 modify by Echo009
     * Running-times accounting for non-frequent jobs is centralized here:
     * when the DB row is in WAITING_WORKER_RECEIVE state, runningTimes + 1.
     * ********************************************
     *
     * @param req status report sent by the TaskTracker
     */
    public void updateStatus(TaskTrackerReportInstanceStatusReq req) throws ExecutionException {
        Long instanceId = req.getInstanceId();
        // load job metadata (cached) and the current instance row
        JobInfoDO jobInfo = instanceMetadataService.fetchJobInfoByInstanceId(req.getInstanceId());
        InstanceInfoDO instanceInfo = instanceInfoRepository.findByInstanceId(instanceId);
        if (instanceInfo == null) {
            log.warn("[InstanceManager-{}] can't find InstanceInfo from database", instanceId);
            return;
        }
        // Edge case: the processor can finish faster than the server's DB write, so the report may
        // arrive while taskTrackerAddress is still empty; wait briefly and re-read (GitHub#620)
        if (StringUtils.isEmpty(instanceInfo.getTaskTrackerAddress())) {
            log.warn("[InstanceManager-{}] TaskTrackerAddress is empty, server will wait then acquire again!", instanceId);
            CommonUtils.easySleep(277);
            // NOTE(review): the re-fetched instanceInfo may still be null here -> possible NPE below; confirm
            instanceInfo = instanceInfoRepository.findByInstanceId(instanceId);
        }
        int originStatus = instanceInfo.getStatus();
        // drop stale reports
        if (req.getReportTime() <= instanceInfo.getLastReportTime()) {
            log.warn("[InstanceManager-{}] receive the expired status report request: {}, this report will be dropped.", instanceId, req);
            return;
        }
        // drop reports from a TaskTracker other than the recorded one (split-brain protection)
        if (!req.getSourceAddress().equals(instanceInfo.getTaskTrackerAddress())) {
            log.warn("[InstanceManager-{}] receive the other TaskTracker's report: {}, but current TaskTracker is {}, this report will be dropped.", instanceId, req, instanceInfo.getTaskTrackerAddress());
            return;
        }
        InstanceStatus receivedInstanceStatus = InstanceStatus.of(req.getInstanceStatus());
        Integer timeExpressionType = jobInfo.getTimeExpressionType();
        // refresh last-report time and modification time
        instanceInfo.setLastReportTime(req.getReportTime());
        instanceInfo.setGmtModified(new Date());
        // FREQUENT jobs have no retry mechanism; their TaskTracker runs forever, so only liveness needs syncing.
        // A FREQUENT report's newStatus is either RUNNING, or FAILED (worker overloaded, needs re-dispatch);
        // in both cases status and running count are written straight to the DB.
        if (TimeExpressionType.FREQUENT_TYPES.contains(timeExpressionType)) {
            // If the row is already FAILED, the worker was offline long enough for the server to declare it dead
            // and the job may have been re-dispatched, so this (resurrected) instance must be killed (fix issue 375)
            if (instanceInfo.getStatus() == InstanceStatus.FAILED.getV()) {
                log.warn("[InstanceManager-{}] receive TaskTracker's report: {}, but current instance is already failed, this instance should be killed.", instanceId, req);
                stopInstance(instanceId, instanceInfo);
                return;
            }
            LifeCycle lifeCycle = LifeCycle.parse(jobInfo.getLifecycle());
            // end the instance when its configured lifecycle has expired
            if (lifeCycle.getEnd() != null && lifeCycle.getEnd() <= System.currentTimeMillis()) {
                stopInstance(instanceId, instanceInfo);
                instanceInfo.setStatus(InstanceStatus.SUCCEED.getV());
            } else {
                instanceInfo.setStatus(receivedInstanceStatus.getV());
            }
            instanceInfo.setResult(req.getResult());
            instanceInfo.setRunningTimes(req.getTotalTaskNum());
            instanceInfoRepository.saveAndFlush(instanceInfo);
            // frequent task requested an alert
            if (req.isNeedAlert()) {
                log.info("[InstanceManager-{}] receive frequent task alert req,time:{},content:{}", instanceId, req.getReportTime(), req.getAlertContent());
                alert(instanceId, req.getAlertContent());
            }
            return;
        }
        // bump running count exactly once per dispatch cycle
        if (instanceInfo.getStatus() == InstanceStatus.WAITING_WORKER_RECEIVE.getV()) {
            // no concurrency issue here
            instanceInfo.setRunningTimes(instanceInfo.getRunningTimes() + 1);
        }
        // Status must NOT be changed before the running-count bump above, otherwise the bump
        // never triggers and normal jobs would retry forever
        instanceInfo.setStatus(receivedInstanceStatus.getV());
        boolean finished = false;
        if (receivedInstanceStatus == InstanceStatus.SUCCEED) {
            instanceInfo.setResult(req.getResult());
            instanceInfo.setFinishedTime(req.getEndTime() == null ? System.currentTimeMillis() : req.getEndTime());
            finished = true;
        } else if (receivedInstanceStatus == InstanceStatus.FAILED) {
            // retry while runningTimes <= instanceRetryNum: the first run has runningTimes = 1
            // and a single allowed retry has instanceRetryNum = 1, hence the <= comparison
            if (instanceInfo.getRunningTimes() <= jobInfo.getInstanceRetryNum()) {
                log.info("[InstanceManager-{}] instance execute failed but will take the {}th retry.", instanceId, instanceInfo.getRunningTimes());
                // delay the retry by 10s: instanceId is reused, and dispatching back to the same machine
                // while the old TaskTracker is still releasing resources would fail to create a new one
                instanceInfo.setExpectedTriggerTime(System.currentTimeMillis() + 10000);
                // back to WAITING_DISPATCH: the retry formally starts here
                // (known issue: prior scheduling records such as actualTriggerTime get overwritten)
                instanceInfo.setStatus(InstanceStatus.WAITING_DISPATCH.getV());
            } else {
                instanceInfo.setResult(req.getResult());
                instanceInfo.setFinishedTime(req.getEndTime() == null ? System.currentTimeMillis() : req.getEndTime());
                finished = true;
                log.info("[InstanceManager-{}] instance execute failed and have no chance to retry.", instanceId);
            }
        }
        if (finished) {
            // terminal states may overwrite unconditionally
            instanceInfoRepository.saveAndFlush(instanceInfo);
            // InstanceStatus here is only SUCCEED or FAILED; manual stop is never reported by a TaskTracker
            processFinishedInstance(instanceId, req.getWfInstanceId(), receivedInstanceStatus, req.getResult());
            return;
        }
        // conditional update: only applies if the DB status is still the one read at the start (originStatus)
        final int i = instanceInfoRepository.updateStatusChangeInfoByInstanceIdAndStatus(instanceInfo.getLastReportTime(), instanceInfo.getGmtModified(), instanceInfo.getRunningTimes(), instanceInfo.getStatus(), instanceInfo.getInstanceId(), originStatus);
        if (i == 0) {
            log.warn("[InstanceManager-{}] update instance status failed, maybe the instance status has been changed by other thread. discard this status change,{}", instanceId, instanceInfo);
        }
    }

    /**
     * Best-effort: tell the instance's TaskTracker (if still registered) to stop it.
     */
    private void stopInstance(Long instanceId, InstanceInfoDO instanceInfo) {
        Optional<WorkerInfo> workerInfoOpt = workerClusterQueryService.getWorkerInfoByAddress(instanceInfo.getAppId(), instanceInfo.getTaskTrackerAddress());
        if (workerInfoOpt.isPresent()) {
            ServerStopInstanceReq stopInstanceReq = new ServerStopInstanceReq(instanceId);
            WorkerInfo workerInfo = workerInfoOpt.get();
            final URL url = ServerURLFactory.stopInstance2Worker(workerInfo.getAddress());
            transportService.tell(workerInfo.getProtocol(), url, stopInstanceReq);
        }
    }

    /**
     * Post-processing for a finished instance: log sync, workflow advance, alerting, cache eviction.
     *
     * @param instanceId   instance id
     * @param wfInstanceId workflow instance id (nullable)
     * @param status       terminal status: SUCCEED / FAILED / STOPPED
     * @param result       execution result
     */
    public void processFinishedInstance(Long instanceId, Long wfInstanceId, InstanceStatus status, String result) {
        log.info("[Instance-{}] process finished, final status is {}.", instanceId, status.name());
        // schedule the log sync to remote storage (delayed so late log reports still land locally)
        HashedWheelTimerHolder.INACCURATE_TIMER.schedule(() -> instanceLogService.sync(instanceId), 60, TimeUnit.SECONDS);
        // workflow handling: a manual stop is also treated as a failure inside a workflow
        // (which should not normally happen)
        if (wfInstanceId != null) {
            workflowInstanceManager.move(wfInstanceId, instanceId, status, result);
        }
        // alert on failure
        if (status == InstanceStatus.FAILED) {
            alert(instanceId, result);
        }
        // proactively evict the metadata cache to reduce memory usage
        instanceMetadataService.invalidateJobInfo(instanceId);
    }

    /**
     * Send a failure alarm for the given instance to the job's notify users.
     */
    private void alert(Long instanceId, String alertContent) {
        InstanceInfoDO instanceInfo = instanceInfoRepository.findByInstanceId(instanceId);
        JobInfoDO jobInfo;
        try {
            jobInfo = instanceMetadataService.fetchJobInfoByInstanceId(instanceId);
        } catch (Exception e) {
            log.warn("[InstanceManager-{}] can't find jobInfo, alarm failed.", instanceId);
            return;
        }
        JobInstanceAlarm content = new JobInstanceAlarm();
        BeanUtils.copyProperties(jobInfo, content);
        BeanUtils.copyProperties(instanceInfo, content);
        List<UserInfoDO> userList = SpringUtils.getBean(UserService.class).fetchNotifyUserList(jobInfo.getNotifyUserIds());
        if (!StringUtils.isEmpty(alertContent)) {
            content.setResult(alertContent);
        }
        alarmCenter.alarmFailed(content, AlarmUtils.convertUserInfoList2AlarmTargetList(userList));
    }

    /**
     * Aware callback: receives the TransportService after construction (breaks the circular dependency).
     */
    @Override
    public void setTransportService(TransportService transportService) {
        this.transportService = transportService;
    }
}

View File

@ -0,0 +1,84 @@
package tech.powerjob.server.core.instance;
import lombok.RequiredArgsConstructor;
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.util.Optional;
import java.util.concurrent.ExecutionException;
/**
 * Caches the JobInfo snapshot associated with each instance.
 * Once an instance is created, its JobInfo must stay frozen even if the
 * source job definition is later modified.
 *
 * @author tjq
 * @since 2020/6/23
 */
@Service
@RequiredArgsConstructor
public class InstanceMetadataService implements InitializingBean {

    private final JobInfoRepository jobInfoRepository;

    private final InstanceInfoRepository instanceInfoRepository;

    /**
     * instanceId -> JobInfo snapshot. Soft values let the GC reclaim entries under memory pressure.
     */
    private Cache<Long, JobInfoDO> instanceId2JobInfoCache;

    /** Maximum number of cached entries, configured via oms.instance.metadata.cache.size. */
    @Value("${oms.instance.metadata.cache.size}")
    private int instanceMetadataCacheSize;

    private static final int CACHE_CONCURRENCY_LEVEL = 16;

    @Override
    public void afterPropertiesSet() throws Exception {
        // build the cache only after the @Value size has been injected
        instanceId2JobInfoCache = CacheBuilder.newBuilder()
                .concurrencyLevel(CACHE_CONCURRENCY_LEVEL)
                .maximumSize(instanceMetadataCacheSize)
                .softValues()
                .build();
    }

    /**
     * Resolve the JobInfo snapshot for an instance, loading it from the DB on a cache miss.
     *
     * @param instanceId instance id
     * @return the JobInfo associated with the instance
     * @throws ExecutionException if the loader fails (e.g. unknown instance or job)
     */
    public JobInfoDO fetchJobInfoByInstanceId(Long instanceId) throws ExecutionException {
        return instanceId2JobInfoCache.get(instanceId, () -> {
            InstanceInfoDO instance = instanceInfoRepository.findByInstanceId(instanceId);
            if (instance == null) {
                throw new IllegalArgumentException("can't find Instance by instanceId: " + instanceId);
            }
            return jobInfoRepository.findById(instance.getJobId())
                    .orElseThrow(() -> new IllegalArgumentException("can't find JobInfo by jobId: " + instance.getJobId()));
        });
    }

    /**
     * Preload the cache with the job snapshot taken at instance-creation time.
     *
     * @param instanceId instance id
     * @param jobInfoDO  job definition snapshot
     */
    public void loadJobInfo(Long instanceId, JobInfoDO jobInfoDO) {
        instanceId2JobInfoCache.put(instanceId, jobInfoDO);
    }

    /**
     * Evict the cached snapshot for a finished instance.
     *
     * @param instanceId instance id
     */
    public void invalidateJobInfo(Long instanceId) {
        instanceId2JobInfoCache.invalidate(instanceId);
    }
}

View File

@ -0,0 +1,351 @@
package tech.powerjob.server.core.instance;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.jpa.domain.Specification;
import org.springframework.stereotype.Service;
import tech.powerjob.common.RemoteConstant;
import tech.powerjob.common.SystemInstanceResult;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.common.model.InstanceDetail;
import tech.powerjob.common.model.InstanceMeta;
import tech.powerjob.common.request.ServerQueryInstanceStatusReq;
import tech.powerjob.common.request.ServerStopInstanceReq;
import tech.powerjob.common.request.query.InstancePageQuery;
import tech.powerjob.common.response.AskResponse;
import tech.powerjob.common.response.InstanceInfoDTO;
import tech.powerjob.common.response.PageResult;
import tech.powerjob.common.serialize.JsonUtils;
import tech.powerjob.remote.framework.base.URL;
import tech.powerjob.server.common.constants.InstanceType;
import tech.powerjob.server.common.module.WorkerInfo;
import tech.powerjob.server.common.timewheel.TimerFuture;
import tech.powerjob.server.common.timewheel.holder.InstanceTimeWheelService;
import tech.powerjob.server.core.DispatchService;
import tech.powerjob.server.core.uid.IdGenerateService;
import tech.powerjob.server.persistence.QueryConvertUtils;
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
import tech.powerjob.server.remote.server.redirector.DesignateServer;
import tech.powerjob.server.remote.transporter.TransportService;
import tech.powerjob.server.remote.transporter.impl.ServerURLFactory;
import tech.powerjob.server.remote.worker.WorkerClusterQueryService;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import static tech.powerjob.common.enums.InstanceStatus.RUNNING;
import static tech.powerjob.common.enums.InstanceStatus.STOPPED;
/**
* 任务运行实例服务
*
* @author tjq
* @since 2020/4/11
*/
@Slf4j
@Service
@RequiredArgsConstructor
public class InstanceService {
private final TransportService transportService;
private final DispatchService dispatchService;
private final IdGenerateService idGenerateService;
private final InstanceManager instanceManager;
private final JobInfoRepository jobInfoRepository;
private final InstanceInfoRepository instanceInfoRepository;
private final WorkerClusterQueryService workerClusterQueryService;
private final InstanceLogService instanceLogService;
/**
* 创建任务实例(注意,该方法并不调用 saveAndFlush如果有需要立即同步到DB的需求请在方法结束后手动调用 flush
* ********************************************
* 2021-02-03 modify by Echo009
* 新增 jobParams ,每次均记录任务静态参数
* ********************************************
*
* @param jobId 任务ID
* @param appId 所属应用ID
* @param jobParams 任务静态参数
* @param instanceParams 任务实例参数,仅 OpenAPI 创建 或者 工作流任务 时存在
* @param wfInstanceId 工作流任务实例ID仅工作流下的任务实例存在
* @param expectTriggerTime 预期执行时间
* @return 任务实例ID
*/
public InstanceInfoDO create(Long jobId, Long appId, String jobParams, String instanceParams, Long wfInstanceId, Long expectTriggerTime, String outerKey, String extendValue) {
Long instanceId = idGenerateService.allocate();
Date now = new Date();
InstanceInfoDO newInstanceInfo = new InstanceInfoDO();
newInstanceInfo.setJobId(jobId);
newInstanceInfo.setAppId(appId);
newInstanceInfo.setInstanceId(instanceId);
newInstanceInfo.setJobParams(jobParams);
newInstanceInfo.setInstanceParams(instanceParams);
newInstanceInfo.setType(wfInstanceId == null ? InstanceType.NORMAL.getV() : InstanceType.WORKFLOW.getV());
newInstanceInfo.setWfInstanceId(wfInstanceId);
newInstanceInfo.setStatus(InstanceStatus.WAITING_DISPATCH.getV());
newInstanceInfo.setRunningTimes(0L);
newInstanceInfo.setExpectedTriggerTime(expectTriggerTime);
newInstanceInfo.setLastReportTime(-1L);
newInstanceInfo.setOuterKey(outerKey);
newInstanceInfo.setExtendValue(extendValue);
newInstanceInfo.setGmtCreate(now);
newInstanceInfo.setGmtModified(now);
// 写入调度元信息
InstanceMeta instanceMeta = new InstanceMeta();
instanceMeta.setEtt(expectTriggerTime);
newInstanceInfo.setMeta(JsonUtils.toJSONString(instanceMeta));
instanceInfoRepository.save(newInstanceInfo);
return newInstanceInfo;
}
    /**
     * Stop a job instance: mark it STOPPED in the DB first, then best-effort notify the TaskTracker.
     *
     * @param appId      owning application id, used only by the {@link DesignateServer} routing AOP
     * @param instanceId instance id
     */
    @DesignateServer
    public void stopInstance(Long appId,Long instanceId) {
        log.info("[Instance-{}] try to stop the instance instance in appId: {}", instanceId,appId);
        try {
            InstanceInfoDO instanceInfo = fetchInstanceInfo(instanceId);
            // only a generalized-running instance can be stopped
            if (!InstanceStatus.GENERALIZED_RUNNING_STATUS.contains(instanceInfo.getStatus())) {
                throw new IllegalArgumentException("can't stop finished instance!");
            }
            // write STOPPED to the DB before touching the worker
            instanceInfo.setStatus(STOPPED.getV());
            instanceInfo.setGmtModified(new Date());
            instanceInfo.setFinishedTime(System.currentTimeMillis());
            instanceInfo.setResult(SystemInstanceResult.STOPPED_BY_USER);
            instanceInfoRepository.saveAndFlush(instanceInfo);
            instanceManager.processFinishedInstance(instanceId, instanceInfo.getWfInstanceId(), STOPPED, SystemInstanceResult.STOPPED_BY_USER);
            /*
            Unreliable notification of the TaskTracker.
            If the stop is not delivered, the TaskTracker will eventually reportStatus again and the
            instance record flips back to RUNNING; the operator can then stop it manually once more.
            */
            Optional<WorkerInfo> workerInfoOpt = workerClusterQueryService.getWorkerInfoByAddress(instanceInfo.getAppId(), instanceInfo.getTaskTrackerAddress());
            if (workerInfoOpt.isPresent()) {
                ServerStopInstanceReq req = new ServerStopInstanceReq(instanceId);
                WorkerInfo workerInfo = workerInfoOpt.get();
                transportService.tell(workerInfo.getProtocol(), ServerURLFactory.stopInstance2Worker(workerInfo.getAddress()), req);
                log.info("[Instance-{}] update instanceInfo and send 'stopInstance' request succeed.", instanceId);
            } else {
                log.warn("[Instance-{}] update instanceInfo successfully but can't find TaskTracker to stop instance", instanceId);
            }
        } catch (IllegalArgumentException ie) {
            // precondition violation: rethrow unchanged without the error log below
            throw ie;
        } catch (Exception e) {
            log.error("[Instance-{}] stopInstance failed.", instanceId, e);
            throw e;
        }
    }
    /**
     * Retry a job instance (only finished instances may be retried).
     *
     * @param appId      owning application id, used only by the {@link DesignateServer} routing AOP
     * @param instanceId instance id
     */
    @DesignateServer
    public void retryInstance(Long appId, Long instanceId) {
        log.info("[Instance-{}] retry instance in appId: {}", instanceId, appId);
        InstanceInfoDO instanceInfo = fetchInstanceInfo(instanceId);
        if (!InstanceStatus.FINISHED_STATUS.contains(instanceInfo.getStatus())) {
            throw new PowerJobException("Only stopped instance can be retry!");
        }
        // retrying instances that belong to a workflow is not supported yet
        if (instanceInfo.getWfInstanceId() != null) {
            throw new PowerJobException("Workflow's instance do not support retry!");
        }
        // reset the row to a freshly-dispatched state (previous run's results are cleared)
        instanceInfo.setStatus(InstanceStatus.WAITING_DISPATCH.getV());
        instanceInfo.setExpectedTriggerTime(System.currentTimeMillis());
        instanceInfo.setFinishedTime(null);
        instanceInfo.setActualTriggerTime(null);
        instanceInfo.setTaskTrackerAddress(null);
        instanceInfo.setResult(null);
        instanceInfoRepository.saveAndFlush(instanceInfo);
        // re-dispatch the job
        Long jobId = instanceInfo.getJobId();
        JobInfoDO jobInfo = jobInfoRepository.findById(jobId).orElseThrow(() -> new PowerJobException("can't find job info by jobId: " + jobId));
        // remove stale local log files so the rerun does not display the previous run's logs
        instanceLogService.removeOldFile(instanceId);
        dispatchService.dispatch(jobInfo, instanceId,Optional.of(instanceInfo),Optional.empty());
    }
    /**
     * Cancel a not-yet-running job instance.
     * Contract: call this well before the instance's expected trigger time,
     * otherwise cancellation is not guaranteed to be reliable!
     *
     * @param appId      owning application id, used only by the {@link DesignateServer} routing AOP
     * @param instanceId instance id
     */
    @DesignateServer
    public void cancelInstance(Long appId, Long instanceId) {
        log.info("[Instance-{}] try to cancel the instance with appId {}.", instanceId, appId);
        try {
            InstanceInfoDO instanceInfo = fetchInstanceInfo(instanceId);
            TimerFuture timerFuture = InstanceTimeWheelService.fetchTimerFuture(instanceId);
            boolean success;
            // the task is still in this server's time wheel and was cancelled in time — rescue succeeded
            if (timerFuture != null) {
                success = timerFuture.cancel();
            } else {
                // When called close to the scheduled time there is an inherent race: dispatch may complete
                // before the CANCELED status is written, and then overwrite it. Solving this would require
                // a distributed lock, which is deliberately not worth the cost; hence the contract above
                // that callers keep a safety margin before the expected trigger time.
                success = InstanceStatus.WAITING_DISPATCH.getV() == instanceInfo.getStatus();
            }
            if (success) {
                instanceInfo.setStatus(InstanceStatus.CANCELED.getV());
                instanceInfo.setResult(SystemInstanceResult.CANCELED_BY_USER);
                // If this DB write fails the exception propagates and the API returns false (cancel failed);
                // the HA mechanism will re-schedule the instance, so no extra handling is needed here.
                instanceInfoRepository.saveAndFlush(instanceInfo);
                log.info("[Instance-{}] cancel the instance successfully.", instanceId);
            } else {
                log.warn("[Instance-{}] cancel the instance failed.", instanceId);
                throw new PowerJobException("instance already up and running");
            }
        } catch (Exception e) {
            log.error("[Instance-{}] cancelInstance failed.", instanceId, e);
            throw e;
        }
    }
public PageResult<InstanceInfoDTO> queryInstanceInfo(InstancePageQuery instancePageQuery) {
Specification<InstanceInfoDO> specification = QueryConvertUtils.toSpecification(instancePageQuery);
Pageable pageable = QueryConvertUtils.toPageable(instancePageQuery);
Page<InstanceInfoDO> instanceInfoDOPage = instanceInfoRepository.findAll(specification, pageable);
PageResult<InstanceInfoDTO> ret = new PageResult<>();
List<InstanceInfoDTO> instanceInfoDTOList = instanceInfoDOPage.get().map(InstanceService::directConvert).collect(Collectors.toList());
ret.setData(instanceInfoDTOList)
.setIndex(instanceInfoDOPage.getNumber())
.setPageSize(instanceInfoDOPage.getSize())
.setTotalPages(instanceInfoDOPage.getTotalPages())
.setTotalItems(instanceInfoDOPage.getTotalElements());
return ret;
}
/**
 * Fetch the full information of a task instance.
 *
 * @param instanceId instance id
 * @return instance information as a DTO
 * @throws IllegalArgumentException if the instance does not exist
 */
public InstanceInfoDTO getInstanceInfo(Long instanceId) {
    InstanceInfoDO record = fetchInstanceInfo(instanceId);
    return directConvert(record);
}
/**
 * Fetch the current status of a task instance.
 *
 * @param instanceId instance id
 * @return the instance status enum
 * @throws IllegalArgumentException if the instance does not exist
 */
public InstanceStatus getInstanceStatus(Long instanceId) {
    return InstanceStatus.of(fetchInstanceInfo(instanceId).getStatus());
}
/**
 * Fetch the detailed runtime state of an instance.
 * <p>
 * For non-RUNNING instances only the brief DB snapshot is returned. For RUNNING
 * instances we ask the TaskTracker worker directly; if the worker is unknown,
 * replies with a failure, or times out, we fall back to the brief DB snapshot.
 *
 * @param appId       used by remote-server routing, do NOT remove
 * @param instanceId  instance id
 * @param customQuery extra query payload forwarded to the worker
 * @return the detailed (or brief, on fallback) runtime state
 */
@DesignateServer
public InstanceDetail getInstanceDetail(Long appId, Long instanceId, String customQuery) {
    InstanceInfoDO infoDO = fetchInstanceInfo(instanceId);
    InstanceStatus status = InstanceStatus.of(infoDO.getStatus());

    InstanceDetail detail = new InstanceDetail();
    detail.setStatus(status.getV());

    // only a RUNNING instance has live state on a worker worth querying
    if (status == RUNNING) {
        Optional<WorkerInfo> trackerOpt = workerClusterQueryService.getWorkerInfoByAddress(infoDO.getAppId(), infoDO.getTaskTrackerAddress());
        if (trackerOpt.isPresent()) {
            WorkerInfo tracker = trackerOpt.get();
            ServerQueryInstanceStatusReq queryReq = new ServerQueryInstanceStatusReq(instanceId, customQuery);
            try {
                final URL url = ServerURLFactory.queryInstance2Worker(tracker.getAddress());
                AskResponse askResponse = transportService.ask(tracker.getProtocol(), url, queryReq, AskResponse.class)
                        .toCompletableFuture()
                        .get(RemoteConstant.DEFAULT_TIMEOUT_MS, TimeUnit.MILLISECONDS);
                if (askResponse.isSuccess()) {
                    // worker-side detail lacks DB-only fields; patch them in before returning
                    InstanceDetail remoteDetail = askResponse.getData(InstanceDetail.class);
                    remoteDetail.setRunningTimes(infoDO.getRunningTimes());
                    remoteDetail.setInstanceParams(infoDO.getInstanceParams());
                    return remoteDetail;
                }
                log.warn("[Instance-{}] ask InstanceStatus from TaskTracker failed, the message is {}.", instanceId, askResponse.getMessage());
            } catch (Exception e) {
                log.warn("[Instance-{}] ask InstanceStatus from TaskTracker failed, exception is {}", instanceId, e.toString());
            }
        }
    }

    // fallback: return the brief DB-based information
    BeanUtils.copyProperties(infoDO, detail);
    return detail;
}
/**
 * Load the instance record from the DB, failing loudly when it is absent.
 *
 * @param instanceId instance id
 * @return the persistent instance record, never null
 * @throws IllegalArgumentException if no record exists for the given id
 */
private InstanceInfoDO fetchInstanceInfo(Long instanceId) {
    InstanceInfoDO record = instanceInfoRepository.findByInstanceId(instanceId);
    if (record != null) {
        return record;
    }
    log.warn("[Instance-{}] can't find InstanceInfo by instanceId", instanceId);
    throw new IllegalArgumentException("invalid instanceId: " + instanceId);
}
/**
 * Straight field-by-field copy from the persistent entity to the transport DTO.
 */
private static InstanceInfoDTO directConvert(InstanceInfoDO instanceInfoDO) {
    InstanceInfoDTO dto = new InstanceInfoDTO();
    BeanUtils.copyProperties(instanceInfoDO, dto);
    return dto;
}
}

View File

@ -0,0 +1,74 @@
package tech.powerjob.server.core.lock;
import tech.powerjob.common.utils.CommonUtils;
import tech.powerjob.common.utils.NetUtils;
import tech.powerjob.server.extension.LockService;
import tech.powerjob.server.persistence.remote.model.OmsLockDO;
import tech.powerjob.server.persistence.remote.repository.OmsLockRepository;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.dao.DataIntegrityViolationException;
import org.springframework.stereotype.Service;
/**
* 基于数据库实现的分布式锁
*
* @author tjq
* @since 2020/4/5
*/
@Slf4j
@Service
public class DatabaseLockService implements LockService {

    /** identifies this node as the lock owner in the lock table */
    private final String ownerIp;

    private final OmsLockRepository omsLockRepository;

    @Autowired
    public DatabaseLockService(OmsLockRepository omsLockRepository) {
        this.ownerIp = NetUtils.getLocalHost();
        this.omsLockRepository = omsLockRepository;

        // release every lock held by this node on JVM shutdown, so peers don't have
        // to wait for the lock timeout
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            int num = omsLockRepository.deleteByOwnerIP(ownerIp);
            log.info("[DatabaseLockService] execute shutdown hook, release all lock(owner={},num={})", ownerIp, num);
        }));
    }

    /**
     * Try to acquire the named lock by inserting a row; a unique-key violation means
     * someone else holds it. A held lock older than its maxLockTime is force-released
     * and re-acquired.
     *
     * @param name        lock name (unique key in the lock table)
     * @param maxLockTime max holding time in ms before the lock is considered expired
     * @return true if the lock was acquired
     */
    @Override
    public boolean tryLock(String name, long maxLockTime) {
        OmsLockDO newLock = new OmsLockDO(name, ownerIp, maxLockTime);
        try {
            omsLockRepository.saveAndFlush(newLock);
            return true;
        } catch (DataIntegrityViolationException ignore) {
            // unique-key conflict: another node currently holds the lock, fall through to the timeout check
        } catch (Exception e) {
            log.warn("[DatabaseLockService] write lock to database failed, lockName = {}.", name, e);
        }

        OmsLockDO omsLockDO = omsLockRepository.findByLockName(name);
        if (omsLockDO == null) {
            // FIX: the holder released the lock between our failed insert and this query;
            // previously this dereferenced null. Report "not acquired" — the caller may retry.
            return false;
        }
        long lockedMillions = System.currentTimeMillis() - omsLockDO.getGmtCreate().getTime();
        // lock expired: force-release it and try to grab it again
        if (lockedMillions > omsLockDO.getMaxLockTime()) {
            log.warn("[DatabaseLockService] The lock[{}] already timeout, will be unlocked now.", omsLockDO);
            unlock(name);
            return tryLock(name, maxLockTime);
        }
        return false;
    }

    /**
     * Release the named lock (best effort, with retries); failures are only logged.
     */
    @Override
    public void unlock(String name) {
        try {
            CommonUtils.executeWithRetry0(() -> omsLockRepository.deleteByLockName(name));
        } catch (Exception e) {
            log.error("[DatabaseLockService] unlock {} failed.", name, e);
        }
    }
}

View File

@ -0,0 +1,24 @@
package tech.powerjob.server.core.lock;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
* use cached lock to make concurrent safe
*
* @author tjq
* @author Echo009
* @since 1/16/21
*/
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface UseCacheLock {

    /** Lock category; each distinct type gets its own lock cache in the aspect. */
    String type();

    /** SpEL expression evaluated against the method arguments to compute the lock key. */
    String key();

    /** Expected concurrency, used to size the underlying lock cache. */
    int concurrencyLevel();
}

View File

@ -0,0 +1,83 @@
package tech.powerjob.server.core.lock;
import com.alibaba.fastjson.JSON;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.Maps;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.aspectj.lang.ProceedingJoinPoint;
import org.aspectj.lang.annotation.Around;
import org.aspectj.lang.annotation.Aspect;
import org.springframework.core.annotation.Order;
import org.springframework.stereotype.Component;
import tech.powerjob.server.common.utils.AOPUtils;
import tech.powerjob.server.monitor.MonitorService;
import tech.powerjob.server.monitor.events.lock.SlowLockEvent;
import javax.annotation.Resource;
import java.lang.reflect.Method;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;
/**
* aspect for @UseSegmentLock
*
* @author tjq
* @since 1/16/21
*/
@Slf4j
@Aspect
@Component
@Order(1)
@RequiredArgsConstructor
public class UseCacheLockAspect {

    private final MonitorService monitorService;

    /** one independent lock cache per {@link UseCacheLock#type()} */
    private final Map<String, Cache<String, ReentrantLock>> lockContainer = Maps.newConcurrentMap();

    /** lock waits longer than this (ms) are reported as slow-lock events */
    private static final long SLOW_THRESHOLD = 100;

    /**
     * Serializes concurrent calls that share the same (type, key) pair through a cached
     * {@link ReentrantLock}, reporting slow acquisitions to the monitor service.
     * <p>
     * FIX: removed a stray trailing ')' from the pointcut expression
     * ("@annotation(useCacheLock))"), and corrected the log tags which still referred
     * to the old "UseSegmentLockAspect" name.
     */
    @Around(value = "@annotation(useCacheLock)")
    public Object execute(ProceedingJoinPoint point, UseCacheLock useCacheLock) throws Throwable {
        // lazily create one cache of locks per lock type
        Cache<String, ReentrantLock> lockCache = lockContainer.computeIfAbsent(useCacheLock.type(), ignore -> {
            int concurrencyLevel = useCacheLock.concurrencyLevel();
            log.info("[UseCacheLockAspect] create Lock Cache for [{}] with concurrencyLevel: {}", useCacheLock.type(), concurrencyLevel);
            return CacheBuilder.newBuilder()
                    .initialCapacity(300000)
                    .maximumSize(500000)
                    .concurrencyLevel(concurrencyLevel)
                    .expireAfterWrite(30, TimeUnit.MINUTES)
                    .build();
        });
        final Method method = AOPUtils.parseMethod(point);
        // evaluate the SpEL key against the call arguments; the trailing 1L looks like a
        // fallback value when the expression can't be resolved — confirm in AOPUtils
        Long key = AOPUtils.parseSpEl(method, point.getArgs(), useCacheLock.key(), Long.class, 1L);
        final ReentrantLock reentrantLock = lockCache.get(String.valueOf(key), ReentrantLock::new);
        long start = System.currentTimeMillis();
        reentrantLock.lockInterruptibly();
        try {
            long timeCost = System.currentTimeMillis() - start;
            if (timeCost > SLOW_THRESHOLD) {
                // surface lock contention to operators via the monitor pipeline
                final SlowLockEvent slowLockEvent = new SlowLockEvent()
                        .setType(SlowLockEvent.Type.LOCAL)
                        .setLockType(useCacheLock.type())
                        .setLockKey(String.valueOf(key))
                        .setCallerService(method.getDeclaringClass().getSimpleName())
                        .setCallerMethod(method.getName())
                        .setCost(timeCost);
                monitorService.monitor(slowLockEvent);
                log.warn("[UseCacheLockAspect] wait lock for method({}#{}) cost {} ms! key = '{}', args = {}, ", method.getDeclaringClass().getSimpleName(), method.getName(), timeCost,
                        key,
                        JSON.toJSONString(point.getArgs()));
            }
            return point.proceed();
        } finally {
            reentrantLock.unlock();
        }
    }
}

View File

@ -0,0 +1,206 @@
package tech.powerjob.server.core.scheduler;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.time.DateUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.enums.WorkflowInstanceStatus;
import tech.powerjob.server.common.constants.PJThreadPool;
import tech.powerjob.server.common.utils.OmsFileUtils;
import tech.powerjob.server.extension.LockService;
import tech.powerjob.server.extension.dfs.DFsService;
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository;
import tech.powerjob.server.persistence.remote.repository.WorkflowNodeInfoRepository;
import tech.powerjob.server.persistence.storage.Constants;
import tech.powerjob.server.remote.worker.WorkerClusterManagerService;
import java.io.File;
import java.util.Date;
/**
* CCOChief Clean Officer
*
* @author tjq
* @since 2020/5/18
*/
@Slf4j
@Service
public class CleanService {

    private final DFsService dFsService;

    private final InstanceInfoRepository instanceInfoRepository;

    private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository;

    private final WorkflowNodeInfoRepository workflowNodeInfoRepository;

    private final LockService lockService;

    /** retention (days) for instance records, local logs and remote log files; negative disables cleaning */
    private final int instanceInfoRetentionDay;

    /** retention (days) for locally cached container jars; negative disables cleaning */
    private final int localContainerRetentionDay;

    /** retention (days) for remotely stored container files; negative disables cleaning */
    private final int remoteContainerRetentionDay;

    /** retention (days) for files under the temporary directory */
    private static final int TEMPORARY_RETENTION_DAY = 3;

    /**
     * clean at 3 o'clock every morning
     */
    private static final String CLEAN_TIME_EXPRESSION = "0 0 3 * * ?";

    private static final String HISTORY_DELETE_LOCK = "history_delete_lock";

    public CleanService(DFsService dFsService, InstanceInfoRepository instanceInfoRepository, WorkflowInstanceInfoRepository workflowInstanceInfoRepository,
                        WorkflowNodeInfoRepository workflowNodeInfoRepository, LockService lockService,
                        @Value("${oms.instanceinfo.retention}") int instanceInfoRetentionDay,
                        @Value("${oms.container.retention.local}") int localContainerRetentionDay,
                        @Value("${oms.container.retention.remote}") int remoteContainerRetentionDay) {
        this.dFsService = dFsService;
        this.instanceInfoRepository = instanceInfoRepository;
        this.workflowInstanceInfoRepository = workflowInstanceInfoRepository;
        this.workflowNodeInfoRepository = workflowNodeInfoRepository;
        this.lockService = lockService;
        this.instanceInfoRetentionDay = instanceInfoRetentionDay;
        this.localContainerRetentionDay = localContainerRetentionDay;
        this.remoteContainerRetentionDay = remoteContainerRetentionDay;
    }

    /**
     * Daily 3 AM entry point: releases the worker-cluster cache, prunes local disk
     * directories, then performs the cleanup that only one server may run.
     */
    @Async(PJThreadPool.TIMING_POOL)
    @Scheduled(cron = CLEAN_TIME_EXPRESSION)
    public void timingClean() {
        // release the local worker-cluster cache
        WorkerClusterManagerService.cleanUp();

        // free local disk space
        cleanLocal(OmsFileUtils.genLogDirPath(), instanceInfoRetentionDay);
        cleanLocal(OmsFileUtils.genContainerJarPath(), localContainerRetentionDay);
        cleanLocal(OmsFileUtils.genTemporaryPath(), TEMPORARY_RETENTION_DAY);

        // delete historical data from the database
        cleanByOneServer();
    }

    /**
     * Operations that must be executed by exactly one server are funneled through here,
     * guarded by the distributed lock.
     */
    private void cleanByOneServer() {
        // the first server grabs the lock and the others return immediately, so 10 minutes is plenty
        boolean lock = lockService.tryLock(HISTORY_DELETE_LOCK, 10 * 60 * 1000L);
        if (!lock) {
            log.info("[CleanService] clean job is already running, just return.");
            return;
        }
        try {
            // delete finished run records
            cleanInstanceLog();
            cleanWorkflowInstanceLog();
            // delete orphan workflow nodes
            cleanWorkflowNodeInfo();
            // delete expired remote (GridFS etc.) files
            cleanRemote(Constants.LOG_BUCKET, instanceInfoRetentionDay);
            cleanRemote(Constants.CONTAINER_BUCKET, remoteContainerRetentionDay);
        } finally {
            lockService.unlock(HISTORY_DELETE_LOCK);
        }
    }

    /**
     * Delete files under {@code path} whose last-modified time is more than {@code day} days old.
     * FIX: the log message claimed the guard was {@code <= 0} while the code checks {@code < 0};
     * the message now matches the code. NOTE(review): with day == 0 everything is deleted —
     * confirm that is the intended semantics of a zero retention.
     *
     * @param path directory to prune
     * @param day  retention in days; negative disables cleaning
     */
    @VisibleForTesting
    public void cleanLocal(String path, int day) {
        if (day < 0) {
            log.info("[CleanService] won't clean up {} because of offset day < 0.", path);
            return;
        }
        Stopwatch stopwatch = Stopwatch.createStarted();
        File dir = new File(path);
        if (!dir.exists()) {
            return;
        }
        File[] logFiles = dir.listFiles();
        if (logFiles == null || logFiles.length == 0) {
            return;
        }

        // max allowed age in milliseconds
        long maxOffset = day * 24 * 60 * 60 * 1000L;
        for (File f : logFiles) {
            long offset = System.currentTimeMillis() - f.lastModified();
            if (offset >= maxOffset) {
                if (!f.delete()) {
                    log.warn("[CleanService] delete file({}) failed.", f.getName());
                } else {
                    log.info("[CleanService] delete file({}) successfully.", f.getName());
                }
            }
        }
        log.info("[CleanService] clean {} successfully, using {}.", path, stopwatch.stop());
    }

    /**
     * Delete expired files from a remote storage bucket (best effort, failures only logged).
     *
     * @param bucketName remote bucket to clean
     * @param day        retention in days; negative disables cleaning
     */
    @VisibleForTesting
    public void cleanRemote(String bucketName, int day) {
        if (day < 0) {
            log.info("[CleanService] won't clean up bucket({}) because of offset day < 0.", bucketName);
            return;
        }
        Stopwatch stopwatch = Stopwatch.createStarted();
        try {
            dFsService.cleanExpiredFiles(bucketName, day);
        } catch (Exception e) {
            log.warn("[CleanService] clean remote bucket({}) failed.", bucketName, e);
        }
        log.info("[CleanService] clean remote bucket({}) successfully, using {}.", bucketName, stopwatch.stop());
    }

    /**
     * Delete finished instance records modified before the retention window.
     */
    @VisibleForTesting
    public void cleanInstanceLog() {
        if (instanceInfoRetentionDay < 0) {
            return;
        }
        try {
            Date t = DateUtils.addDays(new Date(), -instanceInfoRetentionDay);
            int num = instanceInfoRepository.deleteAllByGmtModifiedBeforeAndStatusIn(t, InstanceStatus.FINISHED_STATUS);
            log.info("[CleanService] deleted {} instanceInfo records whose modify time before {}.", num, t);
        } catch (Exception e) {
            log.warn("[CleanService] clean instanceInfo failed.", e);
        }
    }

    /**
     * Delete finished workflow instance records modified before the retention window.
     */
    @VisibleForTesting
    public void cleanWorkflowInstanceLog() {
        if (instanceInfoRetentionDay < 0) {
            return;
        }
        try {
            Date t = DateUtils.addDays(new Date(), -instanceInfoRetentionDay);
            int num = workflowInstanceInfoRepository.deleteAllByGmtModifiedBeforeAndStatusIn(t, WorkflowInstanceStatus.FINISHED_STATUS);
            log.info("[CleanService] deleted {} workflow instanceInfo records whose modify time before {}.", num, t);
        } catch (Exception e) {
            log.warn("[CleanService] clean workflow instanceInfo failed.", e);
        }
    }

    /**
     * Delete node records created more than one day ago that never got attached to a workflow.
     */
    @VisibleForTesting
    public void cleanWorkflowNodeInfo() {
        try {
            Date t = DateUtils.addDays(new Date(), -1);
            int num = workflowNodeInfoRepository.deleteAllByWorkflowIdIsNullAndGmtCreateBefore(t);
            log.info("[CleanService] deleted {} node records whose create time before {} and workflowId is null.", num, t);
        } catch (Exception e) {
            log.warn("[CleanService] clean workflow node info failed.", e);
        }
    }
}

View File

@ -0,0 +1,85 @@
package tech.powerjob.server.core.scheduler;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.DisposableBean;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.stereotype.Service;
import tech.powerjob.common.enums.TimeExpressionType;
import java.util.ArrayList;
import java.util.List;
/**
* @author Echo009
* @since 2022/10/12
*/
@Service
@Slf4j
@RequiredArgsConstructor
public class CoreScheduleTaskManager implements InitializingBean, DisposableBean {
private final PowerScheduleService powerScheduleService;
private final InstanceStatusCheckService instanceStatusCheckService;
private final List<Thread> coreThreadContainer = new ArrayList<>();
@SuppressWarnings("AlibabaAvoidManuallyCreateThread")
@Override
public void afterPropertiesSet() {
// 定时调度
coreThreadContainer.add(new Thread(new LoopRunnable("ScheduleCronJob", PowerScheduleService.SCHEDULE_RATE, () -> powerScheduleService.scheduleNormalJob(TimeExpressionType.CRON)), "Thread-ScheduleCronJob"));
coreThreadContainer.add(new Thread(new LoopRunnable("ScheduleDailyTimeIntervalJob", PowerScheduleService.SCHEDULE_RATE, () -> powerScheduleService.scheduleNormalJob(TimeExpressionType.DAILY_TIME_INTERVAL)), "Thread-ScheduleDailyTimeIntervalJob"));
coreThreadContainer.add(new Thread(new LoopRunnable("ScheduleCronWorkflow", PowerScheduleService.SCHEDULE_RATE, powerScheduleService::scheduleCronWorkflow), "Thread-ScheduleCronWorkflow"));
coreThreadContainer.add(new Thread(new LoopRunnable("ScheduleFrequentJob", PowerScheduleService.SCHEDULE_RATE, powerScheduleService::scheduleFrequentJob), "Thread-ScheduleFrequentJob"));
// 数据清理
coreThreadContainer.add(new Thread(new LoopRunnable("CleanWorkerData", PowerScheduleService.SCHEDULE_RATE, powerScheduleService::cleanData), "Thread-CleanWorkerData"));
// 状态检查
coreThreadContainer.add(new Thread(new LoopRunnable("CheckRunningInstance", InstanceStatusCheckService.CHECK_INTERVAL, instanceStatusCheckService::checkRunningInstance), "Thread-CheckRunningInstance"));
coreThreadContainer.add(new Thread(new LoopRunnable("CheckWaitingDispatchInstance", InstanceStatusCheckService.CHECK_INTERVAL, instanceStatusCheckService::checkWaitingDispatchInstance), "Thread-CheckWaitingDispatchInstance"));
coreThreadContainer.add(new Thread(new LoopRunnable("CheckWaitingWorkerReceiveInstance", InstanceStatusCheckService.CHECK_INTERVAL, instanceStatusCheckService::checkWaitingWorkerReceiveInstance), "Thread-CheckWaitingWorkerReceiveInstance"));
coreThreadContainer.add(new Thread(new LoopRunnable("CheckWorkflowInstance", InstanceStatusCheckService.CHECK_INTERVAL, instanceStatusCheckService::checkWorkflowInstance), "Thread-CheckWorkflowInstance"));
coreThreadContainer.forEach(Thread::start);
}
@Override
public void destroy() {
coreThreadContainer.forEach(Thread::interrupt);
}
@RequiredArgsConstructor
private static class LoopRunnable implements Runnable {
private final String taskName;
private final Long runningInterval;
private final Runnable innerRunnable;
@SuppressWarnings("BusyWait")
@Override
public void run() {
log.info("start task : {}.", taskName);
while (true) {
try {
// 倒置顺序为 先 sleep 再执行,解决异常情况 while true 打日志的问题 https://github.com/PowerJob/PowerJob/issues/769
Thread.sleep(runningInterval);
innerRunnable.run();
} catch (InterruptedException e) {
log.warn("[{}] task has been interrupted!", taskName, e);
break;
} catch (Exception e) {
log.error("[{}] task failed!", taskName, e);
}
}
}
}
}

View File

@ -0,0 +1,302 @@
package tech.powerjob.server.core.scheduler;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Lists;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.domain.PageRequest;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import tech.powerjob.common.SystemInstanceResult;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.common.enums.WorkflowInstanceStatus;
import tech.powerjob.server.common.Holder;
import tech.powerjob.common.enums.SwitchableStatus;
import tech.powerjob.server.core.DispatchService;
import tech.powerjob.server.core.instance.InstanceManager;
import tech.powerjob.server.core.workflow.WorkflowInstanceManager;
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
import tech.powerjob.server.persistence.remote.model.brief.BriefInstanceInfo;
import tech.powerjob.server.persistence.remote.repository.*;
import tech.powerjob.server.remote.transporter.TransportService;
import java.util.*;
import java.util.stream.Collectors;
/**
* 定时状态检查
*
* @author tjq
* @since 2020/4/7
*/
@Slf4j
@Service
@RequiredArgsConstructor
public class InstanceStatusCheckService {

    /** max number of apps handled per partition when splitting the appId list */
    private static final int MAX_BATCH_NUM_APP = 10;
    /** max number of instances fetched from the DB per check round */
    private static final int MAX_BATCH_NUM_INSTANCE = 3000;
    /** batch size for the lock-free async redispatch */
    private static final int MAX_BATCH_UPDATE_NUM = 500;
    /** WAITING_DISPATCH instances past their expected trigger time by this (ms) are re-handled */
    private static final long DISPATCH_TIMEOUT_MS = 30000;
    /** WAITING_WORKER_RECEIVE instances older than this (ms) are redispatched */
    private static final long RECEIVE_TIMEOUT_MS = 60000;
    /** RUNNING instances with no DB update for this long (ms) are failed or retried */
    private static final long RUNNING_TIMEOUT_MS = 60000;
    /** workflow instances stuck in WAITING longer than this (ms) are restarted */
    private static final long WORKFLOW_WAITING_TIMEOUT_MS = 60000;
    /** polling interval used by the core schedule threads that call the check* methods */
    public static final long CHECK_INTERVAL = 10000;

    private final TransportService transportService;
    private final DispatchService dispatchService;
    private final InstanceManager instanceManager;
    private final WorkflowInstanceManager workflowInstanceManager;
    private final AppInfoRepository appInfoRepository;
    private final JobInfoRepository jobInfoRepository;
    private final InstanceInfoRepository instanceInfoRepository;
    private final WorkflowInfoRepository workflowInfoRepository;
    private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository;

    /**
     * Check workflow instances that were not started as expected (entry point, called periodically).
     */
    public void checkWorkflowInstance() {
        Stopwatch stopwatch = Stopwatch.createStarted();
        // query the DB for the app group this server is responsible for
        List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
        if (CollectionUtils.isEmpty(allAppIds)) {
            log.info("[InstanceStatusChecker] current server has no app's job to check");
            return;
        }
        try {
            checkWorkflowInstance(allAppIds);
        } catch (Exception e) {
            log.error("[InstanceStatusChecker] WorkflowInstance status check failed.", e);
        }
        log.info("[InstanceStatusChecker] WorkflowInstance status check using {}.", stopwatch.stop());
    }

    /**
     * Check instances waiting to be dispatched.
     * WAITING_DISPATCH timeout: the server went down after the instance was written
     * to the time wheel but before it was dispatched.
     */
    public void checkWaitingDispatchInstance() {
        Stopwatch stopwatch = Stopwatch.createStarted();
        // query the DB for the app group this server is responsible for
        List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
        if (CollectionUtils.isEmpty(allAppIds)) {
            log.info("[InstanceStatusChecker] current server has no app's job to check");
            return;
        }
        try {
            // check instances stuck in WAITING_DISPATCH
            Lists.partition(allAppIds, MAX_BATCH_NUM_APP).forEach(this::handleWaitingDispatchInstance);
        } catch (Exception e) {
            log.error("[InstanceStatusChecker] WaitingDispatchInstance status check failed.", e);
        }
        log.info("[InstanceStatusChecker] WaitingDispatchInstance status check using {}.", stopwatch.stop());
    }

    /**
     * Check instances waiting to be received by a worker.
     * WAITING_WORKER_RECEIVE timeout: the worker never acknowledged receipt due to network errors.
     */
    public void checkWaitingWorkerReceiveInstance() {
        Stopwatch stopwatch = Stopwatch.createStarted();
        // query the DB for the app group this server is responsible for
        List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
        if (CollectionUtils.isEmpty(allAppIds)) {
            log.info("[InstanceStatusChecker] current server has no app's job to check");
            return;
        }
        try {
            // check instances stuck in WAITING_WORKER_RECEIVE
            Lists.partition(allAppIds, MAX_BATCH_NUM_APP).forEach(this::handleWaitingWorkerReceiveInstance);
        } catch (Exception e) {
            log.error("[InstanceStatusChecker] WaitingWorkerReceiveInstance status check failed.", e);
        }
        log.info("[InstanceStatusChecker] WaitingWorkerReceiveInstance status check using {}.", stopwatch.stop());
    }

    /**
     * Check running instances.
     * RUNNING timeout: the TaskTracker went down and stopped heart-beating to the server.
     */
    public void checkRunningInstance() {
        Stopwatch stopwatch = Stopwatch.createStarted();
        // query the DB for the app group this server is responsible for
        List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
        if (CollectionUtils.isEmpty(allAppIds)) {
            log.info("[InstanceStatusChecker] current server has no app's job to check");
            return;
        }
        try {
            // check RUNNING instances (no status report from the TaskTracker for a while counts as failure)
            Lists.partition(allAppIds, MAX_BATCH_NUM_APP).forEach(this::handleRunningInstance);
        } catch (Exception e) {
            log.error("[InstanceStatusChecker] RunningInstance status check failed.", e);
        }
        log.info("[InstanceStatusChecker] RunningInstance status check using {}.", stopwatch.stop());
    }

    /**
     * Re-dispatch (or fail) instances stuck in WAITING_DISPATCH, looping in DB pages
     * and dropping overloaded apps from subsequent pages.
     */
    private void handleWaitingDispatchInstance(List<Long> appIds) {
        // a mutable copy is required because of removeAll below; otherwise the outer iterator throws NoSuchElementException: null
        List<Long> partAppIds = Lists.newArrayList(appIds);
        // 1. check instances stuck in WAITING_DISPATCH
        long threshold = System.currentTimeMillis() - DISPATCH_TIMEOUT_MS;
        List<InstanceInfoDO> waitingDispatchInstances = instanceInfoRepository.findAllByAppIdInAndStatusAndExpectedTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_DISPATCH.getV(), threshold, PageRequest.of(0, MAX_BATCH_NUM_INSTANCE));
        while (!waitingDispatchInstances.isEmpty()) {
            List<Long> overloadAppIdList = new ArrayList<>();
            long startTime = System.currentTimeMillis();
            // group by appId so the overload logic is easier to apply
            Map<Long, List<InstanceInfoDO>> waitingDispatchInstancesMap = waitingDispatchInstances.stream().collect(Collectors.groupingBy(InstanceInfoDO::getAppId));
            for (Map.Entry<Long, List<InstanceInfoDO>> entry : waitingDispatchInstancesMap.entrySet()) {
                final Long currentAppId = entry.getKey();
                final List<InstanceInfoDO> currentAppWaitingDispatchInstances = entry.getValue();
                // collect job id
                Set<Long> jobIds = currentAppWaitingDispatchInstances.stream().map(InstanceInfoDO::getJobId).collect(Collectors.toSet());
                // query job info and map
                Map<Long, JobInfoDO> jobInfoMap = jobInfoRepository.findByIdIn(jobIds).stream().collect(Collectors.toMap(JobInfoDO::getId, e -> e));
                log.warn("[InstanceStatusChecker] find some instance in app({}) which is not triggered as expected: {}", currentAppId, currentAppWaitingDispatchInstances.stream().map(InstanceInfoDO::getInstanceId).collect(Collectors.toList()));
                final Holder<Boolean> overloadFlag = new Holder<>(false);
                // simple handling is acceptable here — this is the only place using parallelStream
                currentAppWaitingDispatchInstances.parallelStream().forEach(instance -> {
                    if (overloadFlag.get()) {
                        // the app is already marked overloaded, skip the rest of its instances
                        return;
                    }
                    Optional<JobInfoDO> jobInfoOpt = Optional.ofNullable(jobInfoMap.get(instance.getJobId()));
                    if (jobInfoOpt.isPresent()) {
                        // no need to reset the status of a waiting-dispatch instance, saves one DB write
                        dispatchService.dispatch(jobInfoOpt.get(), instance.getInstanceId(), Optional.of(instance), Optional.of(overloadFlag));
                    } else {
                        log.warn("[InstanceStatusChecker] can't find job by jobId[{}], so redispatch failed, failed instance: {}", instance.getJobId(), instance);
                        final Optional<InstanceInfoDO> opt = instanceInfoRepository.findById(instance.getId());
                        opt.ifPresent(instanceInfoDO -> updateFailedInstance(instanceInfoDO, SystemInstanceResult.CAN_NOT_FIND_JOB_INFO));
                    }
                });
                // refresh the threshold so the next page query doesn't re-fetch instances handled in this round
                threshold = System.currentTimeMillis() - DISPATCH_TIMEOUT_MS;
                if (overloadFlag.get()) {
                    overloadAppIdList.add(currentAppId);
                }
            }
            log.info("[InstanceStatusChecker] process {} task,use {} ms", waitingDispatchInstances.size(), System.currentTimeMillis() - startTime);
            if (!overloadAppIdList.isEmpty()) {
                log.warn("[InstanceStatusChecker] app[{}] is overload, so skip check waiting dispatch instance", overloadAppIdList);
                partAppIds.removeAll(overloadAppIdList);
            }
            if (partAppIds.isEmpty()) {
                break;
            }
            waitingDispatchInstances = instanceInfoRepository.findAllByAppIdInAndStatusAndExpectedTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_DISPATCH.getV(), threshold, PageRequest.of(0, MAX_BATCH_NUM_INSTANCE));
        }
    }

    /**
     * Redispatch instances stuck in WAITING_WORKER_RECEIVE, looping in DB pages.
     */
    private void handleWaitingWorkerReceiveInstance(List<Long> partAppIds) {
        // 2. check instances stuck in WAITING_WORKER_RECEIVE
        long threshold = System.currentTimeMillis() - RECEIVE_TIMEOUT_MS;
        List<BriefInstanceInfo> waitingWorkerReceiveInstances = instanceInfoRepository.selectBriefInfoByAppIdInAndStatusAndActualTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_WORKER_RECEIVE.getV(), threshold, PageRequest.of(0, MAX_BATCH_NUM_INSTANCE));
        while (!waitingWorkerReceiveInstances.isEmpty()) {
            log.warn("[InstanceStatusChecker] find some instance didn't receive any reply from worker, try to redispatch: {}", waitingWorkerReceiveInstances.stream().map(BriefInstanceInfo::getInstanceId).collect(Collectors.toList()));
            final List<List<BriefInstanceInfo>> partitions = Lists.partition(waitingWorkerReceiveInstances, MAX_BATCH_UPDATE_NUM);
            for (List<BriefInstanceInfo> partition : partitions) {
                dispatchService.redispatchBatchAsyncLockFree(partition.stream().map(BriefInstanceInfo::getInstanceId).collect(Collectors.toList()), InstanceStatus.WAITING_WORKER_RECEIVE.getV());
            }
            // query the next page with a refreshed threshold
            threshold = System.currentTimeMillis() - RECEIVE_TIMEOUT_MS;
            waitingWorkerReceiveInstances = instanceInfoRepository.selectBriefInfoByAppIdInAndStatusAndActualTriggerTimeLessThan(partAppIds, InstanceStatus.WAITING_WORKER_RECEIVE.getV(), threshold, PageRequest.of(0, MAX_BATCH_NUM_INSTANCE));
        }
    }

    /**
     * Fail or retry RUNNING instances that have not reported status for too long,
     * looping in DB pages.
     */
    private void handleRunningInstance(List<Long> partAppIds) {
        // 3. check RUNNING instances (no status report from the TaskTracker for a while counts as failure)
        long threshold = System.currentTimeMillis() - RUNNING_TIMEOUT_MS;
        List<BriefInstanceInfo> failedInstances = instanceInfoRepository.selectBriefInfoByAppIdInAndStatusAndGmtModifiedBefore(partAppIds, InstanceStatus.RUNNING.getV(), new Date(threshold), PageRequest.of(0, MAX_BATCH_NUM_INSTANCE));
        while (!failedInstances.isEmpty()) {
            // collect job id
            Set<Long> jobIds = failedInstances.stream().map(BriefInstanceInfo::getJobId).collect(Collectors.toSet());
            // query job info and map
            Map<Long, JobInfoDO> jobInfoMap = jobInfoRepository.findByIdIn(jobIds).stream().collect(Collectors.toMap(JobInfoDO::getId, e -> e));
            log.warn("[InstanceStatusCheckService] find some instances have not received status report for a long time : {}", failedInstances.stream().map(BriefInstanceInfo::getInstanceId).collect(Collectors.toList()));
            failedInstances.forEach(instance -> {
                Optional<JobInfoDO> jobInfoOpt = Optional.ofNullable(jobInfoMap.get(instance.getJobId()));
                if (!jobInfoOpt.isPresent()) {
                    // the job definition vanished: fail the instance outright
                    final Optional<InstanceInfoDO> opt = instanceInfoRepository.findById(instance.getId());
                    opt.ifPresent(e -> updateFailedInstance(e, SystemInstanceResult.REPORT_TIMEOUT));
                    return;
                }
                TimeExpressionType timeExpressionType = TimeExpressionType.of(jobInfoOpt.get().getTimeExpressionType());
                SwitchableStatus switchableStatus = SwitchableStatus.of(jobInfoOpt.get().getStatus());
                // if the job is disabled don't retry, just mark failed; frequent (second-level) jobs
                // are also marked failed directly and left to the dispatcher to reschedule
                if (switchableStatus != SwitchableStatus.ENABLE || TimeExpressionType.FREQUENT_TYPES.contains(timeExpressionType.getV())) {
                    final Optional<InstanceInfoDO> opt = instanceInfoRepository.findById(instance.getId());
                    opt.ifPresent(e -> updateFailedInstance(e, SystemInstanceResult.REPORT_TIMEOUT));
                    return;
                }
                // CRON and API jobs: failure count + 1, retry according to the retry configuration
                if (instance.getRunningTimes() < jobInfoOpt.get().getInstanceRetryNum()) {
                    dispatchService.redispatchAsync(instance.getInstanceId(), InstanceStatus.RUNNING.getV());
                } else {
                    final Optional<InstanceInfoDO> opt = instanceInfoRepository.findById(instance.getId());
                    opt.ifPresent(e -> updateFailedInstance(e, SystemInstanceResult.REPORT_TIMEOUT));
                }
            });
            // query the next page with a refreshed threshold
            threshold = System.currentTimeMillis() - RUNNING_TIMEOUT_MS;
            failedInstances = instanceInfoRepository.selectBriefInfoByAppIdInAndStatusAndGmtModifiedBefore(partAppIds, InstanceStatus.RUNNING.getV(), new Date(threshold), PageRequest.of(0, MAX_BATCH_NUM_INSTANCE));
        }
    }

    /**
     * Periodically check workflow instance status.
     * Only workflow instances stuck in WAITING too long are retried here; the rest of the
     * workflow's reliability is provided by the Instance layer (a failed sub-task is
     * reported back to its WorkflowInstance).
     *
     * @param allAppIds all appIds owned by this server
     */
    private void checkWorkflowInstance(List<Long> allAppIds) {
        // restart workflow instances stuck in WAITING for too long
        long threshold = System.currentTimeMillis() - WORKFLOW_WAITING_TIMEOUT_MS;
        Lists.partition(allAppIds, MAX_BATCH_NUM_APP).forEach(partAppIds -> {
            List<WorkflowInstanceInfoDO> waitingWfInstanceList = workflowInstanceInfoRepository.findByAppIdInAndStatusAndExpectedTriggerTimeLessThan(partAppIds, WorkflowInstanceStatus.WAITING.getV(), threshold);
            if (!CollectionUtils.isEmpty(waitingWfInstanceList)) {
                List<Long> wfInstanceIds = waitingWfInstanceList.stream().map(WorkflowInstanceInfoDO::getWfInstanceId).collect(Collectors.toList());
                log.warn("[WorkflowInstanceChecker] wfInstance({}) is not started as expected, oms try to restart these workflowInstance.", wfInstanceIds);
                waitingWfInstanceList.forEach(wfInstance -> {
                    Optional<WorkflowInfoDO> workflowOpt = workflowInfoRepository.findById(wfInstance.getWorkflowId());
                    workflowOpt.ifPresent(workflowInfo -> {
                        workflowInstanceManager.start(workflowInfo, wfInstance.getWfInstanceId());
                        log.info("[Workflow-{}|{}] restart workflowInstance successfully~", workflowInfo.getId(), wfInstance.getWfInstanceId());
                    });
                });
            }
        });
    }

    /**
     * Mark an instance as FAILED with the given result and propagate the finish event.
     */
    private void updateFailedInstance(InstanceInfoDO instance, String result) {
        log.warn("[InstanceStatusChecker] instance[{}] failed due to {}, instanceInfo: {}", instance.getInstanceId(), result, instance);
        instance.setStatus(InstanceStatus.FAILED.getV());
        instance.setFinishedTime(System.currentTimeMillis());
        instance.setGmtModified(new Date());
        instance.setResult(result);
        instanceInfoRepository.saveAndFlush(instance);
        instanceManager.processFinishedInstance(instance.getInstanceId(), instance.getWfInstanceId(), InstanceStatus.FAILED, result);
    }
}

View File

@ -0,0 +1,328 @@
package tech.powerjob.server.core.scheduler;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.common.model.LifeCycle;
import tech.powerjob.common.enums.SwitchableStatus;
import tech.powerjob.common.request.http.RunJobRequest;
import tech.powerjob.server.common.timewheel.holder.InstanceTimeWheelService;
import tech.powerjob.server.core.DispatchService;
import tech.powerjob.server.core.instance.InstanceService;
import tech.powerjob.server.core.service.JobService;
import tech.powerjob.server.core.workflow.WorkflowInstanceManager;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
import tech.powerjob.server.persistence.remote.repository.AppInfoRepository;
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
import tech.powerjob.server.remote.transporter.TransportService;
import tech.powerjob.server.remote.worker.WorkerClusterManagerService;
import java.util.*;
/**
 * Job scheduling service (schedules jobs whose trigger time is computed on the server,
 * e.g. CRON expressions).
 * Originally FIX_RATE / FIX_DELAY jobs were not scheduled here — they were dispatched to a
 * Worker right after creation and only needed failure-retry handling in
 * InstanceStatusCheckService. That scattered the logic, so a little performance is traded
 * for unified scheduling here instead.
 *
 * @author tjq
 * @since 2020/4/5
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class PowerScheduleService {

    /**
     * Number of apps scheduled concurrently per batch.
     */
    private static final int MAX_APP_NUM = 10;

    private final TransportService transportService;

    private final DispatchService dispatchService;

    private final InstanceService instanceService;

    private final WorkflowInstanceManager workflowInstanceManager;

    private final AppInfoRepository appInfoRepository;

    private final JobInfoRepository jobInfoRepository;

    private final WorkflowInfoRepository workflowInfoRepository;

    private final InstanceInfoRepository instanceInfoRepository;

    private final JobService jobService;

    private final TimingStrategyService timingStrategyService;

    /**
     * Interval (ms) between two scheduling rounds; also used as the slow-query warning threshold.
     */
    public static final long SCHEDULE_RATE = 15000;

    /**
     * Schedules jobs of a server-side-calculated expression type (e.g. CRON) for all apps
     * assigned to this server. Exceptions are swallowed and logged so one bad round does not
     * kill the scheduling loop.
     *
     * @param timeExpressionType the expression type being scheduled this round
     */
    public void scheduleNormalJob(TimeExpressionType timeExpressionType) {
        long start = System.currentTimeMillis();
        // schedule jobs whose trigger time is computed on the server (e.g. CRON)
        try {
            final List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
            if (CollectionUtils.isEmpty(allAppIds)) {
                log.info("[NormalScheduler] current server has no app's job to schedule.");
                return;
            }
            scheduleNormalJob0(timeExpressionType, allAppIds);
        } catch (Exception e) {
            log.error("[NormalScheduler] schedule cron job failed.", e);
        }
        long cost = System.currentTimeMillis() - start;
        log.info("[NormalScheduler] {} job schedule use {} ms.", timeExpressionType, cost);
        if (cost > SCHEDULE_RATE) {
            log.warn("[NormalScheduler] The database query is using too much time({}ms), please check if the database load is too high!", cost);
        }
    }

    /**
     * Schedules CRON-expression workflows for all apps assigned to this server.
     */
    public void scheduleCronWorkflow() {
        long start = System.currentTimeMillis();
        // schedule CRON-expression workflows
        try {
            final List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
            if (CollectionUtils.isEmpty(allAppIds)) {
                log.info("[CronWorkflowSchedule] current server has no app's workflow to schedule.");
                return;
            }
            scheduleWorkflowCore(allAppIds);
        } catch (Exception e) {
            log.error("[CronWorkflowSchedule] schedule cron workflow failed.", e);
        }
        long cost = System.currentTimeMillis() - start;
        log.info("[CronWorkflowSchedule] cron workflow schedule use {} ms.", cost);
        if (cost > SCHEDULE_RATE) {
            log.warn("[CronWorkflowSchedule] The database query is using too much time({}ms), please check if the database load is too high!", cost);
        }
    }

    /**
     * Schedules FIX_RATE / FIX_DELAY (frequent) jobs for all apps assigned to this server.
     */
    public void scheduleFrequentJob() {
        long start = System.currentTimeMillis();
        // schedule FIX_RATE / FIX_DELAY jobs
        try {
            final List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
            if (CollectionUtils.isEmpty(allAppIds)) {
                log.info("[FrequentJobSchedule] current server has no app's job to schedule.");
                return;
            }
            scheduleFrequentJobCore(allAppIds);
        } catch (Exception e) {
            log.error("[FrequentJobSchedule] schedule frequent job failed.", e);
        }
        long cost = System.currentTimeMillis() - start;
        log.info("[FrequentJobSchedule] frequent job schedule use {} ms.", cost);
        if (cost > SCHEDULE_RATE) {
            log.warn("[FrequentJobSchedule] The database query is using too much time({}ms), please check if the database load is too high!", cost);
        }
    }

    /**
     * Cleans worker-cluster bookkeeping for the apps assigned to this server.
     */
    public void cleanData() {
        try {
            final List<Long> allAppIds = appInfoRepository.listAppIdByCurrentServer(transportService.defaultProtocol().getAddress());
            if (allAppIds.isEmpty()) {
                return;
            }
            WorkerClusterManagerService.clean(allAppIds);
        } catch (Exception e) {
            log.error("[CleanData] clean data failed.", e);
        }
    }

    /**
     * Core scheduling for server-side-calculated expression types (CRON, DAILY_TIME_INTERVAL).
     *
     * @param timeExpressionType expression type
     * @param appIds             appIds to schedule for
     */
    private void scheduleNormalJob0(TimeExpressionType timeExpressionType, List<Long> appIds) {
        long nowTime = System.currentTimeMillis();
        // look ahead two schedule ticks so nothing is missed between rounds
        long timeThreshold = nowTime + 2 * SCHEDULE_RATE;
        Lists.partition(appIds, MAX_APP_NUM).forEach(partAppIds -> {
            try {
                // query condition: job enabled + matching expression type + given appIds + about to trigger
                List<JobInfoDO> jobInfos = jobInfoRepository.findByAppIdInAndStatusAndTimeExpressionTypeAndNextTriggerTimeLessThanEqual(partAppIds, SwitchableStatus.ENABLE.getV(), timeExpressionType.getV(), timeThreshold);
                if (CollectionUtils.isEmpty(jobInfos)) {
                    return;
                }
                // 1. batch-create instance (log) records first, so a missed dispatch can still be detected
                Map<Long, Long> jobId2InstanceId = Maps.newHashMap();
                log.info("[NormalScheduler] These {} jobs will be scheduled: {}.", timeExpressionType.name(), jobInfos);
                jobInfos.forEach(jobInfo -> {
                    Long instanceId = instanceService.create(jobInfo.getId(), jobInfo.getAppId(), jobInfo.getJobParams(), null, null, jobInfo.getNextTriggerTime(), null, null).getInstanceId();
                    jobId2InstanceId.put(jobInfo.getId(), instanceId);
                });
                instanceInfoRepository.flush();
                // 2. push into the time wheel to await dispatch
                jobInfos.forEach(jobInfoDO -> {
                    Long instanceId = jobId2InstanceId.get(jobInfoDO.getId());
                    long targetTriggerTime = jobInfoDO.getNextTriggerTime();
                    long delay = 0;
                    if (targetTriggerTime < nowTime) {
                        // already late: dispatch immediately (delay stays 0)
                        log.warn("[Job-{}] schedule delay, expect: {}, current: {}", jobInfoDO.getId(), targetTriggerTime, System.currentTimeMillis());
                    } else {
                        delay = targetTriggerTime - nowTime;
                    }
                    InstanceTimeWheelService.schedule(instanceId, delay, () -> dispatchService.dispatch(jobInfoDO, instanceId, Optional.empty(), Optional.empty()));
                });
                // 3. compute the next trigger time (repeats within 5s are ignored, i.e. the minimum
                //    back-to-back interval in CRON mode is SCHEDULE_RATE ms)
                jobInfos.forEach(jobInfoDO -> {
                    try {
                        refreshJob(timeExpressionType, jobInfoDO);
                    } catch (Exception e) {
                        log.error("[Job-{}] refresh job failed.", jobInfoDO.getId(), e);
                    }
                });
                jobInfoRepository.flush();
            } catch (Exception e) {
                log.error("[NormalScheduler] schedule {} job failed.", timeExpressionType.name(), e);
            }
        });
    }

    /**
     * Core scheduling for CRON workflows: create the run record, push into the time wheel,
     * then recompute the next trigger time.
     *
     * @param appIds appIds to schedule for
     */
    private void scheduleWorkflowCore(List<Long> appIds) {
        long nowTime = System.currentTimeMillis();
        // look ahead two schedule ticks so nothing is missed between rounds
        long timeThreshold = nowTime + 2 * SCHEDULE_RATE;
        Lists.partition(appIds, MAX_APP_NUM).forEach(partAppIds -> {
            List<WorkflowInfoDO> wfInfos = workflowInfoRepository.findByAppIdInAndStatusAndTimeExpressionTypeAndNextTriggerTimeLessThanEqual(partAppIds, SwitchableStatus.ENABLE.getV(), TimeExpressionType.CRON.getV(), timeThreshold);
            if (CollectionUtils.isEmpty(wfInfos)) {
                return;
            }
            wfInfos.forEach(wfInfo -> {
                // 1. create the run record first, so a missed schedule can still be detected
                Long wfInstanceId = workflowInstanceManager.create(wfInfo, null, wfInfo.getNextTriggerTime(), null);
                // 2. push into the time wheel, ready to trigger
                long delay = wfInfo.getNextTriggerTime() - System.currentTimeMillis();
                if (delay < 0) {
                    log.warn("[Workflow-{}] workflow schedule delay, expect:{}, actual: {}", wfInfo.getId(), wfInfo.getNextTriggerTime(), System.currentTimeMillis());
                    delay = 0;
                }
                InstanceTimeWheelService.schedule(wfInstanceId, delay, () -> workflowInstanceManager.start(wfInfo, wfInstanceId));
                // 3. recompute and persist the next trigger time
                try {
                    refreshWorkflow(wfInfo);
                } catch (Exception e) {
                    log.error("[Workflow-{}] refresh workflow failed.", wfInfo.getId(), e);
                }
            });
            workflowInfoRepository.flush();
        });
    }

    /**
     * Core scheduling for frequent (FIX_RATE / FIX_DELAY) jobs: any enabled frequent job
     * without a running instance is (re)started, and jobs whose lifecycle has ended are disabled.
     *
     * @param appIds appIds to schedule for
     */
    private void scheduleFrequentJobCore(List<Long> appIds) {
        Lists.partition(appIds, MAX_APP_NUM).forEach(partAppIds -> {
            try {
                // fetch all enabled frequent (second-level) jobs, ids only
                List<Long> jobIds = jobInfoRepository.findByAppIdInAndStatusAndTimeExpressionTypeIn(partAppIds, SwitchableStatus.ENABLE.getV(), TimeExpressionType.FREQUENT_TYPES);
                if (CollectionUtils.isEmpty(jobIds)) {
                    return;
                }
                // check which of them already have an instance in a generalized-running state
                List<Long> runningJobIdList = instanceInfoRepository.findByJobIdInAndStatusIn(jobIds, InstanceStatus.GENERALIZED_RUNNING_STATUS);
                Set<Long> runningJobIdSet = Sets.newHashSet(runningJobIdList);
                List<Long> notRunningJobIds = Lists.newLinkedList();
                jobIds.forEach(jobId -> {
                    if (!runningJobIdSet.contains(jobId)) {
                        notRunningJobIds.add(jobId);
                    }
                });
                if (CollectionUtils.isEmpty(notRunningJobIds)) {
                    return;
                }
                notRunningJobIds.forEach(jobId -> {
                    Optional<JobInfoDO> jobInfoOpt = jobInfoRepository.findById(jobId);
                    jobInfoOpt.ifPresent(jobInfoDO -> {
                        LifeCycle lifeCycle = LifeCycle.parse(jobInfoDO.getLifecycle());
                        // lifecycle already ended: disable the job
                        if (lifeCycle.getEnd() != null && lifeCycle.getEnd() < System.currentTimeMillis()) {
                            jobInfoDO.setStatus(SwitchableStatus.DISABLE.getV());
                            jobInfoDO.setGmtModified(new Date());
                            jobInfoRepository.saveAndFlush(jobInfoDO);
                            log.info("[FrequentScheduler] disable frequent job,id:{}.", jobInfoDO.getId());
                        } else if (lifeCycle.getStart() == null || lifeCycle.getStart() < System.currentTimeMillis() + SCHEDULE_RATE * 2) {
                            // not started yet but starts within two schedule ticks (or no start bound): run it,
                            // delaying until the lifecycle start when one is configured
                            log.info("[FrequentScheduler] schedule frequent job,id:{}.", jobInfoDO.getId());
                            RunJobRequest runJobRequest = new RunJobRequest()
                                    .setAppId(jobInfoDO.getAppId()).setJobId(jobId).setDelay(Optional.ofNullable(lifeCycle.getStart()).orElse(0L) - System.currentTimeMillis());
                            jobService.runJob(runJobRequest.getAppId(), runJobRequest);
                        }
                    });
                });
            } catch (Exception e) {
                log.error("[FrequentScheduler] schedule frequent job failed.", e);
            }
        });
    }

    /**
     * Recomputes and persists a job's next trigger time; disables the job when no further
     * trigger exists (lifecycle ended or expression exhausted).
     */
    private void refreshJob(TimeExpressionType timeExpressionType, JobInfoDO jobInfo) {
        LifeCycle lifeCycle = LifeCycle.parse(jobInfo.getLifecycle());
        Long nextTriggerTime = timingStrategyService.calculateNextTriggerTime(jobInfo.getNextTriggerTime(), timeExpressionType, jobInfo.getTimeExpression(), lifeCycle.getStart(), lifeCycle.getEnd());
        // copy before mutating so the entity passed in by the caller stays untouched
        JobInfoDO updatedJobInfo = new JobInfoDO();
        BeanUtils.copyProperties(jobInfo, updatedJobInfo);
        if (nextTriggerTime == null) {
            log.warn("[Job-{}] this job won't be scheduled anymore, system will set the status to DISABLE!", jobInfo.getId());
            updatedJobInfo.setStatus(SwitchableStatus.DISABLE.getV());
        } else {
            updatedJobInfo.setNextTriggerTime(nextTriggerTime);
        }
        updatedJobInfo.setGmtModified(new Date());
        jobInfoRepository.save(updatedJobInfo);
    }

    /**
     * Recomputes and persists a workflow's next trigger time; disables the workflow when no
     * further trigger exists.
     */
    private void refreshWorkflow(WorkflowInfoDO wfInfo) {
        LifeCycle lifeCycle = LifeCycle.parse(wfInfo.getLifecycle());
        Long nextTriggerTime = timingStrategyService.calculateNextTriggerTime(wfInfo.getNextTriggerTime(), TimeExpressionType.CRON, wfInfo.getTimeExpression(), lifeCycle.getStart(), lifeCycle.getEnd());
        // copy before mutating so the entity passed in by the caller stays untouched
        WorkflowInfoDO updateEntity = new WorkflowInfoDO();
        BeanUtils.copyProperties(wfInfo, updateEntity);
        if (nextTriggerTime == null) {
            log.warn("[Workflow-{}] this workflow won't be scheduled anymore, system will set the status to DISABLE!", wfInfo.getId());
            updateEntity.setStatus(SwitchableStatus.DISABLE.getV());
        } else {
            updateEntity.setNextTriggerTime(nextTriggerTime);
        }
        updateEntity.setGmtModified(new Date());
        workflowInfoRepository.save(updateEntity);
    }
}

View File

@ -0,0 +1,122 @@
package tech.powerjob.server.core.scheduler;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.springframework.stereotype.Service;
import tech.powerjob.common.OmsConstant;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.server.core.scheduler.auxiliary.TimingStrategyHandler;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Resolves the proper {@link TimingStrategyHandler} for each {@link TimeExpressionType}
 * and exposes trigger-time calculation and expression validation on top of it.
 *
 * @author Echo009
 * @since 2022/3/21
 */
@Slf4j
@Service
public class TimingStrategyService {

    private static final int NEXT_N_TIMES = 5;

    private static final List<String> TIPS = Collections.singletonList("It is valid, but has not trigger time list!");

    /** TimeExpressionType -> handler, populated once from all Spring-managed handlers. */
    private final Map<TimeExpressionType, TimingStrategyHandler> strategyContainer;

    public TimingStrategyService(List<TimingStrategyHandler> timingStrategyHandlers) {
        // register every handler under the expression type it declares
        strategyContainer = new EnumMap<>(TimeExpressionType.class);
        timingStrategyHandlers.forEach(handler -> strategyContainer.put(handler.supportType(), handler));
    }

    /**
     * Computes the next few trigger times, formatted for display.
     *
     * @param timeExpressionType expression type
     * @param timeExpression     the expression itself
     * @param startTime          lifecycle start (inclusive, nullable)
     * @param endTime            lifecycle end (inclusive, nullable)
     * @return formatted trigger times, or a fixed hint when none exist
     */
    public List<String> calculateNextTriggerTimes(TimeExpressionType timeExpressionType, String timeExpression, Long startTime, Long endTime) {
        TimingStrategyHandler handler = getHandler(timeExpressionType);
        List<Long> upcoming = new ArrayList<>(NEXT_N_TIMES);
        Long cursor = System.currentTimeMillis();
        while (upcoming.size() < NEXT_N_TIMES) {
            cursor = handler.calculateNextTriggerTime(cursor, timeExpression, startTime, endTime);
            if (cursor == null) {
                break;
            }
            upcoming.add(cursor);
        }
        if (upcoming.isEmpty()) {
            return TIPS;
        }
        return upcoming.stream().map(ts -> DateFormatUtils.format(ts, OmsConstant.TIME_PATTERN)).collect(Collectors.toList());
    }

    /**
     * Computes the next trigger time.
     *
     * @param preTriggerTime     previous trigger time (nullable; values in the past are replaced by now)
     * @param timeExpressionType expression type
     * @param timeExpression     the expression itself
     * @param startTime          lifecycle start (inclusive, nullable)
     * @param endTime            lifecycle end (inclusive, nullable)
     * @return next trigger time, or null if there is none
     */
    public Long calculateNextTriggerTime(Long preTriggerTime, TimeExpressionType timeExpressionType, String timeExpression, Long startTime, Long endTime) {
        long now = System.currentTimeMillis();
        Long base = (preTriggerTime == null || preTriggerTime < now) ? now : preTriggerTime;
        return getHandler(timeExpressionType).calculateNextTriggerTime(base, timeExpression, startTime, endTime);
    }

    /**
     * Computes the next trigger time and enforces that inspected expression types
     * actually have one.
     *
     * @param timeExpressionType expression type
     * @param timeExpression     the expression itself
     * @param startTime          lifecycle start (inclusive, nullable)
     * @param endTime            lifecycle end (inclusive, nullable)
     * @return next trigger time (may be null for non-inspected types)
     */
    public Long calculateNextTriggerTimeWithInspection(TimeExpressionType timeExpressionType, String timeExpression, Long startTime, Long endTime) {
        Long nextTriggerTime = calculateNextTriggerTime(null, timeExpressionType, timeExpression, startTime, endTime);
        boolean needInspection = TimeExpressionType.INSPECT_TYPES.contains(timeExpressionType.getV());
        if (needInspection && nextTriggerTime == null) {
            throw new PowerJobException("time expression is out of date: " + timeExpression);
        }
        return nextTriggerTime;
    }

    /**
     * Validates the lifecycle window and the time expression itself,
     * throwing {@link PowerJobException} on any violation.
     */
    public void validate(TimeExpressionType timeExpressionType, String timeExpression, Long startTime, Long endTime) {
        if (endTime != null) {
            if (endTime <= System.currentTimeMillis()) {
                throw new PowerJobException("lifecycle is out of date!");
            }
            if (startTime != null && startTime > endTime) {
                throw new PowerJobException("lifecycle is invalid! start time must earlier then end time.");
            }
        }
        getHandler(timeExpressionType).validate(timeExpression);
    }

    /** Looks up the handler for the given type, failing fast when none is registered. */
    private TimingStrategyHandler getHandler(TimeExpressionType timeExpressionType) {
        TimingStrategyHandler handler = strategyContainer.get(timeExpressionType);
        if (handler == null) {
            throw new PowerJobException("No matching TimingStrategyHandler for this TimeExpressionType:" + timeExpressionType);
        }
        return handler;
    }
}

View File

@ -0,0 +1,19 @@
package tech.powerjob.server.core.scheduler.auxiliary;
/**
 * Convenience base class for timing strategies that need neither expression validation
 * nor a server-side-computed next trigger time (e.g. externally triggered types).
 * Subclasses override only the pieces they actually need.
 *
 * @author Echo009
 * @since 2022/3/22
 */
public abstract class AbstractTimingStrategyHandler implements TimingStrategyHandler {

    @Override
    public void validate(String timeExpression) {
        // accept any expression by default
    }

    @Override
    public Long calculateNextTriggerTime(Long preTriggerTime, String timeExpression, Long startTime, Long endTime) {
        // no server-side schedule by default
        return null;
    }
}

View File

@ -0,0 +1,238 @@
package tech.powerjob.server.core.scheduler.auxiliary;
import java.io.Serializable;
import java.io.Serializable;
import java.util.Calendar;
import java.util.Date;
import java.util.TimeZone;
/**
 * An immutable time of day expressed as hour, minute and second.
 *
 * <p>The hour uses the 24-hour convention, i.e. values from 0 to 23.</p>
 * <a href="https://github.com/quartz-scheduler/quartz">PowerJob learn from quartz</a>
 *
 * @since 2.0.3
 *
 * @author James House
 * @author Zemian Deng <saltnlight5@gmail.com>
 */
public class TimeOfDay implements Serializable {

    private static final long serialVersionUID = 2964774315889061771L;

    private final int hour;
    private final int minute;
    private final int second;

    /**
     * Creates a TimeOfDay for the given hour, minute and second.
     *
     * @param hour   hour of day, 0..23
     * @param minute minute of hour, 0..59
     * @param second second of minute, 0..59
     * @throws IllegalArgumentException when any value is outside its valid range
     */
    public TimeOfDay(int hour, int minute, int second) {
        this.hour = hour;
        this.minute = minute;
        this.second = second;
        validate();
    }

    /**
     * Creates a TimeOfDay for the given hour and minute, at second zero.
     *
     * @param hour   hour of day, 0..23
     * @param minute minute of hour, 0..59
     * @throws IllegalArgumentException when any value is outside its valid range
     */
    public TimeOfDay(int hour, int minute) {
        this(hour, minute, 0);
    }

    /** Rejects any field outside its valid range. */
    private void validate() {
        if (hour < 0 || hour > 23) {
            throw new IllegalArgumentException("Hour must be from 0 to 23");
        }
        if (minute < 0 || minute > 59) {
            throw new IllegalArgumentException("Minute must be from 0 to 59");
        }
        if (second < 0 || second > 59) {
            throw new IllegalArgumentException("Second must be from 0 to 59");
        }
    }

    /**
     * Static factory equivalent to {@link #TimeOfDay(int, int, int)}.
     */
    public static TimeOfDay hourMinuteAndSecondOfDay(int hour, int minute, int second) {
        return new TimeOfDay(hour, minute, second);
    }

    /**
     * Static factory equivalent to {@link #TimeOfDay(int, int)}.
     */
    public static TimeOfDay hourAndMinuteOfDay(int hour, int minute) {
        return new TimeOfDay(hour, minute);
    }

    /**
     * @return the hour of the day (0..23)
     */
    public int getHour() {
        return hour;
    }

    /**
     * @return the minute of the hour (0..59)
     */
    public int getMinute() {
        return minute;
    }

    /**
     * @return the second of the minute (0..59)
     */
    public int getSecond() {
        return second;
    }

    /**
     * Tests whether this time of day is strictly before the given one.
     *
     * @return true when this time of day precedes {@code timeOfDay}; false when equal or after
     */
    public boolean before(TimeOfDay timeOfDay) {
        return toSecondOfDay() < timeOfDay.toSecondOfDay();
    }

    /** Total seconds elapsed since 00:00:00 — well-defined because fields are range-validated. */
    private int toSecondOfDay() {
        return (hour * 60 + minute) * 60 + second;
    }

    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof TimeOfDay)) {
            return false;
        }
        TimeOfDay that = (TimeOfDay) obj;
        return hour == that.hour && minute == that.minute && second == that.second;
    }

    @Override
    public int hashCode() {
        // kept identical to the quartz original for compatibility
        return (hour + 1) ^ (minute + 1) ^ (second + 1);
    }

    /** Returns a copy of the given date with its time of day replaced by this object's values; milliseconds are zeroed. */
    public Date getTimeOfDayForDate(Date dateTime) {
        if (dateTime == null) {
            return null;
        }
        Calendar cal = Calendar.getInstance();
        cal.setTime(dateTime);
        cal.set(Calendar.HOUR_OF_DAY, hour);
        cal.set(Calendar.MINUTE, minute);
        cal.set(Calendar.SECOND, second);
        cal.clear(Calendar.MILLISECOND);
        return cal.getTime();
    }

    /**
     * Extracts hour, minute and second from the given date in the system default TimeZone.
     */
    public static TimeOfDay hourAndMinuteAndSecondFromDate(Date dateTime) {
        return hourAndMinuteAndSecondFromDate(dateTime, null);
    }

    /**
     * Extracts hour, minute and second from the given date in the given TimeZone.
     *
     * @param dateTime the date to read from (null yields null)
     * @param tz       the TimeZone to interpret the date in; null means system default
     */
    public static TimeOfDay hourAndMinuteAndSecondFromDate(Date dateTime, TimeZone tz) {
        if (dateTime == null) {
            return null;
        }
        Calendar cal = Calendar.getInstance();
        cal.setTime(dateTime);
        if (tz != null) {
            cal.setTimeZone(tz);
        }
        return new TimeOfDay(cal.get(Calendar.HOUR_OF_DAY), cal.get(Calendar.MINUTE), cal.get(Calendar.SECOND));
    }

    /**
     * Extracts hour and minute (second set to zero) from the given date in the system default TimeZone.
     */
    public static TimeOfDay hourAndMinuteFromDate(Date dateTime) {
        return hourAndMinuteFromDate(dateTime, null);
    }

    /**
     * Extracts hour and minute (second set to zero) from the given date in the given TimeZone.
     *
     * @param dateTime the date to read from (null yields null)
     * @param tz       the TimeZone to interpret the date in; null means system default
     */
    public static TimeOfDay hourAndMinuteFromDate(Date dateTime, TimeZone tz) {
        if (dateTime == null) {
            return null;
        }
        Calendar cal = Calendar.getInstance();
        cal.setTime(dateTime);
        if (tz != null) {
            cal.setTimeZone(tz);
        }
        return new TimeOfDay(cal.get(Calendar.HOUR_OF_DAY), cal.get(Calendar.MINUTE));
    }

    /**
     * Parses an "HH:mm:ss" string (e.g. "15:30:10") into a TimeOfDay.
     *
     * @throws IllegalArgumentException when the string is not three colon-separated fields
     */
    public static TimeOfDay from(String hms) {
        String[] parts = hms.split(":");
        if (parts.length != 3) {
            throw new IllegalArgumentException("invalid TimeOfDay, make pattern like 15:30:10");
        }
        return new TimeOfDay(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]), Integer.parseInt(parts[2]));
    }

    @Override
    public String toString() {
        return "TimeOfDay[" + hour + ":" + minute + ":" + second + "]";
    }
}

View File

@ -0,0 +1,37 @@
package tech.powerjob.server.core.scheduler.auxiliary;
import tech.powerjob.common.enums.TimeExpressionType;
/**
 * Strategy interface: one implementation per {@link TimeExpressionType}, responsible for
 * validating a time expression and computing its next trigger time.
 *
 * @author Echo009
 * @since 2022/2/24
 */
public interface TimingStrategyHandler {

    /**
     * Validates the time expression, throwing on invalid input.
     *
     * @param timeExpression the time expression to check
     */
    void validate(String timeExpression);

    /**
     * Calculates the next trigger time.
     *
     * @param preTriggerTime previous trigger time (not null)
     * @param timeExpression the time expression
     * @param startTime      lifecycle start time (inclusive, nullable)
     * @param endTime        lifecycle end time (inclusive, nullable)
     * @return next trigger time, or null when there is none
     */
    Long calculateNextTriggerTime(Long preTriggerTime, String timeExpression, Long startTime, Long endTime);

    /**
     * The time-expression type this handler supports.
     *
     * @return TimeExpressionType
     */
    TimeExpressionType supportType();
}

View File

@ -0,0 +1,17 @@
package tech.powerjob.server.core.scheduler.auxiliary.impl;
import org.springframework.stereotype.Component;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.server.core.scheduler.auxiliary.AbstractTimingStrategyHandler;
/**
 * Timing strategy for API-triggered jobs: such jobs are triggered externally, so they need
 * neither expression validation nor a server-side-computed next trigger time (both no-op
 * defaults are inherited from {@link AbstractTimingStrategyHandler}).
 *
 * @author Echo009
 * @since 2022/3/22
 */
@Component
public class ApiTimingStrategyHandler extends AbstractTimingStrategyHandler {

    @Override
    public TimeExpressionType supportType() {
        return TimeExpressionType.API;
    }
}

View File

@ -0,0 +1,77 @@
package tech.powerjob.server.core.scheduler.auxiliary.impl;
import com.cronutils.model.Cron;
import com.cronutils.model.definition.CronDefinition;
import com.cronutils.model.definition.CronDefinitionBuilder;
import com.cronutils.model.time.ExecutionTime;
import com.cronutils.parser.CronParser;
import org.springframework.stereotype.Component;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.server.core.scheduler.auxiliary.TimingStrategyHandler;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.Optional;
/**
 * Timing strategy for CRON expressions, backed by the cron-utils library.
 *
 * @author Echo009
 * @since 2022/2/24
 */
@Component
public class CronTimingStrategyHandler implements TimingStrategyHandler {

    /** cron-utils parser built once from the custom definition below; reused for every call. */
    private final CronParser cronParser;

    /**
     * @see CronDefinitionBuilder#instanceDefinitionFor
     * <p>
     * Enhanced quartz cronSupport for specifying both a day-of-week and a day-of-month parameter.
     * https://github.com/PowerJob/PowerJob/issues/382
     */
    public CronTimingStrategyHandler() {
        CronDefinition cronDefinition = CronDefinitionBuilder.defineCron()
                .withSeconds().withValidRange(0, 59).and()
                .withMinutes().withValidRange(0, 59).and()
                .withHours().withValidRange(0, 23).and()
                .withDayOfMonth().withValidRange(1, 31).supportsL().supportsW().supportsLW().supportsQuestionMark().and()
                .withMonth().withValidRange(1, 12).and()
                .withDayOfWeek().withValidRange(1, 7).withMondayDoWValue(2).supportsHash().supportsL().supportsQuestionMark().and()
                .withYear().withValidRange(1970, 2099).withStrictRange().optional().and()
                .instance();
        this.cronParser = new CronParser(cronDefinition);
    }

    @Override
    public void validate(String timeExpression) {
        // parse() throws for an invalid expression, which is exactly the validation contract
        cronParser.parse(timeExpression);
    }

    @Override
    public Long calculateNextTriggerTime(Long preTriggerTime, String timeExpression, Long startTime, Long endTime) {
        Cron cron = cronParser.parse(timeExpression);
        ExecutionTime executionTime = ExecutionTime.forCron(cron);
        if (startTime != null && startTime > System.currentTimeMillis() && preTriggerTime < startTime) {
            // lifecycle has not started yet: rebase onto the latest real cron firing at or before startTime
            Optional<ZonedDateTime> zonedDateTime = executionTime.lastExecution(ZonedDateTime.ofInstant(Instant.ofEpochMilli(startTime), ZoneId.systemDefault()));
            preTriggerTime = zonedDateTime.map(dateTime -> dateTime.toEpochSecond() * 1000).orElse(startTime);
        }
        Instant instant = Instant.ofEpochMilli(preTriggerTime);
        ZonedDateTime preZonedDateTime = ZonedDateTime.ofInstant(instant, ZoneId.systemDefault());
        Optional<ZonedDateTime> opt = executionTime.nextExecution(preZonedDateTime);
        if (opt.isPresent()) {
            // toEpochSecond drops sub-second precision, which is fine for cron's second granularity
            long nextTriggerTime = opt.get().toEpochSecond() * 1000;
            // honour the lifecycle end: no trigger beyond endTime
            if (endTime != null && endTime < nextTriggerTime) {
                return null;
            }
            return nextTriggerTime;
        }
        return null;
    }

    @Override
    public TimeExpressionType supportType() {
        return TimeExpressionType.CRON;
    }
}

View File

@ -0,0 +1,166 @@
package tech.powerjob.server.core.scheduler.auxiliary.impl;
import com.google.common.collect.Sets;
import lombok.Data;
import lombok.SneakyThrows;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateUtils;
import org.springframework.stereotype.Component;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.common.serialize.JsonUtils;
import tech.powerjob.common.utils.CollectionUtils;
import tech.powerjob.common.utils.CommonUtils;
import tech.powerjob.server.common.utils.TimeUtils;
import tech.powerjob.server.core.scheduler.auxiliary.TimeOfDay;
import tech.powerjob.server.core.scheduler.auxiliary.TimingStrategyHandler;
import java.io.Serializable;
import java.util.Calendar;
import java.util.Date;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
/**
* DailyTimeIntervalStrategyHandler
* @author 550w
* @date 2027/02/15
*/
@Component
public class DailyTimeIntervalStrategyHandler implements TimingStrategyHandler {
/**
* 使用中国星期!!!
*/
private static final Set<Integer> ALL_DAY = Sets.newHashSet(1, 2, 3, 4, 5, 6, 7);
@Override
public TimeExpressionType supportType() {
return TimeExpressionType.DAILY_TIME_INTERVAL;
}
@Override
@SneakyThrows
public void validate(String timeExpression) {
DailyTimeIntervalExpress ep = JsonUtils.parseObject(timeExpression, DailyTimeIntervalExpress.class);
CommonUtils.requireNonNull(ep.interval, "interval can't be null or empty in DailyTimeIntervalExpress");
CommonUtils.requireNonNull(ep.startTimeOfDay, "startTimeOfDay can't be null or empty in DailyTimeIntervalExpress");
CommonUtils.requireNonNull(ep.endTimeOfDay, "endTimeOfDay can't be null or empty in DailyTimeIntervalExpress");
TimeOfDay startTime = TimeOfDay.from(ep.startTimeOfDay);
TimeOfDay endTime = TimeOfDay.from(ep.endTimeOfDay);
if (endTime.before(startTime)) {
throw new IllegalArgumentException("endTime should after startTime!");
}
if (StringUtils.isNotEmpty(ep.intervalUnit)) {
TimeUnit.valueOf(ep.intervalUnit);
}
}
@Override
@SneakyThrows
public Long calculateNextTriggerTime(Long preTriggerTime, String timeExpression, Long startTime, Long endTime) {
DailyTimeIntervalExpress ep = JsonUtils.parseObject(timeExpression, DailyTimeIntervalExpress.class);
// 未开始状态下,用起点算调度时间
if (startTime != null && startTime > System.currentTimeMillis() && preTriggerTime < startTime) {
return calculateInRangeTime(startTime, ep);
}
// 间隔时间
TimeUnit timeUnit = Optional.ofNullable(ep.intervalUnit).map(TimeUnit::valueOf).orElse(TimeUnit.SECONDS);
long interval = timeUnit.toMillis(ep.interval);
Long ret = calculateInRangeTime(preTriggerTime + interval, ep);
if (ret == null || ret <= Optional.ofNullable(endTime).orElse(Long.MAX_VALUE)) {
return ret;
}
return null;
}
/**
* 计算最近一次在范围中的时间
* @param time 当前时间基准,可能直接返回该时间作为结果
* @param ep 表达式
* @return 最近一次在范围中的时间
*/
static Long calculateInRangeTime(Long time, DailyTimeIntervalExpress ep) {
Calendar calendar = Calendar.getInstance();
calendar.setTime(new Date(time));
int year = calendar.get(Calendar.YEAR);
// 月份 + 1转为熟悉的 112 月
int month = calendar.get(Calendar.MONTH) + 1;
int day = calendar.get(Calendar.DAY_OF_MONTH);
// 判断是否符合"日"的执行条件
int week = TimeUtils.calculateWeek(year, month, day);
Set<Integer> targetDays = CollectionUtils.isEmpty(ep.daysOfWeek) ? ALL_DAY : ep.daysOfWeek;
// 未包含情况下,将时间改写为符合条件日的 00:00 分,重新开始递归(这部分应该有性能更优的写法,不过这个调度模式应该很难触发瓶颈,先简单好用的实现)
if (!targetDays.contains(week)) {
simpleSetCalendar(calendar, 0, 0, 0);
Date tomorrowZero = DateUtils.addDays(calendar.getTime(), 1);
return calculateInRangeTime(tomorrowZero.getTime(), ep);
}
// 范围的开始时间
TimeOfDay rangeStartTime = TimeOfDay.from(ep.startTimeOfDay);
simpleSetCalendar(calendar, rangeStartTime.getHour(), rangeStartTime.getMinute(), rangeStartTime.getSecond());
long todayStartTs = calendar.getTimeInMillis();
// 未开始
if (time < todayStartTs) {
return todayStartTs;
}
TimeOfDay rangeEndTime = TimeOfDay.from(ep.endTimeOfDay);
simpleSetCalendar(calendar, rangeEndTime.getHour(), rangeEndTime.getMinute(), rangeEndTime.getSecond());
long todayEndTs = calendar.getTimeInMillis();
// 范围之间
if (time <= todayEndTs) {
return time;
}
// 已结束,重新计算第二天时间
simpleSetCalendar(calendar, 0, 0, 0);
return calculateInRangeTime(DateUtils.addDays(calendar.getTime(), 1).getTime(), ep);
}
/**
 * Overwrites the time-of-day fields (hour/minute/second) of {@code calendar} and zeroes the
 * millisecond field, leaving the date fields untouched.
 */
private static void simpleSetCalendar(Calendar calendar, int h, int m, int s) {
    calendar.set(Calendar.HOUR_OF_DAY, h);
    calendar.set(Calendar.MINUTE, m);
    calendar.set(Calendar.SECOND, s);
    calendar.set(Calendar.MILLISECOND, 0);
}
@Data
static class DailyTimeIntervalExpress implements Serializable {

    /**
     * Interval between two triggers, in {@link #intervalUnit} units
     */
    private Long interval;
    /**
     * Daily activation start time; format "18:30:00" means the window opens at 18:30:00
     */
    private String startTimeOfDay;
    /**
     * Daily activation end time; same format as {@link #startTimeOfDay}
     */
    private String endTimeOfDay;

    /* ************ optional fields ************ */
    /**
     * Time unit of {@link #interval} (a {@link java.util.concurrent.TimeUnit} name); defaults to seconds
     */
    private String intervalUnit;
    /**
     * Days of week on which the job is active; empty means every day
     */
    private Set<Integer> daysOfWeek;
}
}

View File

@ -0,0 +1,38 @@
package tech.powerjob.server.core.scheduler.auxiliary.impl;
import org.springframework.stereotype.Component;
import tech.powerjob.common.PowerJobDKey;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.server.core.scheduler.auxiliary.AbstractTimingStrategyHandler;
/**
 * Timing strategy handler for {@code FIXED_DELAY} jobs; the time expression is the delay
 * in milliseconds.
 *
 * @author Echo009
 * @since 2022/3/22
 */
@Component
public class FixedDelayTimingStrategyHandler extends AbstractTimingStrategyHandler {

    /**
     * Validates that the expression is a positive number of milliseconds no larger than the
     * configurable maximum interval.
     *
     * @param timeExpression the delay in milliseconds, as a decimal string
     */
    @Override
    public void validate(String timeExpression) {
        long delay;
        try {
            delay = Long.parseLong(timeExpression);
        } catch (Exception e) {
            throw new PowerJobException("invalid timeExpression!");
        }
        // fail fast on non-positive values BEFORE consulting the configurable upper bound,
        // so the error message stays accurate even if the bound itself is misconfigured (<= 0)
        if (delay <= 0) {
            throw new PowerJobException("the delay must be greater than 0 ms");
        }
        // defaults to 120s; jobs with longer intervals should use another time expression type
        // to reduce resource usage
        int maxInterval = Integer.parseInt(System.getProperty(PowerJobDKey.FREQUENCY_JOB_MAX_INTERVAL, "120000"));
        if (delay > maxInterval) {
            throw new PowerJobException("the delay must be less than " + maxInterval + "ms");
        }
    }

    @Override
    public TimeExpressionType supportType() {
        return TimeExpressionType.FIXED_DELAY;
    }
}

View File

@ -0,0 +1,46 @@
package tech.powerjob.server.core.scheduler.auxiliary.impl;
import org.springframework.stereotype.Component;
import tech.powerjob.common.PowerJobDKey;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.server.core.scheduler.auxiliary.AbstractTimingStrategyHandler;
/**
 * Timing strategy handler for {@code FIXED_RATE} jobs; the time expression is the rate
 * in milliseconds.
 *
 * @author Echo009
 * @since 2022/3/22
 */
@Component
public class FixedRateTimingStrategyHandler extends AbstractTimingStrategyHandler {

    /**
     * Validates that the expression is a positive number of milliseconds no larger than the
     * configurable maximum interval.
     *
     * @param timeExpression the rate in milliseconds, as a decimal string
     */
    @Override
    public void validate(String timeExpression) {
        long delay;
        try {
            delay = Long.parseLong(timeExpression);
        } catch (Exception e) {
            throw new PowerJobException("invalid timeExpression!");
        }
        // fail fast on non-positive values BEFORE consulting the configurable upper bound,
        // so the error message stays accurate even if the bound itself is misconfigured (<= 0)
        if (delay <= 0) {
            throw new PowerJobException("the rate must be greater than 0 ms");
        }
        // defaults to 120s; jobs with longer intervals should use another time expression type
        // to reduce resource usage
        int maxInterval = Integer.parseInt(System.getProperty(PowerJobDKey.FREQUENCY_JOB_MAX_INTERVAL, "120000"));
        if (delay > maxInterval) {
            throw new PowerJobException("the rate must be less than " + maxInterval + "ms");
        }
    }

    /**
     * Next trigger = previous trigger + rate, unless the configured lifecycle start time is still
     * in the future (then the start time wins). Returns null once the lifecycle end has passed,
     * meaning "no further trigger".
     */
    @Override
    public Long calculateNextTriggerTime(Long preTriggerTime, String timeExpression, Long startTime, Long endTime) {
        long r = startTime != null && startTime > preTriggerTime
                ? startTime : preTriggerTime + Long.parseLong(timeExpression);
        return endTime != null && endTime < r ? null : r;
    }

    @Override
    public TimeExpressionType supportType() {
        return TimeExpressionType.FIXED_RATE;
    }
}

View File

@ -0,0 +1,17 @@
package tech.powerjob.server.core.scheduler.auxiliary.impl;
import org.springframework.stereotype.Component;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.server.core.scheduler.auxiliary.AbstractTimingStrategyHandler;
/**
 * Timing strategy handler for {@code WORKFLOW} nodes. Adds no validation or
 * next-trigger-time logic of its own; behavior presumably falls back to the
 * {@link AbstractTimingStrategyHandler} defaults — confirm there.
 *
 * @author Echo009
 * @since 2022/3/22
 */
@Component
public class WorkflowTimingStrategyHandler extends AbstractTimingStrategyHandler {

    @Override
    public TimeExpressionType supportType() {
        return TimeExpressionType.WORKFLOW;
    }
}

View File

@ -0,0 +1,46 @@
package tech.powerjob.server.core.service;
import tech.powerjob.server.persistence.remote.model.AppInfoDO;
import java.util.Optional;
/**
 * AppInfoService
 *
 * @author tjq
 * @since 2023/3/4
 */
public interface AppInfoService {

    /**
     * Looks up an app by its name.
     *
     * @param appName app name
     * @return app info if present
     */
    Optional<AppInfoDO> findByAppName(String appName);

    /**
     * Fetches AppInfo by id, optionally through a local cache.
     *
     * @param appId    appId
     * @param useCache whether a (possibly stale) cached copy may be returned
     * @return app info
     */
    Optional<AppInfoDO> findById(Long appId, boolean useCache);

    /**
     * Deletes the app with the given id.
     *
     * @param appId appId
     */
    void deleteById(Long appId);

    /**
     * Saves (creates or updates) an app.
     *
     * @param appInfo app info
     * @return the persisted entity
     */
    AppInfoDO save(AppInfoDO appInfo);

    /**
     * Verifies the app account's credentials.
     *
     * @param appName     app name
     * @param password    password supplied by the caller
     * @param encryptType encoding of the supplied password
     * @return appId on success
     */
    Long assertApp(String appName, String password, String encryptType);

    /**
     * Same as {@link #assertApp(String, String, String)} but for an already-loaded app entity.
     */
    Long assertApp(AppInfoDO appInfo, String password, String encryptType);

    /**
     * Recovers the app's original (plain-text) password from the stored value.
     */
    String fetchOriginAppPassword(AppInfoDO appInfo);
}

View File

@ -0,0 +1,138 @@
package tech.powerjob.server.core.service;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
import java.time.Duration;
import java.util.Optional;
/**
 * Locally cached lookups for frequently queried data (job/workflow names, id-to-appId mappings).
 *
 * @author tjq
 * @since 2020/4/14
 */
@Slf4j
@Service
public class CacheService {

    private final JobInfoRepository jobInfoRepository;

    private final WorkflowInfoRepository workflowInfoRepository;

    private final InstanceInfoRepository instanceInfoRepository;

    // name caches expire (1 min TTL) so renames eventually become visible
    private final Cache<Long, String> jobId2JobNameCache;

    private final Cache<Long, String> workflowId2WorkflowNameCache;

    // id-mapping caches have no TTL — presumably these mappings never change once written; confirm
    private final Cache<Long, Long> instanceId2AppId;

    private final Cache<Long, Long> jobId2AppId;

    public CacheService(JobInfoRepository jobInfoRepository, WorkflowInfoRepository workflowInfoRepository, InstanceInfoRepository instanceInfoRepository) {
        this.jobInfoRepository = jobInfoRepository;
        this.workflowInfoRepository = workflowInfoRepository;
        this.instanceInfoRepository = instanceInfoRepository;
        jobId2JobNameCache = CacheBuilder.newBuilder()
                .expireAfterWrite(Duration.ofMinutes(1))
                .maximumSize(512)
                .softValues()
                .build();
        workflowId2WorkflowNameCache = CacheBuilder.newBuilder()
                .expireAfterWrite(Duration.ofMinutes(1))
                .maximumSize(512)
                .softValues()
                .build();
        instanceId2AppId = CacheBuilder.newBuilder()
                .maximumSize(1024)
                .softValues()
                .build();
        jobId2AppId = CacheBuilder.newBuilder()
                .maximumSize(1024)
                .softValues()
                .build();
    }

    /**
     * Resolves jobId to jobName. Consistency is NOT guaranteed: after a rename the cached value
     * stays stale until the entry (1 minute TTL) expires.
     *
     * @param jobId job id
     * @return job name; "" if the job does not exist; null if the lookup failed
     */
    public String getJobName(Long jobId) {
        try {
            return jobId2JobNameCache.get(jobId, () -> {
                Optional<JobInfoDO> jobInfoDOOptional = jobInfoRepository.findById(jobId);
                // cache "" for missing jobs to prevent cache penetration; a job created later
                // will be missed until the entry expires — acceptable here
                return jobInfoDOOptional.map(JobInfoDO::getJobName).orElse("");
            });
        } catch (Exception e) {
            log.error("[CacheService] getJobName for {} failed.", jobId, e);
        }
        return null;
    }

    /**
     * Resolves workflowId to the workflow name; same caching and consistency caveats as
     * {@link #getJobName}.
     *
     * @param workflowId workflow id
     * @return workflow name; "" if the workflow does not exist; null if the lookup failed
     */
    public String getWorkflowName(Long workflowId) {
        try {
            return workflowId2WorkflowNameCache.get(workflowId, () -> {
                Optional<WorkflowInfoDO> jobInfoDOOptional = workflowInfoRepository.findById(workflowId);
                // cache "" for missing workflows to prevent cache penetration (see getJobName)
                return jobInfoDOOptional.map(WorkflowInfoDO::getWfName).orElse("");
            });
        } catch (Exception e) {
            log.error("[CacheService] getWorkflowName for {} failed.", workflowId, e);
        }
        return null;
    }

    /**
     * Resolves instanceId to its owning appId; null when unknown or the lookup failed.
     */
    public Long getAppIdByInstanceId(Long instanceId) {
        try {
            return instanceId2AppId.get(instanceId, () -> {
                // database failures are logged inside the loader
                try {
                    InstanceInfoDO instanceLog = instanceInfoRepository.findByInstanceId(instanceId);
                    if (instanceLog != null) {
                        return instanceLog.getAppId();
                    }
                } catch (Exception e) {
                    log.error("[CacheService] getAppId for instanceId:{} failed.", instanceId, e);
                }
                return null;
            });
        } catch (Exception ignore) {
            // a null loader result surfaces as a cache-load exception — treated the same as "not found"
        }
        return null;
    }

    /**
     * Resolves jobId to its owning appId; null when unknown or the lookup failed.
     */
    public Long getAppIdByJobId(Long jobId) {
        try {
            return jobId2AppId.get(jobId, () -> {
                try {
                    Optional<JobInfoDO> jobInfoDOOptional = jobInfoRepository.findById(jobId);
                    return jobInfoDOOptional.map(JobInfoDO::getAppId).orElse(null);
                } catch (Exception e) {
                    log.error("[CacheService] getAppId for job:{} failed.", jobId, e);
                }
                return null;
            });
        } catch (Exception ignore) {
            // a null loader result surfaces as a cache-load exception — treated the same as "not found"
        }
        return null;
    }
}

View File

@ -0,0 +1,38 @@
package tech.powerjob.server.core.service;
import tech.powerjob.common.PowerQuery;
import tech.powerjob.common.request.http.RunJobRequest;
import tech.powerjob.common.request.http.SaveJobInfoRequest;
import tech.powerjob.common.response.JobInfoDTO;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import java.util.List;
/**
 * JobService
 *
 * @author tjq
 * @since 2023/3/4
 */
public interface JobService {

    /**
     * Creates or updates a job.
     *
     * @param request job definition
     * @return jobId of the saved job
     */
    Long saveJob(SaveJobInfoRequest request);

    /**
     * Duplicates an existing job.
     *
     * @param jobId id of the job to copy
     * @return the newly created copy
     */
    JobInfoDO copyJob(Long jobId);

    /**
     * Fetches a single job by id.
     */
    JobInfoDTO fetchJob(Long jobId);

    /**
     * Lists all jobs belonging to an app.
     */
    List<JobInfoDTO> fetchAllJob(Long appId);

    /**
     * Queries jobs by a generic {@link PowerQuery} condition.
     */
    List<JobInfoDTO> queryJob(PowerQuery powerQuery);

    /**
     * Triggers a job immediately (or after the delay carried in the request).
     *
     * @param appId         app owning the job
     * @param runJobRequest run parameters
     * @return instanceId of the created run
     */
    long runJob(Long appId, RunJobRequest runJobRequest);

    /**
     * Deletes a job (and stops running frequent instances).
     */
    void deleteJob(Long jobId);

    /**
     * Disables a job (and stops running frequent instances).
     */
    void disableJob(Long jobId);

    /**
     * Re-enables a disabled job and recomputes its next trigger time.
     */
    void enableJob(Long jobId);

    /**
     * Exports a job definition as a reusable save request.
     */
    SaveJobInfoRequest exportJob(Long jobId);
}

View File

@ -0,0 +1,56 @@
package tech.powerjob.server.core.service;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import tech.powerjob.common.enums.WorkflowNodeType;
import tech.powerjob.server.core.validator.NodeValidator;
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
/**
 * Dispatches workflow-node validation to the {@link NodeValidator} registered for the node's type.
 *
 * @author Echo009
 * @since 2021/12/14
 */
@Service
@Slf4j
public class NodeValidateService {

    private final Map<WorkflowNodeType, NodeValidator> nodeValidatorMap;

    public NodeValidateService(List<NodeValidator> nodeValidators) {
        nodeValidatorMap = new EnumMap<>(WorkflowNodeType.class);
        for (NodeValidator validator : nodeValidators) {
            nodeValidatorMap.put(validator.matchingType(), validator);
        }
    }

    /**
     * Runs the type-specific validation that needs the whole DAG as context.
     * Node types without a registered validator are accepted without checks.
     */
    public void complexValidate(WorkflowNodeInfoDO node, WorkflowDAG dag) {
        NodeValidator validator = getNodeValidator(node);
        if (validator != null) {
            validator.complexValidate(node, dag);
        }
    }

    /**
     * Runs the type-specific validation that only needs the node itself.
     * Node types without a registered validator are accepted without checks.
     */
    public void simpleValidate(WorkflowNodeInfoDO node) {
        NodeValidator validator = getNodeValidator(node);
        if (validator != null) {
            validator.simpleValidate(node);
        }
    }

    /**
     * Resolves the validator for the node's type; a missing type code is treated as a plain
     * job node for backward compatibility with nodes persisted before the type field existed.
     */
    private NodeValidator getNodeValidator(WorkflowNodeInfoDO node) {
        Integer nodeTypeCode = node.getType();
        WorkflowNodeType nodeType = nodeTypeCode == null ? WorkflowNodeType.JOB : WorkflowNodeType.of(nodeTypeCode);
        return nodeValidatorMap.get(nodeType);
    }
}

View File

@ -0,0 +1,42 @@
package tech.powerjob.server.core.service;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
import tech.powerjob.server.persistence.remote.model.UserInfoDO;
import tech.powerjob.server.persistence.remote.repository.UserInfoRepository;
import javax.annotation.Resource;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * User service.
 *
 * @author tjq
 * @since 2020/6/12
 */
@Service
public class UserService {

    @Resource
    private UserInfoRepository userInfoRepository;

    /**
     * Resolves a comma-separated list of user ids into user entities (notification targets).
     *
     * @param userIds comma-separated user ids, e.g. "1,2,3"; null/empty yields an empty list
     * @return matching users, with the password field scrubbed
     */
    public List<UserInfoDO> fetchNotifyUserList(String userIds) {
        if (StringUtils.isEmpty(userIds)) {
            return Lists.newLinkedList();
        }
        // trim segments and drop empty ones so inputs like "1, 2," don't make Long::valueOf throw;
        // collecting into a Set de-duplicates repeated ids
        Set<Long> userIdList = Splitter.on(",").trimResults().omitEmptyStrings().splitToList(userIds).stream().map(Long::valueOf).collect(Collectors.toSet());
        List<UserInfoDO> res = userInfoRepository.findByIdIn(Lists.newLinkedList(userIdList));
        // never leak stored passwords to callers
        res.forEach(x -> x.setPassword(null));
        return res;
    }
}

View File

@ -0,0 +1,101 @@
package tech.powerjob.server.core.service;
import com.alibaba.fastjson.JSON;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import tech.powerjob.common.enums.WorkflowNodeType;
import tech.powerjob.common.model.PEWorkflowDAG;
import tech.powerjob.common.utils.CommonUtils;
import tech.powerjob.server.core.workflow.hanlder.ControlNodeHandler;
import tech.powerjob.server.core.workflow.hanlder.TaskNodeHandler;
import tech.powerjob.server.core.workflow.hanlder.WorkflowNodeHandlerMarker;
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
/**
 * Dispatches workflow DAG nodes to their type-specific handlers: task nodes create and start
 * job instances, control nodes are executed inline.
 *
 * @author Echo009
 * @since 2021/12/9
 */
@Slf4j
@Service
public class WorkflowNodeHandleService {

    private final Map<WorkflowNodeType, ControlNodeHandler> controlNodeHandlerContainer;

    private final Map<WorkflowNodeType, TaskNodeHandler> taskNodeHandlerContainer;

    private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository;

    public WorkflowNodeHandleService(List<ControlNodeHandler> controlNodeHandlerList, List<TaskNodeHandler> taskNodeHandlerList, WorkflowInstanceInfoRepository workflowInstanceInfoRepository) {
        // index each handler by the node type it declares support for
        controlNodeHandlerContainer = new EnumMap<>(WorkflowNodeType.class);
        taskNodeHandlerContainer = new EnumMap<>(WorkflowNodeType.class);
        controlNodeHandlerList.forEach(controlNodeHandler -> controlNodeHandlerContainer.put(controlNodeHandler.matchingType(), controlNodeHandler));
        taskNodeHandlerList.forEach(taskNodeHandler -> taskNodeHandlerContainer.put(taskNodeHandler.matchingType(), taskNodeHandler));
        this.workflowInstanceInfoRepository = workflowInstanceInfoRepository;
    }

    /**
     * Handles task nodes in three phases: create all task instances, persist the updated DAG,
     * then start the instances.
     * NOTE: callers must guarantee that taskNodeList is non-empty.
     */
    public void handleTaskNodes(List<PEWorkflowDAG.Node> taskNodeList, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) {
        // create the task instances
        taskNodeList.forEach(taskNode -> {
            // NOTE: every instance must be created successfully here; a partial failure would leave
            // the DAG info un-persisted and the already-created instances invisible in the workflow log
            TaskNodeHandler taskNodeHandler = (TaskNodeHandler) findMatchingHandler(taskNode);
            taskNodeHandler.createTaskInstance(taskNode, dag, wfInstanceInfo);
            log.debug("[Workflow-{}|{}] workflowInstance start to process new node(nodeId={},jobId={})", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), taskNode.getNodeId(), taskNode.getJobId());
        });
        // persist the workflow instance with the updated DAG before anything starts running
        wfInstanceInfo.setDag(JSON.toJSONString(dag));
        workflowInstanceInfoRepository.saveAndFlush(wfInstanceInfo);
        // start the created instances
        taskNodeList.forEach(taskNode -> {
            TaskNodeHandler taskNodeHandler = (TaskNodeHandler) findMatchingHandler(taskNode);
            taskNodeHandler.startTaskInstance(taskNode);
        });
    }

    /**
     * Handles control nodes one by one, in list order.
     * NOTE: callers must guarantee that controlNodeList is non-empty.
     */
    public void handleControlNodes(List<PEWorkflowDAG.Node> controlNodeList, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) {
        for (PEWorkflowDAG.Node node : controlNodeList) {
            handleControlNode(node, dag, wfInstanceInfo);
        }
    }

    /**
     * Executes a single control node inline, stamping its start and finish times on the node.
     */
    public void handleControlNode(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) {
        ControlNodeHandler controlNodeHandler = (ControlNodeHandler) findMatchingHandler(node);
        node.setStartTime(CommonUtils.formatTime(System.currentTimeMillis()));
        controlNodeHandler.handle(node, dag, wfInstanceInfo);
        node.setFinishedTime(CommonUtils.formatTime(System.currentTimeMillis()));
    }

    /**
     * Looks up the handler registered for the node's type; control and task handlers live in
     * separate maps.
     *
     * @throws UnsupportedOperationException if no handler is registered for the type
     */
    private WorkflowNodeHandlerMarker findMatchingHandler(PEWorkflowDAG.Node node) {
        WorkflowNodeType nodeType = WorkflowNodeType.of(node.getNodeType());
        WorkflowNodeHandlerMarker res;
        if (!nodeType.isControlNode()) {
            res = taskNodeHandlerContainer.get(nodeType);
        } else {
            res = controlNodeHandlerContainer.get(nodeType);
        }
        if (res == null) {
            // impossible: every supported type has a registered handler
            throw new UnsupportedOperationException("unsupported node type : " + nodeType);
        }
        return res;
    }
}

View File

@ -0,0 +1,128 @@
package tech.powerjob.server.core.service.impl;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
import tech.powerjob.common.enums.EncryptType;
import tech.powerjob.common.enums.ErrorCodes;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.common.utils.DigestUtils;
import tech.powerjob.server.common.utils.AESUtil;
import tech.powerjob.server.core.service.AppInfoService;
import tech.powerjob.server.persistence.remote.model.AppInfoDO;
import tech.powerjob.server.persistence.remote.repository.AppInfoRepository;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
/**
 * AppInfoServiceImpl
 *
 * @author tjq
 * @since 2023/3/4
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class AppInfoServiceImpl implements AppInfoService {

    // short-lived local cache for app lookups (3 min TTL, soft values, max 1024 entries)
    private final Cache<Long, AppInfoDO> appId2AppInfoDO = CacheBuilder.newBuilder()
            .softValues()
            .expireAfterWrite(3, TimeUnit.MINUTES)
            .maximumSize(1024)
            .build();

    private final AppInfoRepository appInfoRepository;

    // NOTE(review): AES key hardcoded in source — anyone with the source/binary can decrypt stored
    // passwords; consider externalizing it to configuration or a secret store
    private static final String ENCRYPT_KEY = "ChinaNo.1_ChinaNo.1_ChinaNo.1AAA";

    // marker prefix distinguishing encrypted passwords from legacy plain-text ones
    private static final String ENCRYPT_PWD_PREFIX = "sys_encrypt_aes:";

    @Override
    public Optional<AppInfoDO> findByAppName(String appName) {
        return appInfoRepository.findByAppName(appName);
    }

    /**
     * Fetches the app by id. With useCache=false the DB is read directly and the cache refreshed;
     * with useCache=true a cached copy (up to 3 minutes stale) may be returned.
     * NOTE: with useCache=true, "app not found" and DB failures both collapse into
     * Optional.empty() (the cause is logged).
     */
    @Override
    public Optional<AppInfoDO> findById(Long appId, boolean useCache) {
        if (!useCache) {
            Optional<AppInfoDO> appInfoOpt = appInfoRepository.findById(appId);
            appInfoOpt.ifPresent(appInfo -> appId2AppInfoDO.put(appId, appInfo));
            return appInfoOpt;
        }
        try {
            AppInfoDO appInfoDO = appId2AppInfoDO.get(appId, () -> {
                Optional<AppInfoDO> appInfoOpt = appInfoRepository.findById(appId);
                if (appInfoOpt.isPresent()) {
                    return appInfoOpt.get();
                }
                // the loader must not return null; this exception doubles as the "not found" signal
                throw new IllegalArgumentException("can't find appInfo by appId:" + appId);
            });
            return Optional.of(appInfoDO);
        } catch (Exception e) {
            log.warn("[AppInfoService] findByIdWithCache failed,appId={}", appId, e);
        }
        return Optional.empty();
    }

    @Override
    public void deleteById(Long appId) {
        appInfoRepository.deleteById(appId);
    }

    /**
     * Persists the app, AES-encrypting the supplied plain-text password and tagging it with
     * {@link #ENCRYPT_PWD_PREFIX} so it can be recognized and decrypted later.
     */
    @Override
    public AppInfoDO save(AppInfoDO appInfo) {
        String originPassword = appInfo.getPassword();
        String encryptPassword = AESUtil.encrypt(originPassword, ENCRYPT_KEY);
        String finalPassword = ENCRYPT_PWD_PREFIX.concat(encryptPassword);
        appInfo.setPassword(finalPassword);
        return appInfoRepository.saveAndFlush(appInfo);
    }

    @Override
    public Long assertApp(String appName, String password, String encryptType) {
        AppInfoDO appInfo = appInfoRepository.findByAppName(appName).orElseThrow(() -> new PowerJobException(ErrorCodes.INVALID_APP, appName));
        return assertApp(appInfo, password, encryptType);
    }

    @Override
    public Long assertApp(AppInfoDO appInfo, String password, String encryptType) {
        boolean checkPass = checkPassword(appInfo, password, encryptType);
        if (!checkPass) {
            throw new PowerJobException(ErrorCodes.INCORRECT_PASSWORD, null);
        }
        return appInfo.getId();
    }

    /**
     * Compares the supplied password against the stored one. With encryptType NONE/empty the
     * comparison is plain-text; with MD5 the supplied value is expected to be md5(plain password);
     * any other type is rejected.
     */
    private boolean checkPassword(AppInfoDO appInfo, String password, String encryptType) {
        String originPwd = fetchOriginAppPassword(appInfo);
        if (StringUtils.isEmpty(encryptType) || EncryptType.NONE.getCode().equalsIgnoreCase(encryptType)) {
            return password.equals(originPwd);
        }
        if (EncryptType.MD5.getCode().equalsIgnoreCase(encryptType)) {
            return password.equalsIgnoreCase(DigestUtils.md5(originPwd));
        }
        throw new PowerJobException(ErrorCodes.INVALID_REQUEST, "unknown_encryptType:" + encryptType);
    }

    /**
     * Recovers the plain-text password: values carrying the encryption prefix are decrypted,
     * legacy plain-text (or empty) values are returned unchanged.
     */
    @Override
    public String fetchOriginAppPassword(AppInfoDO appInfo) {
        String dbPwd = appInfo.getPassword();
        if (StringUtils.isEmpty(dbPwd)) {
            return dbPwd;
        }
        if (dbPwd.startsWith(ENCRYPT_PWD_PREFIX)) {
            String encryptPassword = dbPwd.replaceFirst(ENCRYPT_PWD_PREFIX, StringUtils.EMPTY);
            return AESUtil.decrypt(encryptPassword, ENCRYPT_KEY);
        }
        return dbPwd;
    }
}

View File

@ -0,0 +1,65 @@
package tech.powerjob.server.core.service.impl.job;
import com.alibaba.fastjson.JSON;
import com.google.common.collect.Lists;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.BeanUtils;
import tech.powerjob.common.enums.DispatchStrategy;
import tech.powerjob.common.enums.ExecuteType;
import tech.powerjob.common.enums.ProcessorType;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.common.model.AlarmConfig;
import tech.powerjob.common.model.JobAdvancedRuntimeConfig;
import tech.powerjob.common.model.LifeCycle;
import tech.powerjob.common.model.LogConfig;
import tech.powerjob.common.request.http.SaveJobInfoRequest;
import tech.powerjob.common.response.JobInfoDTO;
import tech.powerjob.common.serialize.JsonUtils;
import tech.powerjob.server.common.SJ;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import java.util.stream.Collectors;
/**
 * Static converters between the {@link JobInfoDO} persistence entity and the API-facing
 * {@link SaveJobInfoRequest} / {@link JobInfoDTO} models.
 *
 * @author tjq
 * @since 2023/3/4
 */
public class JobConverter {

    /**
     * Static utility holder — not meant to be instantiated.
     */
    private JobConverter() {
    }

    /**
     * Converts a persisted job into an editable save request (used e.g. for job export),
     * re-inflating enum int codes and JSON-serialized sub-configs.
     */
    public static SaveJobInfoRequest convertJobInfoDO2SaveJobInfoRequest(JobInfoDO jobInfoDO) {
        SaveJobInfoRequest saveJobInfoRequest = new SaveJobInfoRequest();
        BeanUtils.copyProperties(jobInfoDO, saveJobInfoRequest);
        // enums are persisted as int codes
        saveJobInfoRequest.setTimeExpressionType(TimeExpressionType.of(jobInfoDO.getTimeExpressionType()));
        saveJobInfoRequest.setExecuteType(ExecuteType.of(jobInfoDO.getExecuteType()));
        saveJobInfoRequest.setProcessorType(ProcessorType.of(jobInfoDO.getProcessorType()));
        // notify users are persisted as a comma-joined id string
        if (StringUtils.isNotEmpty(jobInfoDO.getNotifyUserIds())) {
            saveJobInfoRequest.setNotifyUserIds(Lists.newArrayList(SJ.COMMA_SPLITTER.split(jobInfoDO.getNotifyUserIds())).stream().map(Long::valueOf).collect(Collectors.toList()));
        }
        saveJobInfoRequest.setDispatchStrategy(DispatchStrategy.of(jobInfoDO.getDispatchStrategy()));
        saveJobInfoRequest.setLifeCycle(LifeCycle.parse(jobInfoDO.getLifecycle()));
        // sub-configs are persisted as JSON strings; parse problems are swallowed by the helper
        saveJobInfoRequest.setAlarmConfig(JsonUtils.parseObjectIgnoreException(jobInfoDO.getAlarmConfig(), AlarmConfig.class));
        saveJobInfoRequest.setLogConfig(JsonUtils.parseObjectIgnoreException(jobInfoDO.getLogConfig(), LogConfig.class));
        saveJobInfoRequest.setAdvancedRuntimeConfig(JsonUtils.parseObjectIgnoreException(jobInfoDO.getAdvancedRuntimeConfig(), JobAdvancedRuntimeConfig.class));
        return saveJobInfoRequest;
    }

    /**
     * Converts a persisted job into the read-only DTO returned by query APIs.
     */
    public static JobInfoDTO convertJobInfoDO2JobInfoDTO(JobInfoDO jobInfoDO) {
        JobInfoDTO jobInfoDTO = new JobInfoDTO();
        BeanUtils.copyProperties(jobInfoDO, jobInfoDTO);
        if (jobInfoDO.getAlarmConfig() != null) {
            jobInfoDTO.setAlarmConfig(JSON.parseObject(jobInfoDO.getAlarmConfig(), AlarmConfig.class));
        }
        if (StringUtils.isNotEmpty(jobInfoDO.getLogConfig())) {
            jobInfoDTO.setLogConfig(JSON.parseObject(jobInfoDO.getLogConfig(), LogConfig.class));
        }
        if (StringUtils.isNotEmpty(jobInfoDO.getAdvancedRuntimeConfig())) {
            jobInfoDTO.setAdvancedRuntimeConfig(JSON.parseObject(jobInfoDO.getAdvancedRuntimeConfig(), JobAdvancedRuntimeConfig.class));
        }
        return jobInfoDTO;
    }
}

View File

@ -0,0 +1,323 @@
package tech.powerjob.server.core.service.impl.job;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils;
import org.springframework.data.jpa.domain.Specification;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import tech.powerjob.common.PowerQuery;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.common.model.AlarmConfig;
import tech.powerjob.common.model.LifeCycle;
import tech.powerjob.common.request.http.RunJobRequest;
import tech.powerjob.common.request.http.SaveJobInfoRequest;
import tech.powerjob.common.response.JobInfoDTO;
import tech.powerjob.common.serialize.JsonUtils;
import tech.powerjob.server.common.SJ;
import tech.powerjob.common.enums.SwitchableStatus;
import tech.powerjob.server.common.timewheel.holder.InstanceTimeWheelService;
import tech.powerjob.server.core.DispatchService;
import tech.powerjob.server.core.instance.InstanceService;
import tech.powerjob.server.core.scheduler.TimingStrategyService;
import tech.powerjob.server.core.service.JobService;
import tech.powerjob.server.persistence.QueryConvertUtils;
import tech.powerjob.server.persistence.remote.model.InstanceInfoDO;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import tech.powerjob.server.persistence.remote.repository.InstanceInfoRepository;
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
import tech.powerjob.server.remote.server.redirector.DesignateServer;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
/**
* JobServiceImpl
*
* @author tjq
* @since 2023/3/4
*/
@Slf4j
@Service
@RequiredArgsConstructor
public class JobServiceImpl implements JobService {
private final InstanceService instanceService;
private final DispatchService dispatchService;
private final JobInfoRepository jobInfoRepository;
private final InstanceInfoRepository instanceInfoRepository;
private final TimingStrategyService timingStrategyService;
/**
* 保存/修改任务
*
* @param request 任务请求
* @return 创建的任务IDjobId
*/
@Override
public Long saveJob(SaveJobInfoRequest request) {
request.valid();
JobInfoDO jobInfoDO;
if (request.getId() != null) {
jobInfoDO = jobInfoRepository.findById(request.getId()).orElseThrow(() -> new IllegalArgumentException("can't find job by jobId: " + request.getId()));
} else {
jobInfoDO = new JobInfoDO();
}
// 值拷贝
BeanUtils.copyProperties(request, jobInfoDO);
// 拷贝枚举值
jobInfoDO.setExecuteType(request.getExecuteType().getV());
jobInfoDO.setProcessorType(request.getProcessorType().getV());
jobInfoDO.setTimeExpressionType(request.getTimeExpressionType().getV());
jobInfoDO.setStatus(request.isEnable() ? SwitchableStatus.ENABLE.getV() : SwitchableStatus.DISABLE.getV());
jobInfoDO.setDispatchStrategy(request.getDispatchStrategy().getV());
// 填充默认值,非空保护防止 NPE
fillDefaultValue(jobInfoDO);
// 转化报警用户列表
if (request.getNotifyUserIds() != null) {
if (request.getNotifyUserIds().size() == 0) {
jobInfoDO.setNotifyUserIds(null);
} else {
jobInfoDO.setNotifyUserIds(SJ.COMMA_JOINER.join(request.getNotifyUserIds()));
}
}
LifeCycle lifecycle = Optional.ofNullable(request.getLifeCycle()).orElse(LifeCycle.EMPTY_LIFE_CYCLE);
jobInfoDO.setLifecycle(JSON.toJSONString(lifecycle));
// 检查定时策略
timingStrategyService.validate(request.getTimeExpressionType(), request.getTimeExpression(), lifecycle.getStart(), lifecycle.getEnd());
calculateNextTriggerTime(jobInfoDO);
if (request.getId() == null) {
jobInfoDO.setGmtCreate(new Date());
}
// 检查告警配置
if (request.getAlarmConfig() != null) {
AlarmConfig config = request.getAlarmConfig();
if (config.getStatisticWindowLen() == null || config.getAlertThreshold() == null || config.getSilenceWindowLen() == null) {
throw new PowerJobException("illegal alarm config!");
}
jobInfoDO.setAlarmConfig(JSON.toJSONString(request.getAlarmConfig()));
}
// 日志配置
if (request.getLogConfig() != null) {
jobInfoDO.setLogConfig(JSONObject.toJSONString(request.getLogConfig()));
}
// 日志配置
if (request.getAdvancedRuntimeConfig() != null) {
jobInfoDO.setAdvancedRuntimeConfig(JSONObject.toJSONString(request.getAdvancedRuntimeConfig()));
}
JobInfoDO res = jobInfoRepository.saveAndFlush(jobInfoDO);
return res.getId();
}
/**
* 复制任务
*
* @param jobId 目标任务ID
* @return 复制后的任务 ID
*/
@Override
public JobInfoDO copyJob(Long jobId) {
JobInfoDO origin = jobInfoRepository.findById(jobId).orElseThrow(() -> new IllegalArgumentException("can't find job by jobId: " + jobId));
if (origin.getStatus() == SwitchableStatus.DELETED.getV()) {
throw new IllegalStateException("can't copy the job which has been deleted!");
}
JobInfoDO copyJob = new JobInfoDO();
// 值拷贝
BeanUtils.copyProperties(origin, copyJob);
// 填充默认值,理论上应该不需要
fillDefaultValue(copyJob);
// 修正创建时间以及更新时间
copyJob.setId(null);
copyJob.setJobName(copyJob.getJobName() + "_COPY");
copyJob.setGmtCreate(new Date());
copyJob.setGmtModified(new Date());
copyJob = jobInfoRepository.saveAndFlush(copyJob);
return copyJob;
}
@Override
public JobInfoDTO fetchJob(Long jobId) {
return JobConverter.convertJobInfoDO2JobInfoDTO(jobInfoRepository.findById(jobId).orElseThrow(() -> new IllegalArgumentException("can't find job by jobId: " + jobId)));
}
@Override
public List<JobInfoDTO> fetchAllJob(Long appId) {
return jobInfoRepository.findByAppId(appId).stream().map(JobConverter::convertJobInfoDO2JobInfoDTO).collect(Collectors.toList());
}
@Override
public List<JobInfoDTO> queryJob(PowerQuery powerQuery) {
Specification<JobInfoDO> specification = QueryConvertUtils.toSpecification(powerQuery);
return jobInfoRepository.findAll(specification).stream().map(JobConverter::convertJobInfoDO2JobInfoDTO).collect(Collectors.toList());
}
@Override
@DesignateServer
public long runJob(Long appId, RunJobRequest runJobRequest) {
Long jobId = runJobRequest.getJobId();
String instanceParams = runJobRequest.getInstanceParams();
String outerKey = runJobRequest.getOuterKey();
long delay = runJobRequest.getDelay() == null ? 0 : runJobRequest.getDelay();
JobInfoDO jobInfo = jobInfoRepository.findById(jobId).orElseThrow(() -> new IllegalArgumentException("can't find job by id:" + jobId));
log.info("[Job-{}] try to run job in app[{}], instanceParams={},delay={} ms,outerKey={}", jobInfo.getId(), appId, instanceParams, delay, outerKey);
final InstanceInfoDO instanceInfo = instanceService.create(jobInfo.getId(), jobInfo.getAppId(), jobInfo.getJobParams(),
instanceParams, null, System.currentTimeMillis() + Math.max(delay, 0),
outerKey, runJobRequest.getExtendValue()
);
instanceInfoRepository.flush();
if (delay <= 0) {
dispatchService.dispatch(jobInfo, instanceInfo.getInstanceId(), Optional.of(instanceInfo),Optional.empty());
} else {
InstanceTimeWheelService.schedule(instanceInfo.getInstanceId(), delay, () -> dispatchService.dispatch(jobInfo, instanceInfo.getInstanceId(), Optional.empty(),Optional.empty()));
}
log.info("[Job-{}|{}] execute 'runJob' successfully, params={}", jobInfo.getId(), instanceInfo.getInstanceId(), instanceParams);
return instanceInfo.getInstanceId();
}
/**
* 删除某个任务
*
* @param jobId 任务ID
*/
@Override
public void deleteJob(Long jobId) {
shutdownOrStopJob(jobId, SwitchableStatus.DELETED);
}
/**
* 禁用某个任务
*/
@Override
public void disableJob(Long jobId) {
shutdownOrStopJob(jobId, SwitchableStatus.DISABLE);
}
/**
* 导出某个任务为 JSON
* @param jobId jobId
* @return 导出结果
*/
@Override
public SaveJobInfoRequest exportJob(Long jobId) {
Optional<JobInfoDO> jobInfoOpt = jobInfoRepository.findById(jobId);
if (!jobInfoOpt.isPresent()) {
throw new IllegalArgumentException("can't find job by jobId: " + jobId);
}
final JobInfoDO jobInfoDO = jobInfoOpt.get();
final SaveJobInfoRequest saveJobInfoRequest = JobConverter.convertJobInfoDO2SaveJobInfoRequest(jobInfoDO);
saveJobInfoRequest.setId(null);
saveJobInfoRequest.setJobName(saveJobInfoRequest.getJobName() + "_EXPORT_" + System.currentTimeMillis());
log.info("[Job-{}] [exportJob] jobInfoDO: {}, saveJobInfoRequest: {}", jobId, JsonUtils.toJSONString(jobInfoDO), JsonUtils.toJSONString(saveJobInfoRequest));
return saveJobInfoRequest;
}
/**
* 启用某个任务
*
* @param jobId 任务ID
*/
@Override
public void enableJob(Long jobId) {
JobInfoDO jobInfoDO = jobInfoRepository.findById(jobId).orElseThrow(() -> new IllegalArgumentException("can't find job by jobId:" + jobId));
jobInfoDO.setStatus(SwitchableStatus.ENABLE.getV());
calculateNextTriggerTime(jobInfoDO);
jobInfoRepository.saveAndFlush(jobInfoDO);
}
/**
 * Shared implementation for deleting / disabling a job.
 * For frequent (second-level) jobs, any still-running instance must also be
 * stopped explicitly, otherwise it would keep running forever.
 *
 * @param jobId  id of the job
 * @param status target status (DELETED or DISABLE)
 * @throws IllegalArgumentException when no job exists for the given id
 */
private void shutdownOrStopJob(Long jobId, SwitchableStatus status) {
    // 1. update the job_info record first (orElseThrow for consistency with enableJob)
    JobInfoDO jobInfoDO = jobInfoRepository.findById(jobId)
            .orElseThrow(() -> new IllegalArgumentException("can't find job by jobId:" + jobId));
    jobInfoDO.setStatus(status.getV());
    jobInfoDO.setGmtModified(new Date());
    jobInfoRepository.saveAndFlush(jobInfoDO);
    // 2. only frequent jobs have long-lived running instances to clean up
    if (!TimeExpressionType.FREQUENT_TYPES.contains(jobInfoDO.getTimeExpressionType())) {
        return;
    }
    List<InstanceInfoDO> executeLogs = instanceInfoRepository.findByJobIdAndStatusIn(jobId, InstanceStatus.GENERALIZED_RUNNING_STATUS);
    if (CollectionUtils.isEmpty(executeLogs)) {
        return;
    }
    if (executeLogs.size() > 1) {
        log.warn("[Job-{}] frequent job should just have one running instance, there must have some bug.", jobId);
    }
    executeLogs.forEach(instance -> {
        try {
            // re-queries the database, but the call volume here is tiny so it's acceptable
            instanceService.stopInstance(instance.getAppId(), instance.getInstanceId());
        } catch (Exception e) {
            // stopping is best-effort, but log instead of swallowing silently so operators can follow up
            log.warn("[Job-{}] failed to stop running instance(instanceId={}).", jobId, instance.getInstanceId(), e);
        }
    });
}
/**
 * Recomputes the job's next trigger time and refreshes its modification timestamp.
 * Frequent jobs (fixed rate / fixed delay) carry no server-side next trigger
 * time, so it is cleared for them.
 */
private void calculateNextTriggerTime(JobInfoDO jobInfo) {
    if (TimeExpressionType.FREQUENT_TYPES.contains(jobInfo.getTimeExpressionType())) {
        // frequent jobs are not time-scheduled, nothing to compute
        jobInfo.setNextTriggerTime(null);
    } else {
        LifeCycle lifeCycle = LifeCycle.parse(jobInfo.getLifecycle());
        TimeExpressionType expressionType = TimeExpressionType.of(jobInfo.getTimeExpressionType());
        Long nextValidTime = timingStrategyService.calculateNextTriggerTimeWithInspection(
                expressionType, jobInfo.getTimeExpression(), lifeCycle.getStart(), lifeCycle.getEnd());
        jobInfo.setNextTriggerTime(nextValidTime);
    }
    // always bump the last-modified timestamp
    jobInfo.setGmtModified(new Date());
}
/**
 * Backfills defaults for nullable numeric fields of a job record so downstream
 * code never has to null-check them.
 * NOTE(review): 0 presumably means "unlimited / disabled" for these fields and
 * 5 is the default task concurrency — confirm against the scheduling code.
 */
private void fillDefaultValue(JobInfoDO jobInfoDO) {
    if (jobInfoDO.getConcurrency() == null) {
        jobInfoDO.setConcurrency(5);
    }
    if (jobInfoDO.getMaxWorkerCount() == null) {
        jobInfoDO.setMaxWorkerCount(0);
    }
    if (jobInfoDO.getMaxInstanceNum() == null) {
        jobInfoDO.setMaxInstanceNum(0);
    }
    if (jobInfoDO.getInstanceRetryNum() == null) {
        jobInfoDO.setInstanceRetryNum(0);
    }
    if (jobInfoDO.getTaskRetryNum() == null) {
        jobInfoDO.setTaskRetryNum(0);
    }
    if (jobInfoDO.getInstanceTimeLimit() == null) {
        jobInfoDO.setInstanceTimeLimit(0L);
    }
}
}

View File

@ -0,0 +1,37 @@
package tech.powerjob.server.core.uid;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import tech.powerjob.server.remote.server.self.ServerInfoService;
/**
 * Distributed unique ID generation service based on the Twitter snowflake
 * algorithm. The data-center id is pinned to 0 (2 bits); the machine id comes
 * from the current server's registered info.
 *
 * @author tjq
 * @since 2020/4/6
 */
@Slf4j
@Service
public class IdGenerateService {

    /** The data-center part is unused in PowerJob, so it is fixed to 0. */
    private static final int DATA_CENTER_ID = 0;

    private final SnowFlakeIdGenerator snowFlakeIdGenerator;

    public IdGenerateService(ServerInfoService serverInfoService) {
        long machineId = serverInfoService.fetchCurrentServerInfo().getId();
        this.snowFlakeIdGenerator = new SnowFlakeIdGenerator(DATA_CENTER_ID, machineId);
        log.info("[IdGenerateService] initialize IdGenerateService successfully, ID:{}", machineId);
    }

    /**
     * Allocates a distributed unique ID.
     *
     * @return distributed unique ID
     */
    public long allocate() {
        return snowFlakeIdGenerator.nextId();
    }
}

View File

@ -0,0 +1,122 @@
package tech.powerjob.server.core.uid;
/**
 * Twitter snowflake ID generator (Scala -> Java port).
 *
 * Bit layout of the generated long, from high to low:
 * timestamp delta (ms since START_STAMP) | data center (2 bits) | machine (14 bits) | sequence (6 bits)
 *
 * @author tjq
 * @since 2020/4/6
 */
public class SnowFlakeIdGenerator {

    /** Custom epoch (a special day for the original author). */
    private static final long START_STAMP = 1555776000000L;

    /** Bits reserved for the per-millisecond sequence. */
    private static final long SEQUENCE_BIT = 6;
    /** Bits reserved for the machine id. */
    private static final long MACHINE_BIT = 14;
    /** Bits reserved for the data-center id. */
    private static final long DATA_CENTER_BIT = 2;

    /** Max value of each part: ~(-1L << n) == 2^n - 1. */
    private static final long MAX_DATA_CENTER_NUM = ~(-1L << DATA_CENTER_BIT);
    private static final long MAX_MACHINE_NUM = ~(-1L << MACHINE_BIT);
    private static final long MAX_SEQUENCE = ~(-1L << SEQUENCE_BIT);

    /** Left shift of each part inside the final long. */
    private static final long MACHINE_LEFT = SEQUENCE_BIT;
    private static final long DATA_CENTER_LEFT = SEQUENCE_BIT + MACHINE_BIT;
    private static final long TIMESTAMP_LEFT = DATA_CENTER_LEFT + DATA_CENTER_BIT;

    /** Data-center id, validated against MAX_DATA_CENTER_NUM. */
    private final long dataCenterId;

    /** Machine id, validated against MAX_MACHINE_NUM. */
    private final long machineId;

    /** Sequence within the current millisecond. */
    private long sequence = 0L;

    /** Timestamp used by the previously generated id. */
    private long lastTimestamp = -1L;

    public SnowFlakeIdGenerator(long dataCenterId, long machineId) {
        if (dataCenterId < 0 || dataCenterId > MAX_DATA_CENTER_NUM) {
            throw new IllegalArgumentException("dataCenterId can't be greater than MAX_DATA_CENTER_NUM or less than 0");
        }
        if (machineId < 0 || machineId > MAX_MACHINE_NUM) {
            throw new IllegalArgumentException("machineId can't be greater than MAX_MACHINE_NUM or less than 0");
        }
        this.dataCenterId = dataCenterId;
        this.machineId = machineId;
    }

    /**
     * Generates the next unique id. Thread-safe via method-level synchronization.
     */
    public synchronized long nextId() {
        long now = currentMillis();
        if (now < lastTimestamp) {
            // clock moved backwards: borrow future time instead of failing outright
            return futureId();
        }
        if (now == lastTimestamp) {
            // same millisecond: bump the sequence
            sequence = (sequence + 1) & MAX_SEQUENCE;
            if (sequence == 0L) {
                // sequence exhausted within this millisecond, spin until the next one
                now = waitNextMillis();
            }
        } else {
            // a fresh millisecond: reset the sequence
            sequence = 0L;
        }
        lastTimestamp = now;
        return assemble(now);
    }

    /**
     * Generates an id against "future" time when the clock has drifted backwards,
     * so job scheduling and workflows keep working instead of going unavailable.
     * Note: this cannot prevent duplicate ids caused by a clock rollback that
     * happens while the server is shut down.
     */
    private long futureId() {
        sequence = (sequence + 1) & MAX_SEQUENCE;
        if (sequence == 0L) {
            lastTimestamp = lastTimestamp + 1;
        }
        return assemble(lastTimestamp);
    }

    /** Packs timestamp delta, data-center id, machine id and sequence into one long. */
    private long assemble(long timestamp) {
        return (timestamp - START_STAMP) << TIMESTAMP_LEFT
                | dataCenterId << DATA_CENTER_LEFT
                | machineId << MACHINE_LEFT
                | sequence;
    }

    /** Busy-waits until the wall clock passes {@code lastTimestamp}. */
    private long waitNextMillis() {
        long mill = currentMillis();
        while (mill <= lastTimestamp) {
            mill = currentMillis();
        }
        return mill;
    }

    private long currentMillis() {
        return System.currentTimeMillis();
    }
}

View File

@ -0,0 +1,70 @@
package tech.powerjob.server.core.validator;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import tech.powerjob.common.enums.WorkflowNodeType;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.common.model.PEWorkflowDAG;
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO;
import java.util.Collection;
/**
 * Validates DECISION nodes of a workflow DAG.
 *
 * @author Echo009
 * @since 2021/12/14
 */
@Component
@Slf4j
public class DecisionNodeValidator implements NodeValidator {

    @Override
    public void complexValidate(WorkflowNodeInfoDO node, WorkflowDAG dag) {
        // a decision node must have exactly two outgoing edges
        WorkflowDAG.Node nodeWrapper = dag.getNode(node.getId());
        Collection<PEWorkflowDAG.Edge> edges = nodeWrapper.getSuccessorEdgeMap().values();
        if (edges.size() != 2) {
            throw new PowerJobException("DecisionNodes out-degree must be 2,node name : " + node.getNodeName());
        }
        // edge properties must be "true" and "false" (one of each)
        boolean containFalse = false;
        boolean containTrue = false;
        for (PEWorkflowDAG.Edge edge : edges) {
            if (!isValidBooleanStr(edge.getProperty())) {
                throw new PowerJobException("Illegal property of DecisionNodes out-degree edge,node name : " + node.getNodeName());
            }
            if (Boolean.parseBoolean(edge.getProperty())) {
                containTrue = true;
            } else {
                containFalse = true;
            }
        }
        if (!containFalse || !containTrue) {
            throw new PowerJobException("Illegal property of DecisionNodes out-degree edge,node name : " + node.getNodeName());
        }
    }

    @Override
    public void simpleValidate(WorkflowNodeInfoDO node) {
        // node params (presumably the decision expression) must be present
        String nodeParams = node.getNodeParams();
        if (StringUtils.isBlank(nodeParams)) {
            throw new PowerJobException("DecisionNodes param must be not null,node name : " + node.getNodeName());
        }
    }

    /**
     * Checks whether the string is a (case-insensitive, trimmed) "true"/"false" literal.
     * Null-safe: the previous implementation called {@code str.trim()} directly and
     * threw a NullPointerException when an edge carried no property at all, instead
     * of the intended PowerJobException from the caller.
     *
     * @param str candidate value, may be null
     * @return true if the value is a boolean literal
     */
    public static boolean isValidBooleanStr(String str) {
        String normalized = StringUtils.trim(str);
        return StringUtils.equalsIgnoreCase(normalized, "true") || StringUtils.equalsIgnoreCase(normalized, "false");
    }

    @Override
    public WorkflowNodeType matchingType() {
        return WorkflowNodeType.DECISION;
    }
}

View File

@ -0,0 +1,45 @@
package tech.powerjob.server.core.validator;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import tech.powerjob.common.enums.WorkflowNodeType;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.common.enums.SwitchableStatus;
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO;
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
/**
 * Validates JOB nodes of a workflow DAG.
 *
 * @author Echo009
 * @since 2021/12/14
 */
@Component
@Slf4j
@RequiredArgsConstructor
public class JobNodeValidator implements NodeValidator {

    private final JobInfoRepository jobInfoRepository;

    @Override
    public void complexValidate(WorkflowNodeInfoDO node, WorkflowDAG dag) {
        // job nodes impose no topology constraints
    }

    @Override
    public void simpleValidate(WorkflowNodeInfoDO node) {
        // a job node without a job id can never run (WorkflowInstanceManager#initNodeInfo
        // treats it as ILLEGAL_NODE); fail fast with a clear message instead of letting
        // findById(null) blow up inside the DAO layer
        if (node.getJobId() == null) {
            throw new PowerJobException("Illegal job node,job id is missing,node name : " + node.getNodeName());
        }
        // the referenced job must exist and must not be (soft-)deleted
        JobInfoDO job = jobInfoRepository.findById(node.getJobId())
                .orElseThrow(() -> new PowerJobException("Illegal job node,specified job is not exist,node name : " + node.getNodeName()));
        if (job.getStatus() == SwitchableStatus.DELETED.getV()) {
            throw new PowerJobException("Illegal job node,specified job has been deleted,node name : " + node.getNodeName());
        }
    }

    @Override
    public WorkflowNodeType matchingType() {
        return WorkflowNodeType.JOB;
    }
}

View File

@ -0,0 +1,69 @@
package tech.powerjob.server.core.validator;
import com.alibaba.fastjson.JSON;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import tech.powerjob.common.enums.WorkflowNodeType;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.common.model.PEWorkflowDAG;
import tech.powerjob.common.enums.SwitchableStatus;
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO;
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
import tech.powerjob.server.persistence.remote.repository.WorkflowNodeInfoRepository;
import java.util.Objects;
import java.util.Optional;
/**
 * Validates NESTED_WORKFLOW nodes of a workflow DAG.
 * Note: for nested workflow nodes the node's {@code jobId} field stores the id
 * of the referenced (nested) workflow, not a job id.
 *
 * @author Echo009
 * @since 2021/12/14
 */
@Component
@Slf4j
@RequiredArgsConstructor
public class NestedWorkflowNodeValidator implements NodeValidator {

    private final WorkflowInfoRepository workflowInfoRepository;

    private final WorkflowNodeInfoRepository workflowNodeInfoRepository;

    @Override
    public void complexValidate(WorkflowNodeInfoDO node, WorkflowDAG dag) {
        // reject direct self-reference (a workflow must not nest itself)
        if (Objects.equals(node.getJobId(), node.getWorkflowId())) {
            throw new PowerJobException("Illegal nested workflow node,Prohibit circular references!" + node.getNodeName());
        }
    }

    @Override
    public void simpleValidate(WorkflowNodeInfoDO node) {
        // the referenced workflow must exist ...
        WorkflowInfoDO workflowInfo = workflowInfoRepository.findById(node.getJobId())
                .orElseThrow(() -> new PowerJobException("Illegal nested workflow node,specified workflow is not exist,node name : " + node.getNodeName()));
        // ... and must not be (soft-)deleted
        if (workflowInfo.getStatus() == SwitchableStatus.DELETED.getV()) {
            throw new PowerJobException("Illegal nested workflow node,specified workflow has been deleted,node name : " + node.getNodeName());
        }
        // only one level of nesting is allowed: the referenced workflow itself
        // must not contain any nested workflow node
        PEWorkflowDAG peDag = JSON.parseObject(workflowInfo.getPeDAG(), PEWorkflowDAG.class);
        for (PEWorkflowDAG.Node peDagNode : peDag.getNodes()) {
            // missing node metadata means the referenced workflow is broken
            WorkflowNodeInfoDO nestedNode = workflowNodeInfoRepository.findById(peDagNode.getNodeId())
                    .orElseThrow(() -> new PowerJobException("Illegal nested workflow node,specified workflow is invalidate,node name : " + node.getNodeName()));
            if (Objects.equals(nestedNode.getType(), WorkflowNodeType.NESTED_WORKFLOW.getCode())) {
                throw new PowerJobException("Illegal nested workflow node,specified workflow must be a simple workflow,node name : " + node.getNodeName());
            }
        }
    }

    @Override
    public WorkflowNodeType matchingType() {
        return WorkflowNodeType.NESTED_WORKFLOW;
    }
}

View File

@ -0,0 +1,31 @@
package tech.powerjob.server.core.validator;
import tech.powerjob.common.enums.WorkflowNodeType;
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO;
/**
 * Validator for a single workflow node type.
 * Implementations are looked up by {@link #matchingType()}.
 *
 * @author Echo009
 * @since 2021/12/14
 */
public interface NodeValidator {

    /**
     * Validates a workflow node against the whole DAG (topology checks, etc.).
     *
     * @param node node to validate
     * @param dag  the DAG the node belongs to
     */
    void complexValidate(WorkflowNodeInfoDO node, WorkflowDAG dag);

    /**
     * Validates a workflow node in isolation (no topology information required).
     *
     * @param node node to validate
     */
    void simpleValidate(WorkflowNodeInfoDO node);

    /**
     * The node type this validator handles.
     *
     * @return node type
     */
    WorkflowNodeType matchingType();
}

View File

@ -0,0 +1,488 @@
package tech.powerjob.server.core.workflow;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Service;
import tech.powerjob.common.SystemInstanceResult;
import tech.powerjob.common.WorkflowContextConstant;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.enums.WorkflowInstanceStatus;
import tech.powerjob.common.enums.WorkflowNodeType;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.common.model.PEWorkflowDAG;
import tech.powerjob.common.serialize.JsonUtils;
import tech.powerjob.common.utils.CommonUtils;
import tech.powerjob.common.enums.SwitchableStatus;
import tech.powerjob.server.common.utils.SpringUtils;
import tech.powerjob.server.core.alarm.AlarmUtils;
import tech.powerjob.server.core.helper.StatusMappingHelper;
import tech.powerjob.server.core.lock.UseCacheLock;
import tech.powerjob.server.core.service.UserService;
import tech.powerjob.server.core.service.WorkflowNodeHandleService;
import tech.powerjob.server.core.uid.IdGenerateService;
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils;
import tech.powerjob.server.core.alarm.AlarmCenter;
import tech.powerjob.server.core.alarm.module.WorkflowInstanceAlarm;
import tech.powerjob.server.persistence.remote.model.*;
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository;
import tech.powerjob.server.persistence.remote.repository.WorkflowNodeInfoRepository;
import java.util.*;
import java.util.stream.Collectors;
import static tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils.isNotAllowSkipWhenFailed;
/**
 * Manages running workflow instances: creates them, drives the DAG forward as
 * node instances finish, and handles final status and alarming.
 *
 * @author tjq
 * @author Echo009
 * @since 2020/5/26
 */
@Slf4j
@Service
@RequiredArgsConstructor
@SuppressWarnings("squid:S1192")
public class WorkflowInstanceManager {

    private final AlarmCenter alarmCenter;

    private final IdGenerateService idGenerateService;

    private final JobInfoRepository jobInfoRepository;

    private final UserService userService;

    private final WorkflowInfoRepository workflowInfoRepository;

    private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository;

    private final WorkflowNodeInfoRepository workflowNodeInfoRepository;

    private final WorkflowNodeHandleService workflowNodeHandleService;

    /**
     * Creates a workflow instance.
     * ********************************************
     * 2021-02-03 modify by Echo009
     * the workflow context (wfContext) is initialized from initParams
     * ********************************************
     *
     * @param wfInfo             workflow metadata (description)
     * @param initParams         startup parameters
     * @param expectTriggerTime  expected trigger time
     * @param parentWfInstanceId parent workflow instance id (null unless nested)
     * @return wfInstanceId
     */
    public Long create(WorkflowInfoDO wfInfo, String initParams, Long expectTriggerTime, Long parentWfInstanceId) {
        Long wfId = wfInfo.getId();
        Long wfInstanceId = idGenerateService.allocate();
        // build the instance record
        WorkflowInstanceInfoDO newWfInstance = constructWfInstance(wfInfo, initParams, expectTriggerTime, wfId, wfInstanceId);
        if (parentWfInstanceId != null) {
            // nested workflow: remember the parent ...
            newWfInstance.setParentWfInstanceId(parentWfInstanceId);
            // ... and pass the context through verbatim
            newWfInstance.setWfContext(initParams);
        }
        PEWorkflowDAG dag = null;
        try {
            dag = JSON.parseObject(wfInfo.getPeDAG(), PEWorkflowDAG.class);
            // validate the DAG
            if (!WorkflowDAGUtils.valid(dag)) {
                log.error("[Workflow-{}|{}] DAG of this workflow is illegal! maybe you has modified the DAG info directly in database!", wfId, wfInstanceId);
                throw new PowerJobException(SystemInstanceResult.INVALID_DAG);
            }
            // fill node metadata into the DAG
            initNodeInfo(dag);
            // finally make sure every referenced job is still usable (i.e. not deleted)
            Set<Long> allJobIds = Sets.newHashSet();
            dag.getNodes().forEach(node -> {
                if (node.getNodeType() == WorkflowNodeType.JOB.getCode()) {
                    allJobIds.add(node.getJobId());
                }
                // every node starts out waiting for dispatch
                node.setStatus(InstanceStatus.WAITING_DISPATCH.getV());
            });
            int needNum = allJobIds.size();
            long dbNum = jobInfoRepository.countByAppIdAndStatusInAndIdIn(wfInfo.getAppId(), Sets.newHashSet(SwitchableStatus.ENABLE.getV(), SwitchableStatus.DISABLE.getV()), allJobIds);
            log.debug("[Workflow-{}|{}] contains {} jobs, find {} jobs in database.", wfId, wfInstanceId, needNum, dbNum);
            if (dbNum < allJobIds.size()) {
                log.warn("[Workflow-{}|{}] this workflow need {} jobs, but just find {} jobs in database, maybe you delete or disable some job!", wfId, wfInstanceId, needNum, dbNum);
                throw new PowerJobException(SystemInstanceResult.CAN_NOT_FIND_JOB);
            }
            newWfInstance.setDag(JSON.toJSONString(dag));
            workflowInstanceInfoRepository.saveAndFlush(newWfInstance);
        } catch (Exception e) {
            // persist whatever DAG state we have, then mark the instance FAILED
            if (dag != null) {
                newWfInstance.setDag(JSON.toJSONString(dag));
            }
            handleWfInstanceFinalStatus(newWfInstance, e.getMessage(), WorkflowInstanceStatus.FAILED);
        }
        return wfInstanceId;
    }

    /**
     * Enriches each DAG node in place with the metadata stored in workflow_node_info.
     *
     * @param dag the parsed DAG whose nodes will be filled in
     */
    private void initNodeInfo(PEWorkflowDAG dag) {
        for (PEWorkflowDAG.Node node : dag.getNodes()) {
            WorkflowNodeInfoDO workflowNodeInfo = workflowNodeInfoRepository.findById(node.getNodeId()).orElseThrow(() -> new PowerJobException(SystemInstanceResult.CAN_NOT_FIND_NODE));
            if (workflowNodeInfo.getType() == null) {
                // backward compatibility: old records have no type; treat them as job nodes
                workflowNodeInfo.setType(WorkflowNodeType.JOB.getCode());
            }
            // basic info
            node.setNodeType(workflowNodeInfo.getType())
                    .setJobId(workflowNodeInfo.getJobId())
                    .setNodeName(workflowNodeInfo.getNodeName())
                    .setNodeParams(workflowNodeInfo.getNodeParams())
                    .setEnable(workflowNodeInfo.getEnable())
                    .setSkipWhenFailed(workflowNodeInfo.getSkipWhenFailed());
            // job nodes need special handling when initializing node params
            if (node.getNodeType() == WorkflowNodeType.JOB.getCode()) {
                // a job node without a job id is invalid
                if (workflowNodeInfo.getJobId() == null) {
                    throw new PowerJobException(SystemInstanceResult.ILLEGAL_NODE);
                }
                JobInfoDO jobInfo = jobInfoRepository.findById(workflowNodeInfo.getJobId()).orElseThrow(() -> new PowerJobException(SystemInstanceResult.CAN_NOT_FIND_JOB));
                if (!StringUtils.isBlank(workflowNodeInfo.getNodeParams())) {
                    node.setNodeParams(workflowNodeInfo.getNodeParams());
                } else {
                    // fall back to the job's own params when the node defines none
                    node.setNodeParams(jobInfo.getJobParams());
                }
            }
        }
    }

    /**
     * Builds a workflow instance record and initializes its basic fields
     * (everything except the DAG).
     */
    private WorkflowInstanceInfoDO constructWfInstance(WorkflowInfoDO wfInfo, String initParams, Long expectTriggerTime, Long wfId, Long wfInstanceId) {
        Date now = new Date();
        WorkflowInstanceInfoDO newWfInstance = new WorkflowInstanceInfoDO();
        newWfInstance.setAppId(wfInfo.getAppId());
        newWfInstance.setWfInstanceId(wfInstanceId);
        newWfInstance.setWorkflowId(wfId);
        newWfInstance.setStatus(WorkflowInstanceStatus.WAITING.getV());
        newWfInstance.setExpectedTriggerTime(expectTriggerTime);
        newWfInstance.setActualTriggerTime(System.currentTimeMillis());
        newWfInstance.setWfInitParams(initParams);
        // if initParams is a valid Map<String,String> JSON string, inject it into wfContext directly
        boolean injectDirect = false;
        try {
            Map<String, String> parseRes = JSON.parseObject(initParams, new TypeReference<Map<String, String>>() {
            });
            if (parseRes != null && !parseRes.isEmpty()) {
                injectDirect = true;
            }
        } catch (Exception e) {
            // not a JSON map, fall through to the wrapped form below
        }
        if (injectDirect) {
            newWfInstance.setWfContext(initParams);
        } else {
            // wrap initParams under the reserved context key
            Map<String, String> wfContextMap = Maps.newHashMap();
            wfContextMap.put(WorkflowContextConstant.CONTEXT_INIT_PARAMS_KEY, initParams);
            newWfInstance.setWfContext(JsonUtils.toJSONString(wfContextMap));
        }
        newWfInstance.setGmtCreate(now);
        newWfInstance.setGmtModified(now);
        return newWfInstance;
    }

    /**
     * Starts a workflow instance.
     * ********************************************
     * 2021-02-03 modify by Echo009
     * 1. workflows may contain duplicated task nodes
     * 2. the initParams argument was removed; params are read uniformly from the
     *    workflow instance, passing its wfContext as the initial startup params
     * 3. {@link WorkflowDAGUtils#listReadyNodes} keeps in-place retry working
     * ********************************************
     *
     * @param wfInfo       workflow metadata
     * @param wfInstanceId workflow instance id
     */
    @UseCacheLock(type = "processWfInstance", key = "#wfInfo.getMaxWfInstanceNum() > 0 ? #wfInfo.getId() : #wfInstanceId", concurrencyLevel = 1024)
    public void start(WorkflowInfoDO wfInfo, Long wfInstanceId) {
        Optional<WorkflowInstanceInfoDO> wfInstanceInfoOpt = workflowInstanceInfoRepository.findByWfInstanceId(wfInstanceId);
        if (!wfInstanceInfoOpt.isPresent()) {
            log.error("[WorkflowInstanceManager] can't find metadata by workflowInstanceId({}).", wfInstanceId);
            return;
        }
        WorkflowInstanceInfoDO wfInstanceInfo = wfInstanceInfoOpt.get();
        // not WAITING any more -> do not run (a previous step may already have failed)
        if (wfInstanceInfo.getStatus() != WorkflowInstanceStatus.WAITING.getV()) {
            log.info("[Workflow-{}|{}] workflowInstance({}) needn't running any more.", wfInfo.getId(), wfInstanceId, wfInstanceInfo);
            return;
        }
        // maxWfInstanceNum <= 0 means "no limit"
        if (wfInfo.getMaxWfInstanceNum() > 0) {
            // concurrency control
            int instanceConcurrency = workflowInstanceInfoRepository.countByWorkflowIdAndStatusIn(wfInfo.getId(), WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS);
            if (instanceConcurrency > wfInfo.getMaxWfInstanceNum()) {
                handleWfInstanceFinalStatus(wfInstanceInfo, String.format(SystemInstanceResult.TOO_MANY_INSTANCES, instanceConcurrency, wfInfo.getMaxWfInstanceNum()), WorkflowInstanceStatus.FAILED);
                return;
            }
        }
        try {
            // read the DAG from the instance record
            PEWorkflowDAG dag = JSON.parseObject(wfInstanceInfo.getDag(), PEWorkflowDAG.class);
            // root nodes may have been disabled
            List<PEWorkflowDAG.Node> readyNodes = WorkflowDAGUtils.listReadyNodes(dag);
            // handle control nodes first; they may unlock further ready nodes
            List<PEWorkflowDAG.Node> controlNodes = findControlNodes(readyNodes);
            while (!controlNodes.isEmpty()) {
                workflowNodeHandleService.handleControlNodes(controlNodes, dag, wfInstanceInfo);
                readyNodes = WorkflowDAGUtils.listReadyNodes(dag);
                controlNodes = findControlNodes(readyNodes);
            }
            if (readyNodes.isEmpty()) {
                // nothing ready (all nodes disabled) -> finish as SUCCEED immediately
                wfInstanceInfo.setFinishedTime(System.currentTimeMillis());
                wfInstanceInfo.setDag(JSON.toJSONString(dag));
                log.warn("[Workflow-{}|{}] workflowInstance({}) needn't running ", wfInfo.getId(), wfInstanceId, wfInstanceInfo);
                handleWfInstanceFinalStatus(wfInstanceInfo, SystemInstanceResult.NO_ENABLED_NODES, WorkflowInstanceStatus.SUCCEED);
                return;
            }
            // mark the instance RUNNING
            wfInstanceInfo.setStatus(WorkflowInstanceStatus.RUNNING.getV());
            // dispatch the ready task nodes
            workflowNodeHandleService.handleTaskNodes(readyNodes, dag, wfInstanceInfo);
            log.info("[Workflow-{}|{}] start workflow successfully", wfInfo.getId(), wfInstanceId);
        } catch (Exception e) {
            log.error("[Workflow-{}|{}] start workflow: {} failed.", wfInfo.getId(), wfInstanceId, wfInfo, e);
            handleWfInstanceFinalStatus(wfInstanceInfo, e.getMessage(), WorkflowInstanceStatus.FAILED);
        }
    }

    /**
     * Moves the workflow forward; called whenever one of its task instances finishes.
     * ********************************************
     * 2021-02-03 modify by Echo009
     * 1. workflows may contain duplicated task nodes
     * 2. upstream task results are no longer used as instance params; the
     *    workflow instance's wfContext is passed instead
     * 3. {@link WorkflowDAGUtils#listReadyNodes} supports skipping disabled nodes
     * ********************************************
     *
     * @param wfInstanceId workflow instance id
     * @param instanceId   id of the task instance that just finished
     * @param status       final status of that task instance (SUCCEED/FAILED/STOPPED)
     * @param result       result of that task instance
     */
    @SuppressWarnings({"squid:S3776", "squid:S2142", "squid:S1141"})
    @UseCacheLock(type = "processWfInstance", key = "#wfInstanceId", concurrencyLevel = 1024)
    public void move(Long wfInstanceId, Long instanceId, InstanceStatus status, String result) {
        Optional<WorkflowInstanceInfoDO> wfInstanceInfoOpt = workflowInstanceInfoRepository.findByWfInstanceId(wfInstanceId);
        if (!wfInstanceInfoOpt.isPresent()) {
            log.error("[WorkflowInstanceManager] can't find metadata by workflowInstanceId({}).", wfInstanceId);
            return;
        }
        WorkflowInstanceInfoDO wfInstance = wfInstanceInfoOpt.get();
        Long wfId = wfInstance.getWorkflowId();
        // special case: manual STOP while the workflow instance is no longer running
        if (status == InstanceStatus.STOPPED && !WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS.contains(wfInstance.getStatus())) {
            // the user already stopped the whole workflow instance; nothing left to do
            return;
        }
        try {
            PEWorkflowDAG dag = JSON.parseObject(wfInstance.getDag(), PEWorkflowDAG.class);
            // update the finished node's status inside the DAG
            boolean allFinished = true;
            PEWorkflowDAG.Node instanceNode = null;
            for (PEWorkflowDAG.Node node : dag.getNodes()) {
                if (instanceId.equals(node.getInstanceId())) {
                    node.setStatus(status.getV());
                    node.setResult(result);
                    node.setFinishedTime(CommonUtils.formatTime(System.currentTimeMillis()));
                    instanceNode = node;
                    log.info("[Workflow-{}|{}] node(nodeId={},jobId={},instanceId={}) finished in workflowInstance, status={},result={}", wfId, wfInstanceId, node.getNodeId(), node.getJobId(), instanceId, status.name(), result);
                }
                if (InstanceStatus.GENERALIZED_RUNNING_STATUS.contains(node.getStatus())) {
                    allFinished = false;
                }
            }
            if (instanceNode == null) {
                // the node's instance was replaced (in-place retry created a new one); ignore this event
                log.warn("[Workflow-{}|{}] current job instance(instanceId={}) is dissociative! it will be ignore! ", wfId, wfInstanceId, instanceId);
                return;
            }
            wfInstance.setGmtModified(new Date());
            wfInstance.setDag(JSON.toJSONString(dag));
            // workflow already reached a final state (a node failure already failed it); just persist the latest DAG
            if (!WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS.contains(wfInstance.getStatus())) {
                workflowInstanceInfoRepository.saveAndFlush(wfInstance);
                log.info("[Workflow-{}|{}] workflow already finished(status={}), just update the dag info.", wfId, wfInstanceId, wfInstance.getStatus());
                return;
            }
            // task failed and failure may not be skipped -> the whole workflow fails
            if (status == InstanceStatus.FAILED && isNotAllowSkipWhenFailed(instanceNode)) {
                log.warn("[Workflow-{}|{}] workflow instance process failed because middle task(instanceId={}) failed", wfId, wfInstanceId, instanceId);
                handleWfInstanceFinalStatus(wfInstance, SystemInstanceResult.MIDDLE_JOB_FAILED, WorkflowInstanceStatus.FAILED);
                return;
            }
            // a child task was stopped manually -> stop the workflow
            if (status == InstanceStatus.STOPPED) {
                handleWfInstanceFinalStatus(wfInstance, SystemInstanceResult.MIDDLE_JOB_STOPPED, WorkflowInstanceStatus.STOPPED);
                log.warn("[Workflow-{}|{}] workflow instance stopped because middle task(instanceId={}) stopped by user", wfId, wfInstanceId, instanceId);
                return;
            }
            // note: listReadyNodes skips disabled nodes directly
            List<PEWorkflowDAG.Node> readyNodes = WorkflowDAGUtils.listReadyNodes(dag);
            // no ready nodes: re-check whether everything has actually finished
            if (readyNodes.isEmpty() && isFinish(dag)) {
                allFinished = true;
            }
            // whole workflow finished (reaching here means every child task succeeded)
            if (allFinished) {
                // re-serialize: WorkflowDAGUtils#listReadyNodes may have updated node status
                wfInstance.setDag(JSON.toJSONString(dag));
                // the last task's result becomes the workflow's result
                handleWfInstanceFinalStatus(wfInstance, result, WorkflowInstanceStatus.SUCCEED);
                log.info("[Workflow-{}|{}] process successfully.", wfId, wfInstanceId);
                return;
            }
            // handle control nodes first
            List<PEWorkflowDAG.Node> controlNodes = findControlNodes(readyNodes);
            while (!controlNodes.isEmpty()) {
                workflowNodeHandleService.handleControlNodes(controlNodes, dag, wfInstance);
                readyNodes = WorkflowDAGUtils.listReadyNodes(dag);
                controlNodes = findControlNodes(readyNodes);
            }
            // check completion again (control nodes may sit at the end of the DAG)
            if (readyNodes.isEmpty()) {
                if (isFinish(dag)) {
                    wfInstance.setDag(JSON.toJSONString(dag));
                    handleWfInstanceFinalStatus(wfInstance, result, WorkflowInstanceStatus.SUCCEED);
                    log.info("[Workflow-{}|{}] process successfully.", wfId, wfInstanceId);
                    return;
                }
                // nothing ready but not finished yet; just persist the DAG
                wfInstance.setDag(JSON.toJSONString(dag));
                workflowInstanceInfoRepository.saveAndFlush(wfInstance);
                return;
            }
            // dispatch the ready task nodes
            workflowNodeHandleService.handleTaskNodes(readyNodes, dag, wfInstance);
        } catch (Exception e) {
            handleWfInstanceFinalStatus(wfInstance, "MOVE NEXT STEP FAILED: " + e.getMessage(), WorkflowInstanceStatus.FAILED);
            log.error("[Workflow-{}|{}] update failed.", wfId, wfInstanceId, e);
        }
    }

    /**
     * Updates (appends to) the workflow context.
     * fix: must share the same lock with every other method that touches the
     * workflow instance, otherwise concurrent updates can overwrite node status.
     *
     * @param wfInstanceId          workflow instance id
     * @param appendedWfContextData context entries to append
     * @since 2021/02/05
     */
    @UseCacheLock(type = "processWfInstance", key = "#wfInstanceId", concurrencyLevel = 1024)
    public void updateWorkflowContext(Long wfInstanceId, Map<String, String> appendedWfContextData) {
        try {
            Optional<WorkflowInstanceInfoDO> wfInstanceInfoOpt = workflowInstanceInfoRepository.findByWfInstanceId(wfInstanceId);
            if (!wfInstanceInfoOpt.isPresent()) {
                log.error("[WorkflowInstanceManager] can't find metadata by workflowInstanceId({}).", wfInstanceId);
                return;
            }
            WorkflowInstanceInfoDO wfInstance = wfInstanceInfoOpt.get();
            HashMap<String, String> wfContext = JSON.parseObject(wfInstance.getWfContext(), new TypeReference<HashMap<String, String>>() {
            });
            for (Map.Entry<String, String> entry : appendedWfContextData.entrySet()) {
                String key = entry.getKey();
                String originValue = wfContext.put(key, entry.getValue());
                log.info("[Workflow-{}|{}] update workflow context {} : {} -> {}", wfInstance.getWorkflowId(), wfInstance.getWfInstanceId(), key, originValue, entry.getValue());
            }
            wfInstance.setWfContext(JSON.toJSONString(wfContext));
            workflowInstanceInfoRepository.saveAndFlush(wfInstance);
        } catch (Exception e) {
            log.error("[WorkflowInstanceManager] update workflow(workflowInstanceId={}) context failed.", wfInstanceId, e);
        }
    }

    /**
     * Persists a workflow instance's final status, propagates it to the parent
     * workflow instance (when nested), and raises an alarm on failure.
     */
    private void handleWfInstanceFinalStatus(WorkflowInstanceInfoDO wfInstance, String result, WorkflowInstanceStatus workflowInstanceStatus) {
        wfInstance.setStatus(workflowInstanceStatus.getV());
        wfInstance.setResult(result);
        wfInstance.setFinishedTime(System.currentTimeMillis());
        wfInstance.setGmtModified(new Date());
        workflowInstanceInfoRepository.saveAndFlush(wfInstance);
        // nested workflow: notify the parent
        if (wfInstance.getParentWfInstanceId() != null) {
            // propagate the context first (only on success)
            if (workflowInstanceStatus == WorkflowInstanceStatus.SUCCEED) {
                HashMap<String, String> wfContext = JSON.parseObject(wfInstance.getWfContext(), new TypeReference<HashMap<String, String>>() {
                });
                SpringUtils.getBean(this.getClass()).updateWorkflowContext(wfInstance.getParentWfInstanceId(), wfContext);
            }
            // move the parent workflow forward, fix https://github.com/PowerJob/PowerJob/issues/465
            // NOTE(review): calls go through SpringUtils.getBean, presumably so the
            // Spring proxy applies @UseCacheLock — confirm
            SpringUtils.getBean(this.getClass()).move(wfInstance.getParentWfInstanceId(), wfInstance.getWfInstanceId(), StatusMappingHelper.toInstanceStatus(workflowInstanceStatus), result);
        }
        // alarm on failure
        if (workflowInstanceStatus == WorkflowInstanceStatus.FAILED) {
            try {
                workflowInfoRepository.findById(wfInstance.getWorkflowId()).ifPresent(wfInfo -> {
                    WorkflowInstanceAlarm content = new WorkflowInstanceAlarm();
                    BeanUtils.copyProperties(wfInfo, content);
                    BeanUtils.copyProperties(wfInstance, content);
                    content.setResult(result);
                    List<UserInfoDO> userList = userService.fetchNotifyUserList(wfInfo.getNotifyUserIds());
                    alarmCenter.alarmFailed(content, AlarmUtils.convertUserInfoList2AlarmTargetList(userList));
                });
            } catch (Exception ignore) {
                // alarming is best-effort; never let it break final-status handling
            }
        }
    }

    /** Filters the control nodes out of a ready-node list. */
    private List<PEWorkflowDAG.Node> findControlNodes(List<PEWorkflowDAG.Node> readyNodes) {
        return readyNodes.stream().filter(node -> {
            WorkflowNodeType nodeType = WorkflowNodeType.of(node.getNodeType());
            return nodeType.isControlNode();
        }).collect(Collectors.toList());
    }

    /** Returns true when no node in the DAG is still in a generalized-running state. */
    private boolean isFinish(PEWorkflowDAG dag) {
        for (PEWorkflowDAG.Node node : dag.getNodes()) {
            if (InstanceStatus.GENERALIZED_RUNNING_STATUS.contains(node.getStatus())) {
                return false;
            }
        }
        return true;
    }
}

View File

@ -0,0 +1,230 @@
package tech.powerjob.server.core.workflow;

import com.alibaba.fastjson.JSON;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Service;
import tech.powerjob.common.SystemInstanceResult;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.enums.WorkflowInstanceStatus;
import tech.powerjob.common.enums.WorkflowNodeType;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.common.model.PEWorkflowDAG;
import tech.powerjob.common.response.WorkflowInstanceInfoDTO;
import tech.powerjob.common.enums.SwitchableStatus;
import tech.powerjob.server.common.utils.SpringUtils;
import tech.powerjob.server.core.instance.InstanceService;
import tech.powerjob.server.core.lock.UseCacheLock;
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils;
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository;
import tech.powerjob.server.remote.server.redirector.DesignateServer;

import java.util.Date;
import java.util.Objects;
import java.util.Optional;

/**
 * Workflow instance service: stop / retry / query / mark-node-as-success
 * operations on workflow instances.
 *
 * @author tjq
 * @author Echo009
 * @since 2020/5/31
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class WorkflowInstanceService {

    private final InstanceService instanceService;

    private final WorkflowInstanceInfoRepository wfInstanceInfoRepository;

    private final WorkflowInstanceManager workflowInstanceManager;

    private final WorkflowInfoRepository workflowInfoRepository;

    /**
     * Stop a workflow instance (external entrance).
     * If the target instance is nested inside a parent workflow, the PARENT
     * instance is stopped instead (stopping the child alone would leave the
     * parent waiting forever).
     *
     * @param wfInstanceId workflow instance ID
     * @param appId        owning app ID
     */
    public void stopWorkflowInstanceEntrance(Long wfInstanceId, Long appId) {
        WorkflowInstanceInfoDO wfInstance = fetchWfInstance(wfInstanceId, appId);
        if (!WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS.contains(wfInstance.getStatus())) {
            throw new PowerJobException("workflow instance already stopped");
        }
        // nested workflow: redirect the stop request to the parent instance
        if (wfInstance.getParentWfInstanceId() != null) {
            // call through the Spring proxy so @DesignateServer / @UseCacheLock still apply
            SpringUtils.getBean(this.getClass()).stopWorkflowInstance(wfInstance.getParentWfInstanceId(), appId);
            return;
        }
        SpringUtils.getBean(this.getClass()).stopWorkflowInstance(wfInstanceId, appId);
    }

    /**
     * Stop a workflow instance: marks every running node as STOPPED, tries to
     * stop the underlying task instances, then persists the instance as STOPPED.
     *
     * @param wfInstanceId workflow instance ID
     * @param appId        owning app ID
     */
    @DesignateServer
    @UseCacheLock(type = "processWfInstance", key = "#wfInstanceId", concurrencyLevel = 1024)
    public void stopWorkflowInstance(Long wfInstanceId, Long appId) {
        WorkflowInstanceInfoDO wfInstance = fetchWfInstance(wfInstanceId, appId);
        if (!WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS.contains(wfInstance.getStatus())) {
            throw new PowerJobException("workflow instance already stopped");
        }
        // stop every started-but-unfinished task instance
        PEWorkflowDAG dag = JSON.parseObject(wfInstance.getDag(), PEWorkflowDAG.class);
        // walk all nodes and terminate the running ones
        dag.getNodes().forEach(node -> {
            try {
                if (node.getInstanceId() != null && InstanceStatus.GENERALIZED_RUNNING_STATUS.contains(node.getStatus())) {
                    log.debug("[WfInstance-{}] instance({}) is running, try to stop it now.", wfInstanceId, node.getInstanceId());
                    node.setStatus(InstanceStatus.STOPPED.getV());
                    node.setResult(SystemInstanceResult.STOPPED_BY_USER);
                    // special-case nested workflow nodes: their instanceId refers to a
                    // sub-workflow instance, so stop it recursively as a workflow
                    if (Objects.equals(node.getNodeType(), WorkflowNodeType.NESTED_WORKFLOW.getCode())) {
                        stopWorkflowInstance(node.getInstanceId(), appId);
                        //
                    } else {
                        // NOTE: best-effort — there is no guarantee the running instance is actually stopped
                        instanceService.stopInstance(appId, node.getInstanceId());
                    }
                }
            } catch (Exception e) {
                log.warn("[WfInstance-{}] stop instance({}) failed.", wfInstanceId, JSON.toJSONString(node), e);
            }
        });
        // persist the new state to the database
        wfInstance.setDag(JSON.toJSONString(dag));
        wfInstance.setStatus(WorkflowInstanceStatus.STOPPED.getV());
        wfInstance.setResult(SystemInstanceResult.STOPPED_BY_USER);
        wfInstance.setGmtModified(new Date());
        wfInstanceInfoRepository.saveAndFlush(wfInstance);
        log.info("[WfInstance-{}] stop workflow instance successfully~", wfInstanceId);
    }

    /**
     * Retry a failed (or manually stopped) workflow instance.
     * Add by Echo009 on 2021/02/07
     *
     * @param wfInstanceId workflow instance ID
     * @param appId        owning app ID
     */
    @DesignateServer
    @UseCacheLock(type = "processWfInstance", key = "#wfInstanceId", concurrencyLevel = 1024)
    public void retryWorkflowInstance(Long wfInstanceId, Long appId) {
        WorkflowInstanceInfoDO wfInstance = fetchWfInstance(wfInstanceId, appId);
        // only non-running, non-successful workflow instances may be retried
        if (WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS.contains(wfInstance.getStatus())) {
            throw new PowerJobException("workflow instance is running");
        }
        if (wfInstance.getStatus() == WorkflowInstanceStatus.SUCCEED.getV()) {
            throw new PowerJobException("workflow instance is already successful");
        }
        // instances that failed because of an illegal DAG or missing job info cannot be retried
        if (SystemInstanceResult.CAN_NOT_FIND_JOB.equals(wfInstance.getResult())) {
            throw new PowerJobException("you can't retry the workflow instance which is missing job info!");
        }
        // validate the stored DAG before retrying
        PEWorkflowDAG dag;
        try {
            dag = JSON.parseObject(wfInstance.getDag(), PEWorkflowDAG.class);
            if (!WorkflowDAGUtils.valid(dag)) {
                throw new PowerJobException(SystemInstanceResult.INVALID_DAG);
            }
        } catch (Exception e) {
            throw new PowerJobException("you can't retry the workflow instance whose DAG is illegal!");
        }
        // the workflow metadata must still exist and be enabled
        Optional<WorkflowInfoDO> workflowInfo = workflowInfoRepository.findById(wfInstance.getWorkflowId());
        if (!workflowInfo.isPresent() || workflowInfo.get().getStatus() == SwitchableStatus.DISABLE.getV()) {
            throw new PowerJobException("you can't retry the workflow instance whose metadata is unavailable!");
        }
        WorkflowDAGUtils.resetRetryableNode(dag);
        wfInstance.setDag(JSON.toJSONString(dag));
        // reset status to WAITING; the original trigger time is intentionally kept
        wfInstance.setStatus(WorkflowInstanceStatus.WAITING.getV());
        wfInstance.setGmtModified(new Date());
        wfInstanceInfoRepository.saveAndFlush(wfInstance);
        // start immediately
        workflowInstanceManager.start(workflowInfo.get(), wfInstanceId);
    }

    /**
     * Fetch a workflow instance as an external DTO (with permission check).
     *
     * @param wfInstanceId workflow instance ID
     * @param appId        owning app ID
     * @return DTO view of the workflow instance
     */
    public WorkflowInstanceInfoDTO fetchWorkflowInstanceInfo(Long wfInstanceId, Long appId) {
        WorkflowInstanceInfoDO wfInstance = fetchWfInstance(wfInstanceId, appId);
        WorkflowInstanceInfoDTO dto = new WorkflowInstanceInfoDTO();
        BeanUtils.copyProperties(wfInstance, dto);
        return dto;
    }

    /**
     * Fetch a workflow instance entity and verify it belongs to the given app.
     *
     * @param wfInstanceId workflow instance ID
     * @param appId        owning app ID
     * @return the workflow instance entity
     * @throws IllegalArgumentException if no such instance exists
     * @throws PowerJobException        if the instance belongs to another app
     */
    public WorkflowInstanceInfoDO fetchWfInstance(Long wfInstanceId, Long appId) {
        WorkflowInstanceInfoDO wfInstance = wfInstanceInfoRepository.findByWfInstanceId(wfInstanceId).orElseThrow(() -> new IllegalArgumentException("can't find workflow instance by wfInstanceId: " + wfInstanceId));
        if (!Objects.equals(appId, wfInstance.getAppId())) {
            throw new PowerJobException("Permission Denied!");
        }
        return wfInstance;
    }

    /**
     * Add by Echo009 on 2021/02/20
     * Mark a node as successful.
     * Only nodes that really FAILED and are NOT allowed to be skipped can be
     * marked (i.e. nodes in the "failed and not skippable" state).
     * Only the node's status/result inside the workflow instance's DAG is
     * touched — the corresponding task instance record is left untouched.
     * <p>
     * Guarded by the cache lock for safety.
     *
     * @param wfInstanceId workflow instance ID
     * @param nodeId       node ID
     */
    @DesignateServer
    @UseCacheLock(type = "processWfInstance", key = "#wfInstanceId", concurrencyLevel = 1024)
    public void markNodeAsSuccess(Long appId, Long wfInstanceId, Long nodeId) {
        WorkflowInstanceInfoDO wfInstance = fetchWfInstance(wfInstanceId, appId);
        // a running workflow instance must not be modified
        if (WorkflowInstanceStatus.GENERALIZED_RUNNING_STATUS.contains(wfInstance.getStatus())) {
            throw new PowerJobException("you can't mark the node in a running workflow!");
        }
        // the DAG was serialized by this server, so deserialization is expected to succeed
        PEWorkflowDAG dag = JSON.parseObject(wfInstance.getDag(), PEWorkflowDAG.class);
        PEWorkflowDAG.Node targetNode = null;
        for (PEWorkflowDAG.Node node : dag.getNodes()) {
            if (node.getNodeId().equals(nodeId)) {
                targetNode = node;
                break;
            }
        }
        if (targetNode == null) {
            throw new PowerJobException("can't find the node in current DAG!");
        }
        boolean allowSkipWhenFailed = targetNode.getSkipWhenFailed() != null && targetNode.getSkipWhenFailed();
        // only accept nodes that actually ran, failed, and are not skippable
        if (targetNode.getInstanceId() != null
                && targetNode.getStatus() == InstanceStatus.FAILED.getV()
                // not allowed to skip on failure
                && !allowSkipWhenFailed) {
            // only update the node info inside the workflow instance's DAG
            targetNode.setStatus(InstanceStatus.SUCCEED.getV())
                    .setResult(SystemInstanceResult.MARK_AS_SUCCESSFUL_NODE);
            wfInstance.setDag(JSON.toJSONString(dag));
            wfInstanceInfoRepository.saveAndFlush(wfInstance);
            return;
        }
        // refuse every other case
        throw new PowerJobException("you can only mark the node which is failed and not allow to skip!");
    }
}

View File

@ -0,0 +1,365 @@
package tech.powerjob.server.core.workflow;

import com.alibaba.fastjson.JSON;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.common.model.LifeCycle;
import tech.powerjob.common.model.PEWorkflowDAG;
import tech.powerjob.common.request.http.SaveWorkflowNodeRequest;
import tech.powerjob.common.request.http.SaveWorkflowRequest;
import tech.powerjob.server.common.SJ;
import tech.powerjob.common.enums.SwitchableStatus;
import tech.powerjob.server.common.timewheel.holder.InstanceTimeWheelService;
import tech.powerjob.server.core.scheduler.TimingStrategyService;
import tech.powerjob.server.core.service.NodeValidateService;
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils;
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
import tech.powerjob.server.persistence.remote.model.WorkflowNodeInfoDO;
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
import tech.powerjob.server.persistence.remote.repository.WorkflowNodeInfoRepository;
import tech.powerjob.server.remote.server.redirector.DesignateServer;

import javax.annotation.Resource;
import javax.transaction.Transactional;
import java.util.*;

/**
 * Workflow service: CRUD, copy, and trigger operations on workflow metadata.
 *
 * @author tjq
 * @author zenggonggu
 * @author Echo009
 * @since 2020/5/26
 */
@Slf4j
@Service
public class WorkflowService {

    @Resource
    private WorkflowInstanceManager workflowInstanceManager;

    @Resource
    private WorkflowInfoRepository workflowInfoRepository;

    @Resource
    private WorkflowNodeInfoRepository workflowNodeInfoRepository;

    @Resource
    private NodeValidateService nodeValidateService;

    @Resource
    private TimingStrategyService timingStrategyService;

    /**
     * Create or update a workflow.
     * <p>
     * Note: the DAG itself is validated/normalized via
     * {@link #validateAndConvert2String(Long, PEWorkflowDAG)}; only node IDs are
     * persisted in the workflow's DAG.
     *
     * @param req save request
     * @return workflow ID
     */
    @Transactional(rollbackOn = Exception.class)
    public Long saveWorkflow(SaveWorkflowRequest req) {
        req.valid();
        Long wfId = req.getId();
        WorkflowInfoDO wf;
        if (wfId == null) {
            wf = new WorkflowInfoDO();
            wf.setGmtCreate(new Date());
        } else {
            // effectively-final copy for use inside the lambda below
            Long finalWfId = wfId;
            wf = workflowInfoRepository.findById(wfId).orElseThrow(() -> new IllegalArgumentException("can't find workflow by id:" + finalWfId));
        }
        BeanUtils.copyProperties(req, wf);
        wf.setGmtModified(new Date());
        wf.setStatus(req.isEnable() ? SwitchableStatus.ENABLE.getV() : SwitchableStatus.DISABLE.getV());
        wf.setTimeExpressionType(req.getTimeExpressionType().getV());
        if (req.getNotifyUserIds() != null) {
            wf.setNotifyUserIds(SJ.COMMA_JOINER.join(req.getNotifyUserIds()));
        }
        if (req.getLifeCycle() != null) {
            wf.setLifecycle(JSON.toJSONString(req.getLifeCycle()));
        }
        if (TimeExpressionType.FREQUENT_TYPES.contains(req.getTimeExpressionType().getV())) {
            // frequent (fixed rate / fixed delay) workflows have no pre-computed trigger time
            wf.setTimeExpression(null);
        } else {
            LifeCycle lifeCycle = Optional.ofNullable(req.getLifeCycle()).orElse(LifeCycle.EMPTY_LIFE_CYCLE);
            Long nextValidTime = timingStrategyService.calculateNextTriggerTimeWithInspection(TimeExpressionType.of(wf.getTimeExpressionType()), wf.getTimeExpression(), lifeCycle.getStart(), lifeCycle.getEnd());
            wf.setNextTriggerTime(nextValidTime);
        }
        // a new workflow must be saved first so that an ID is generated
        if (wfId == null) {
            wf = workflowInfoRepository.saveAndFlush(wf);
            wfId = wf.getId();
        }
        wf.setPeDAG(validateAndConvert2String(wfId, req.getDag()));
        workflowInfoRepository.saveAndFlush(wf);
        return wfId;
    }

    /**
     * Validate the DAG and serialize it for persistence.
     * Dissociative nodes (nodes no longer referenced by this DAG) are
     * physically deleted here.
     * Only graph-structural info (nodeId) is kept in the persisted DAG; job
     * ID / name are always taken from the node table.
     */
    private String validateAndConvert2String(Long wfId, PEWorkflowDAG dag) {
        if (dag == null || !WorkflowDAGUtils.valid(dag)) {
            throw new PowerJobException("illegal DAG");
        }
        List<Long> nodeIdList = Lists.newArrayList();
        List<PEWorkflowDAG.Node> newNodes = Lists.newArrayList();
        WorkflowDAG complexDag = WorkflowDAGUtils.convert(dag);
        for (PEWorkflowDAG.Node node : dag.getNodes()) {
            WorkflowNodeInfoDO nodeInfo = workflowNodeInfoRepository.findById(node.getNodeId()).orElseThrow(() -> new PowerJobException("can't find node info by id :" + node.getNodeId()));
            // bind the node to this workflow on first use
            if (nodeInfo.getWorkflowId() == null) {
                nodeInfo.setWorkflowId(wfId);
                nodeInfo.setGmtModified(new Date());
                workflowNodeInfoRepository.saveAndFlush(nodeInfo);
            }
            if (!wfId.equals(nodeInfo.getWorkflowId())) {
                throw new PowerJobException("can't use another workflow's node");
            }
            nodeValidateService.complexValidate(nodeInfo, complexDag);
            // keep only the node ID; strip all other node info
            newNodes.add(new PEWorkflowDAG.Node(node.getNodeId()));
            nodeIdList.add(node.getNodeId());
        }
        dag.setNodes(newNodes);
        int deleteCount = workflowNodeInfoRepository.deleteByWorkflowIdAndIdNotIn(wfId, nodeIdList);
        log.warn("[WorkflowService-{}] delete {} dissociative nodes of workflow", wfId, deleteCount);
        return JSON.toJSONString(dag);
    }

    /**
     * Deep-copy a workflow: basic info, all node records, and the DAG with its
     * node/edge IDs remapped to the copies.
     *
     * @param wfId  workflow ID
     * @param appId app ID
     * @return ID of the newly created workflow
     */
    @Transactional(rollbackOn = Exception.class)
    public long copyWorkflow(Long wfId, Long appId) {
        WorkflowInfoDO originWorkflow = permissionCheck(wfId, appId);
        if (originWorkflow.getStatus() == SwitchableStatus.DELETED.getV()) {
            throw new IllegalStateException("can't copy the workflow which has been deleted!");
        }
        // copy basic info
        WorkflowInfoDO copyWorkflow = new WorkflowInfoDO();
        BeanUtils.copyProperties(originWorkflow, copyWorkflow);
        copyWorkflow.setId(null);
        copyWorkflow.setGmtCreate(new Date());
        copyWorkflow.setGmtModified(new Date());
        copyWorkflow.setWfName(copyWorkflow.getWfName() + "_COPY");
        // save first to obtain the new workflow ID
        copyWorkflow = workflowInfoRepository.saveAndFlush(copyWorkflow);
        if (StringUtils.isEmpty(copyWorkflow.getPeDAG())) {
            return copyWorkflow.getId();
        }
        PEWorkflowDAG dag = JSON.parseObject(copyWorkflow.getPeDAG(), PEWorkflowDAG.class);
        // copy node records and rewrite the DAG's node references
        if (!CollectionUtils.isEmpty(dag.getNodes())) {
            // originNodeId => copyNodeId
            HashMap<Long, Long> nodeIdMap = new HashMap<>(dag.getNodes().size(), 1);
            // fix up node info
            for (PEWorkflowDAG.Node node : dag.getNodes()) {
                WorkflowNodeInfoDO originNode = workflowNodeInfoRepository.findById(node.getNodeId()).orElseThrow(() -> new IllegalArgumentException("can't find workflow Node by id: " + node.getNodeId()));
                WorkflowNodeInfoDO copyNode = new WorkflowNodeInfoDO();
                BeanUtils.copyProperties(originNode, copyNode);
                copyNode.setId(null);
                copyNode.setWorkflowId(copyWorkflow.getId());
                copyNode.setGmtCreate(new Date());
                copyNode.setGmtModified(new Date());
                copyNode = workflowNodeInfoRepository.saveAndFlush(copyNode);
                nodeIdMap.put(originNode.getId(), copyNode.getId());
                node.setNodeId(copyNode.getId());
            }
            // fix up edge info
            for (PEWorkflowDAG.Edge edge : dag.getEdges()) {
                edge.setFrom(nodeIdMap.get(edge.getFrom()));
                edge.setTo(nodeIdMap.get(edge.getTo()));
            }
        }
        copyWorkflow.setPeDAG(JSON.toJSONString(dag));
        workflowInfoRepository.saveAndFlush(copyWorkflow);
        return copyWorkflow.getId();
    }

    /**
     * Fetch workflow metadata; the returned DAG is enriched with full node info
     * (enabled flag, skip-on-failure flag, name, params, ...).
     *
     * @param wfId  workflow ID
     * @param appId app ID
     * @return workflow entity with enriched DAG
     */
    public WorkflowInfoDO fetchWorkflow(Long wfId, Long appId) {
        WorkflowInfoDO wfInfo = permissionCheck(wfId, appId);
        fillWorkflow(wfInfo);
        return wfInfo;
    }

    /**
     * Delete a workflow (soft delete — status flag only).
     *
     * @param wfId  workflow ID
     * @param appId owning app ID
     */
    public void deleteWorkflow(Long wfId, Long appId) {
        WorkflowInfoDO wfInfo = permissionCheck(wfId, appId);
        wfInfo.setStatus(SwitchableStatus.DELETED.getV());
        wfInfo.setGmtModified(new Date());
        workflowInfoRepository.saveAndFlush(wfInfo);
    }

    /**
     * Disable a workflow.
     *
     * @param wfId  workflow ID
     * @param appId owning app ID
     */
    public void disableWorkflow(Long wfId, Long appId) {
        WorkflowInfoDO wfInfo = permissionCheck(wfId, appId);
        wfInfo.setStatus(SwitchableStatus.DISABLE.getV());
        wfInfo.setGmtModified(new Date());
        workflowInfoRepository.saveAndFlush(wfInfo);
    }

    /**
     * Enable a workflow.
     *
     * @param wfId  workflow ID
     * @param appId owning app ID
     */
    public void enableWorkflow(Long wfId, Long appId) {
        WorkflowInfoDO wfInfo = permissionCheck(wfId, appId);
        wfInfo.setStatus(SwitchableStatus.ENABLE.getV());
        wfInfo.setGmtModified(new Date());
        workflowInfoRepository.saveAndFlush(wfInfo);
    }

    /**
     * Run a workflow immediately (or after an optional delay).
     *
     * @param wfId       workflow ID
     * @param appId      owning app ID
     * @param initParams launch parameters
     * @param delay      delay in milliseconds (null or &lt;= 0 means run now)
     * @return the workflow instance ID (wfInstanceId)
     */
    @DesignateServer
    public Long runWorkflow(Long wfId, Long appId, String initParams, Long delay) {
        delay = delay == null ? 0 : delay;
        WorkflowInfoDO wfInfo = permissionCheck(wfId, appId);
        log.info("[WorkflowService-{}] try to run workflow, initParams={},delay={} ms.", wfInfo.getId(), initParams, delay);
        Long wfInstanceId = workflowInstanceManager.create(wfInfo, initParams, System.currentTimeMillis() + delay, null);
        if (delay <= 0) {
            workflowInstanceManager.start(wfInfo, wfInstanceId);
        } else {
            // schedule the delayed start on the in-memory time wheel
            InstanceTimeWheelService.schedule(wfInstanceId, delay, () -> workflowInstanceManager.start(wfInfo, wfInstanceId));
        }
        return wfInstanceId;
    }

    /**
     * Create or update workflow nodes.
     *
     * @param workflowNodeRequestList node save requests (must all belong to the same app)
     * @return the created / updated node entities
     */
    @Transactional(rollbackOn = Exception.class)
    public List<WorkflowNodeInfoDO> saveWorkflowNode(List<SaveWorkflowNodeRequest> workflowNodeRequestList) {
        if (CollectionUtils.isEmpty(workflowNodeRequestList)) {
            return Collections.emptyList();
        }
        final Long appId = workflowNodeRequestList.get(0).getAppId();
        List<WorkflowNodeInfoDO> res = new ArrayList<>(workflowNodeRequestList.size());
        for (SaveWorkflowNodeRequest req : workflowNodeRequestList) {
            req.valid();
            // all nodes must belong to the same app
            if (!appId.equals(req.getAppId())) {
                throw new PowerJobException("node list must are in the same app");
            }
            WorkflowNodeInfoDO workflowNodeInfo;
            if (req.getId() != null) {
                workflowNodeInfo = workflowNodeInfoRepository.findById(req.getId()).orElseThrow(() -> new IllegalArgumentException("can't find workflow Node by id: " + req.getId()));
            } else {
                workflowNodeInfo = new WorkflowNodeInfoDO();
                workflowNodeInfo.setGmtCreate(new Date());
            }
            BeanUtils.copyProperties(req, workflowNodeInfo);
            workflowNodeInfo.setType(req.getType());
            nodeValidateService.simpleValidate(workflowNodeInfo);
            workflowNodeInfo.setGmtModified(new Date());
            workflowNodeInfo = workflowNodeInfoRepository.saveAndFlush(workflowNodeInfo);
            res.add(workflowNodeInfo);
        }
        return res;
    }

    /**
     * Enrich the workflow's persisted DAG (which only stores node IDs) with the
     * full node info from the node table. An unparseable DAG is left as-is.
     */
    private void fillWorkflow(WorkflowInfoDO wfInfo) {
        PEWorkflowDAG dagInfo = null;
        try {
            dagInfo = JSON.parseObject(wfInfo.getPeDAG(), PEWorkflowDAG.class);
        } catch (Exception e) {
            log.warn("[WorkflowService-{}]illegal DAG : {}", wfInfo.getId(), wfInfo.getPeDAG());
        }
        if (dagInfo == null) {
            return;
        }
        Map<Long, WorkflowNodeInfoDO> nodeIdNodInfoMap = Maps.newHashMap();
        workflowNodeInfoRepository.findByWorkflowId(wfInfo.getId()).forEach(
                e -> nodeIdNodInfoMap.put(e.getId(), e)
        );
        // fill in node details
        if (!CollectionUtils.isEmpty(dagInfo.getNodes())) {
            for (PEWorkflowDAG.Node node : dagInfo.getNodes()) {
                WorkflowNodeInfoDO nodeInfo = nodeIdNodInfoMap.get(node.getNodeId());
                if (nodeInfo != null) {
                    node.setNodeType(nodeInfo.getType())
                            .setJobId(nodeInfo.getJobId())
                            .setEnable(nodeInfo.getEnable())
                            .setSkipWhenFailed(nodeInfo.getSkipWhenFailed())
                            .setNodeName(nodeInfo.getNodeName())
                            .setNodeParams(nodeInfo.getNodeParams());
                }
            }
        }
        wfInfo.setPeDAG(JSON.toJSONString(dagInfo));
    }

    /**
     * Load a workflow and verify it belongs to the given app.
     *
     * @throws IllegalArgumentException if no such workflow exists
     * @throws PowerJobException        if the workflow belongs to another app
     */
    private WorkflowInfoDO permissionCheck(Long wfId, Long appId) {
        WorkflowInfoDO wfInfo = workflowInfoRepository.findById(wfId).orElseThrow(() -> new IllegalArgumentException("can't find workflow by id: " + wfId));
        if (!wfInfo.getAppId().equals(appId)) {
            throw new PowerJobException("Permission Denied! can't operate other app's workflow!");
        }
        return wfInfo;
    }
}

View File

@ -0,0 +1,81 @@
package tech.powerjob.server.core.workflow.algorithm;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import lombok.*;
import tech.powerjob.common.model.PEWorkflowDAG;

import java.util.List;
import java.util.Map;

/**
 * Reference-based workflow DAG: each node holds direct references to its
 * upstream and downstream neighbours (and therefore cannot be serialized /
 * deserialized with JSON — use {@link PEWorkflowDAG} for that).
 *
 * @author tjq
 * @author Echo009
 * @since 2020/5/26
 */
@Data
@ToString(exclude = {"nodeMap"})
@NoArgsConstructor
@AllArgsConstructor
public class WorkflowDAG {

    /**
     * Root nodes — a DAG may have multiple roots.
     */
    private List<Node> roots;

    // index of all nodes by their ID, for O(1) lookup
    private Map<Long, Node> nodeMap;

    /**
     * Look up a node by ID.
     *
     * @param nodeId node ID
     * @return the node, or null if the map is absent or the ID is unknown
     */
    public Node getNode(Long nodeId) {
        if (nodeMap == null) {
            return null;
        }
        return nodeMap.get(nodeId);
    }

    @Getter
    @Setter
    // exclusions break the reference cycles between neighbouring nodes,
    // which would otherwise cause infinite recursion in equals/toString
    @EqualsAndHashCode(exclude = {"dependencies", "dependenceEdgeMap", "successorEdgeMap", "holder","successors"})
    @ToString(exclude = {"dependencies", "dependenceEdgeMap", "successorEdgeMap", "holder"})
    @NoArgsConstructor
    public static final class Node {

        /**
         * Wrap a point-line node into a reference-based node with empty
         * neighbour collections.
         */
        public Node(PEWorkflowDAG.Node node) {
            this.nodeId = node.getNodeId();
            this.holder = node;
            this.dependencies = Lists.newLinkedList();
            this.dependenceEdgeMap = Maps.newHashMap();
            this.successors = Lists.newLinkedList();
            this.successorEdgeMap = Maps.newHashMap();
        }

        /**
         * node id
         *
         * @since 20210128
         */
        private Long nodeId;

        // the underlying point-line node this wrapper was built from
        private PEWorkflowDAG.Node holder;

        /**
         * Upstream nodes this node depends on.
         */
        private List<Node> dependencies;

        /**
         * Edges connecting this node to each dependency.
         */
        private Map<Node, PEWorkflowDAG.Edge> dependenceEdgeMap;

        /**
         * Successors (downstream / child nodes).
         */
        private List<Node> successors;

        /**
         * Edges connecting this node to each successor.
         */
        private Map<Node, PEWorkflowDAG.Edge> successorEdgeMap;
    }
}

View File

@ -0,0 +1,340 @@
package tech.powerjob.server.core.workflow.algorithm;

import com.google.common.collect.*;
import tech.powerjob.common.SystemInstanceResult;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.enums.WorkflowNodeType;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.common.model.PEWorkflowDAG;
import tech.powerjob.common.serialize.JsonUtils;

import java.util.*;

/**
 * DAG utilities: validation, conversion between the point-line representation
 * ({@link PEWorkflowDAG}) and the reference representation ({@link WorkflowDAG}),
 * and ready-node / retry-node computations.
 *
 * @author tjq
 * @author Echo009
 * @since 2020/5/26
 */
public class WorkflowDAGUtils {

    private WorkflowDAGUtils() {
    }

    /**
     * Reset the status of retryable nodes: nodes that really failed (failed and
     * not allowed to be skipped) or were manually stopped are put back to
     * WAITING_DISPATCH so the workflow can be re-run.
     *
     * @param dag a valid DAG
     */
    public static void resetRetryableNode(PEWorkflowDAG dag) {
        for (PEWorkflowDAG.Node node : dag.getNodes()) {
            boolean realFailed = node.getStatus() == InstanceStatus.FAILED.getV() && isNotAllowSkipWhenFailed(node);
            if (realFailed || node.getStatus() == InstanceStatus.STOPPED.getV()) {
                node.setStatus(InstanceStatus.WAITING_DISPATCH.getV());
                // only JOB nodes carry a task instance id worth clearing
                if (node.getNodeType() == null || node.getNodeType() == WorkflowNodeType.JOB.getCode()) {
                    node.setInstanceId(null);
                }
            }
        }
    }

    /**
     * List all root nodes (nodes without incoming edges).
     *
     * @param peWorkflowDAG point-line DAG
     * @return root nodes
     */
    public static List<PEWorkflowDAG.Node> listRoots(PEWorkflowDAG peWorkflowDAG) {
        Map<Long, PEWorkflowDAG.Node> nodeId2Node = Maps.newHashMap();
        peWorkflowDAG.getNodes().forEach(node -> nodeId2Node.put(node.getNodeId(), node));
        // any node that is the target of an edge cannot be a root
        peWorkflowDAG.getEdges().forEach(edge -> nodeId2Node.remove(edge.getTo()));
        return Lists.newLinkedList(nodeId2Node.values());
    }

    /**
     * Validate a DAG: non-empty, no duplicate node IDs, and acyclic
     * (every node reachable from a root, no isolated cycles).
     *
     * @param peWorkflowDAG point-line DAG
     * @return true if the DAG is valid
     */
    public static boolean valid(PEWorkflowDAG peWorkflowDAG) {
        // an empty node list is never valid
        if (peWorkflowDAG.getNodes() == null || peWorkflowDAG.getNodes().isEmpty()) {
            return false;
        }
        // reject duplicate node IDs; Set.add returns false on a duplicate
        Set<Long> nodeIds = Sets.newHashSet();
        for (PEWorkflowDAG.Node n : peWorkflowDAG.getNodes()) {
            if (!nodeIds.add(n.getNodeId())) {
                return false;
            }
        }
        try {
            // IDs of every node reached during the root traversals
            HashSet<Long> traversalNodeIds = Sets.newHashSet();
            WorkflowDAG dag = convert(peWorkflowDAG);
            // check every path from every root for cycles
            for (WorkflowDAG.Node root : dag.getRoots()) {
                if (invalidPath(root, Sets.newHashSet(), traversalNodeIds)) {
                    return false;
                }
            }
            // every node should have been visited; otherwise there is an isolated cycle
            return traversalNodeIds.size() == nodeIds.size();
        } catch (Exception ignore) {
            // convert() throws on malformed graphs — treat as invalid
        }
        return false;
    }

    /**
     * Add by Echo009 on 2021/02/08
     * List ready nodes: unfinished nodes whose dependencies are all finished
     * (or absent). Disabled nodes (enable = false) encountered along the way
     * are marked SUCCEED directly and their ready successors are returned instead.
     *
     * @param dag point-line DAG
     * @return nodes that can be executed now
     */
    public static List<PEWorkflowDAG.Node> listReadyNodes(PEWorkflowDAG dag) {
        // nodeId -> Node
        Map<Long, PEWorkflowDAG.Node> nodeId2Node = Maps.newHashMap();
        List<PEWorkflowDAG.Node> dagNodes = dag.getNodes();
        for (PEWorkflowDAG.Node node : dagNodes) {
            nodeId2Node.put(node.getNodeId(), node);
        }
        // dependency map: which upstream nodes must finish before a node may run
        Multimap<Long, Long> relyMap = LinkedListMultimap.create();
        // successor map
        Multimap<Long, Long> successorMap = LinkedListMultimap.create();
        dag.getEdges().forEach(edge -> {
            relyMap.put(edge.getTo(), edge.getFrom());
            successorMap.put(edge.getFrom(), edge.getTo());
        });
        List<PEWorkflowDAG.Node> readyNodes = Lists.newArrayList();
        List<PEWorkflowDAG.Node> skipNodes = Lists.newArrayList();
        for (PEWorkflowDAG.Node currentNode : dagNodes) {
            if (!isReadyNode(currentNode.getNodeId(), nodeId2Node, relyMap)) {
                continue;
            }
            // disabled nodes are skipped outright
            if (currentNode.getEnable() != null && !currentNode.getEnable()) {
                skipNodes.add(currentNode);
            } else {
                readyNodes.add(currentNode);
            }
        }
        // resolve skipped nodes: mark them done and collect their ready successors
        if (!skipNodes.isEmpty()) {
            for (PEWorkflowDAG.Node skipNode : skipNodes) {
                readyNodes.addAll(moveAndObtainReadySuccessor(skipNode, nodeId2Node, relyMap, successorMap));
            }
        }
        return readyNodes;
    }

    /**
     * Mark a skipped node as SUCCEED and collect its successors that thereby
     * become ready; recurses depth-first through chains of disabled nodes.
     *
     * @param skippedNode  the node being skipped
     * @param nodeId2Node  nodeId -> Node
     * @param relyMap      to-node id -> list of from-node id
     * @param successorMap from-node id -> list of to-node id
     * @return ready successor nodes
     */
    private static List<PEWorkflowDAG.Node> moveAndObtainReadySuccessor(PEWorkflowDAG.Node skippedNode, Map<Long, PEWorkflowDAG.Node> nodeId2Node, Multimap<Long, Long> relyMap, Multimap<Long, Long> successorMap) {
        // record the skip on the node itself
        skippedNode.setStatus(InstanceStatus.SUCCEED.getV());
        skippedNode.setResult(SystemInstanceResult.DISABLE_NODE);
        // several disabled nodes may chain together, hence the recursion below
        List<PEWorkflowDAG.Node> readyNodes = Lists.newArrayList();
        List<PEWorkflowDAG.Node> skipNodes = Lists.newArrayList();
        Collection<Long> successors = successorMap.get(skippedNode.getNodeId());
        for (Long successor : successors) {
            // a successor is ready once all of its dependencies are finished
            if (isReadyNode(successor, nodeId2Node, relyMap)) {
                PEWorkflowDAG.Node node = nodeId2Node.get(successor);
                if (node.getEnable() != null && !node.getEnable()) {
                    // disabled as well — skip it too
                    skipNodes.add(node);
                    continue;
                }
                readyNodes.add(node);
            }
        }
        // depth-first: keep walking through chained disabled nodes
        if (!skipNodes.isEmpty()) {
            for (PEWorkflowDAG.Node node : skipNodes) {
                readyNodes.addAll(moveAndObtainReadySuccessor(node, nodeId2Node, relyMap, successorMap));
            }
        }
        return readyNodes;
    }

    /**
     * Whether a node is ready to run: it is not finished / dispatched itself,
     * and none of its dependencies is still in a generalized running status.
     *
     * @param nodeId      node id
     * @param nodeId2Node node id -> Node
     * @param relyMap     to-node id -> list of from-node id
     * @return true if the node is ready
     */
    private static boolean isReadyNode(long nodeId, Map<Long, PEWorkflowDAG.Node> nodeId2Node, Multimap<Long, Long> relyMap) {
        PEWorkflowDAG.Node currentNode = nodeId2Node.get(nodeId);
        int currentNodeStatus = currentNode.getStatus() == null ? InstanceStatus.WAITING_DISPATCH.getV() : currentNode.getStatus();
        // skip nodes that already finished (succeeded or failed) or were dispatched (running)
        if (InstanceStatus.FINISHED_STATUS.contains(currentNodeStatus)
                || currentNodeStatus == InstanceStatus.RUNNING.getV()) {
            return false;
        }
        Collection<Long> relyNodeIds = relyMap.get(nodeId);
        for (Long relyNodeId : relyNodeIds) {
            PEWorkflowDAG.Node relyNode = nodeId2Node.get(relyNodeId);
            int relyNodeStatus = relyNode.getStatus() == null ? InstanceStatus.WAITING_DISPATCH.getV() : relyNode.getStatus();
            // any unfinished dependency blocks this node.
            // FAILED dependencies are allowed here because skippable-on-failure nodes
            // exist; a non-skippable failure interrupts the workflow before this point.
            if (InstanceStatus.GENERALIZED_RUNNING_STATUS.contains(relyNodeStatus)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Whether the node must NOT be skipped on failure. Default (null) means
     * skipping is not allowed.
     */
    public static boolean isNotAllowSkipWhenFailed(PEWorkflowDAG.Node node) {
        return node.getSkipWhenFailed() == null || !node.getSkipWhenFailed();
    }

    /**
     * Propagate disabled edges:
     * 1. a node reachable ONLY through disabled edges is marked disabled
     *    (disableByControlNode) and its status set to CANCELED;
     * 2. every outgoing edge of such a node is disabled as well;
     * 3. recurse on the newly disabled edges (breadth-first).
     */
    @SuppressWarnings("squid:S3776")
    public static void handleDisableEdges(List<PEWorkflowDAG.Edge> disableEdges, WorkflowDAG dag) {
        if (disableEdges.isEmpty()) {
            return;
        }
        List<PEWorkflowDAG.Node> disableNodes = Lists.newArrayList();
        // disable a target node only when ALL of its incoming edges are disabled
        for (PEWorkflowDAG.Edge disableEdge : disableEdges) {
            WorkflowDAG.Node toNode = dag.getNode(disableEdge.getTo());
            Collection<PEWorkflowDAG.Edge> dependenceEdges = toNode.getDependenceEdgeMap().values();
            boolean shouldBeDisable = true;
            for (PEWorkflowDAG.Edge dependenceEdge : dependenceEdges) {
                if (dependenceEdge.getEnable() == null || dependenceEdge.getEnable()) {
                    // still reachable via an enabled (or unspecified) edge
                    shouldBeDisable = false;
                    break;
                }
            }
            if (shouldBeDisable) {
                PEWorkflowDAG.Node node = toNode.getHolder();
                node.setEnable(false)
                        .setDisableByControlNode(true)
                        .setStatus(InstanceStatus.CANCELED.getV());
                disableNodes.add(node);
            }
        }
        if (!disableNodes.isEmpty()) {
            // every outgoing edge of a disabled node becomes disabled too
            List<PEWorkflowDAG.Edge> targetEdges = Lists.newArrayList();
            for (PEWorkflowDAG.Node disableNode : disableNodes) {
                WorkflowDAG.Node node = dag.getNode(disableNode.getNodeId());
                Collection<PEWorkflowDAG.Edge> edges = node.getSuccessorEdgeMap().values();
                for (PEWorkflowDAG.Edge edge : edges) {
                    edge.setEnable(false);
                    targetEdges.add(edge);
                }
            }
            // breadth-first: keep propagating along the newly disabled edges
            handleDisableEdges(targetEdges, dag);
        }
    }

    /**
     * Convert the point-line DAG into the reference-based DAG.
     *
     * @param peWorkflowDAG point-line DAG
     * @return reference-based DAG
     * @throws PowerJobException if the graph is empty, has an edge referencing
     *                           an unknown node, or has no root (fully cyclic)
     */
    public static WorkflowDAG convert(PEWorkflowDAG peWorkflowDAG) {
        Set<Long> rootIds = Sets.newHashSet();
        Map<Long, WorkflowDAG.Node> id2Node = Maps.newHashMap();
        if (peWorkflowDAG.getNodes() == null || peWorkflowDAG.getNodes().isEmpty()) {
            throw new PowerJobException("empty graph");
        }
        // create nodes; initially every node is a root candidate
        peWorkflowDAG.getNodes().forEach(node -> {
            Long nodeId = node.getNodeId();
            WorkflowDAG.Node n = new WorkflowDAG.Node(node);
            id2Node.put(nodeId, n);
            rootIds.add(nodeId);
        });
        // wire up the edges
        peWorkflowDAG.getEdges().forEach(edge -> {
            WorkflowDAG.Node from = id2Node.get(edge.getFrom());
            WorkflowDAG.Node to = id2Node.get(edge.getTo());
            if (from == null || to == null) {
                throw new PowerJobException("Illegal Edge: " + JsonUtils.toJSONString(edge));
            }
            from.getSuccessors().add(to);
            from.getSuccessorEdgeMap().put(to, edge);
            to.getDependencies().add(from);
            to.getDependenceEdgeMap().put(from, edge);
            // a node with an incoming edge cannot be a root
            rootIds.remove(to.getNodeId());
        });
        // sanity check: at least one root must exist
        if (rootIds.isEmpty()) {
            throw new PowerJobException("Illegal DAG: " + JsonUtils.toJSONString(peWorkflowDAG));
        }
        List<WorkflowDAG.Node> roots = Lists.newLinkedList();
        rootIds.forEach(id -> roots.add(id2Node.get(id)));
        return new WorkflowDAG(roots, id2Node);
    }

    /**
     * DFS cycle check along one path.
     * {@code ids} holds the nodes on the current path (revisiting one means a
     * cycle); {@code nodeIdContainer} accumulates every node ever visited.
     * NOTE(review): the per-successor set copy makes this exponential on dense
     * diamond-shaped DAGs — acceptable for typical workflow sizes.
     */
    private static boolean invalidPath(WorkflowDAG.Node root, Set<Long> ids, Set<Long> nodeIdContainer) {
        // base cases: a repeated node on the path means a cycle (invalid);
        // a node without successors ends the path successfully
        if (ids.contains(root.getNodeId())) {
            return true;
        }
        nodeIdContainer.add(root.getNodeId());
        if (root.getSuccessors().isEmpty()) {
            return false;
        }
        ids.add(root.getNodeId());
        for (WorkflowDAG.Node node : root.getSuccessors()) {
            if (invalidPath(node, Sets.newHashSet(ids), nodeIdContainer)) {
                return true;
            }
        }
        return false;
    }
}

View File

@ -0,0 +1,22 @@
package tech.powerjob.server.core.workflow.hanlder;

import tech.powerjob.common.model.PEWorkflowDAG;
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;

/**
 * Handler for control nodes in a workflow DAG (nodes that steer execution
 * rather than run a job).
 * NOTE(review): the package segment "hanlder" is a typo of "handler"; left
 * as-is because renaming the package would break existing references.
 *
 * @author Echo009
 * @since 2021/12/9
 */
public interface ControlNodeHandler extends WorkflowNodeHandlerMarker {

    /**
     * Handle a control node.
     *
     * @param node           target node to be handled
     * @param dag            the DAG the node belongs to
     * @param wfInstanceInfo the workflow instance the node belongs to
     */
    void handle(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo);
}

View File

@ -0,0 +1,29 @@
package tech.powerjob.server.core.workflow.hanlder;

import tech.powerjob.common.model.PEWorkflowDAG;
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;

/**
 * Handler for task nodes in a workflow DAG: creation and start of the task
 * instance backing a node are separate steps.
 *
 * @author Echo009
 * @since 2021/12/9
 */
public interface TaskNodeHandler extends WorkflowNodeHandlerMarker {

    /**
     * Create the task instance for a node.
     *
     * @param node           target node
     * @param dag            the DAG the node belongs to
     * @param wfInstanceInfo owning workflow instance
     */
    void createTaskInstance(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo);

    /**
     * Start the previously created task instance of a node.
     *
     * @param node target node
     */
    void startTaskInstance(PEWorkflowDAG.Node node);
}

View File

@ -0,0 +1,20 @@
package tech.powerjob.server.core.workflow.hanlder;

import tech.powerjob.common.enums.WorkflowNodeType;

/**
 * Marker interface for workflow node handlers: each handler declares the
 * single node type it can process, enabling type-based dispatch.
 *
 * @author Echo009
 * @since 2021/12/9
 */
public interface WorkflowNodeHandlerMarker {

    /**
     * The node type this handler can process.
     *
     * @return matching node type
     */
    WorkflowNodeType matchingType();
}

View File

@ -0,0 +1,98 @@
package tech.powerjob.server.core.workflow.hanlder.impl;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.enums.WorkflowNodeType;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.common.model.PEWorkflowDAG;
import tech.powerjob.common.serialize.JsonUtils;
import tech.powerjob.server.core.evaluator.GroovyEvaluator;
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAG;
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils;
import tech.powerjob.server.core.workflow.hanlder.ControlNodeHandler;
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
import java.util.*;
/**
 * Handler for DECISION control nodes.
 * Evaluates the node's Groovy script against the workflow context and disables
 * the outgoing edges (and, transitively, downstream nodes) whose boolean edge
 * property does not match the evaluation result.
 *
 * @author Echo009
 * @since 2021/12/9
 */
@Slf4j
@Component
public class DecisionNodeHandler implements ControlNodeHandler {
    private final GroovyEvaluator groovyEvaluator = new GroovyEvaluator();
    /**
     * Handle a decision node:
     * 1. run the node's script (stored in nodeParams) against the workflow context;
     * 2. interpret the result as a boolean and disable the non-matching edges/nodes.
     *
     * @throws PowerJobException if the script is blank, fails to evaluate, or
     *                           returns a value that is neither Boolean nor Number
     */
    @Override
    public void handle(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) {
        String script = node.getNodeParams();
        if (StringUtils.isBlank(script)) {
            log.error("[Workflow-{}|{}]decision node's param is blank! nodeId:{}", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId());
            throw new PowerJobException("decision node's param is blank!");
        }
        // wfContext must be a map (serialized as JSON on the instance record)
        HashMap<String, String> wfContext = JSON.parseObject(wfInstanceInfo.getWfContext(), new TypeReference<HashMap<String, String>>() {
        });
        Object result;
        try {
            result = groovyEvaluator.evaluate(script, wfContext);
        } catch (Exception e) {
            log.error("[Workflow-{}|{}]failed to evaluate decision node,nodeId:{}", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), e);
            // preserve the original cause so callers can diagnose the script failure
            throw new PowerJobException("can't evaluate decision node!", e);
        }
        // Coerce the script result to a boolean: Boolean as-is, Number by sign (> 0 is true).
        boolean finalRes;
        if (result instanceof Boolean) {
            finalRes = ((Boolean) result);
        } else if (result instanceof Number) {
            finalRes = ((Number) result).doubleValue() > 0;
        } else {
            // covers null and any other unsupported return type
            log.error("[Workflow-{}|{}]decision node's return value is illegal,nodeId:{},result:{}", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), JsonUtils.toJSONString(result));
            throw new PowerJobException("decision node's return value is illegal!");
        }
        handleDag(finalRes, node, dag);
    }
    /**
     * Mark the decision node as SUCCEED and disable every outgoing edge whose
     * boolean property differs from the decision result.
     */
    private void handleDag(boolean res, PEWorkflowDAG.Node node, PEWorkflowDAG peDag) {
        // mark the decision node itself as finished, recording the decision as its result
        node.setResult(String.valueOf(res));
        node.setStatus(InstanceStatus.SUCCEED.getV());
        WorkflowDAG dag = WorkflowDAGUtils.convert(peDag);
        // disable the edges that do not match the computed result
        WorkflowDAG.Node targetNode = dag.getNode(node.getNodeId());
        Collection<PEWorkflowDAG.Edge> edges = targetNode.getSuccessorEdgeMap().values();
        if (edges.isEmpty()) {
            return;
        }
        List<PEWorkflowDAG.Edge> disableEdges = new ArrayList<>(edges.size());
        for (PEWorkflowDAG.Edge edge : edges) {
            // edge properties of a decision node are always "true"/"false", so parsing can't fail here
            boolean property = Boolean.parseBoolean(edge.getProperty());
            if (res != property) {
                // disable the non-matching branch
                edge.setEnable(false);
                disableEdges.add(edge);
            }
        }
        WorkflowDAGUtils.handleDisableEdges(disableEdges, dag);
    }
    @Override
    public WorkflowNodeType matchingType() {
        return WorkflowNodeType.DECISION;
    }
}

View File

@ -0,0 +1,52 @@
package tech.powerjob.server.core.workflow.hanlder.impl;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.enums.TimeExpressionType;
import tech.powerjob.common.enums.WorkflowNodeType;
import tech.powerjob.common.model.PEWorkflowDAG;
import tech.powerjob.server.common.utils.SpringUtils;
import tech.powerjob.server.core.DispatchService;
import tech.powerjob.server.core.instance.InstanceService;
import tech.powerjob.server.core.workflow.hanlder.TaskNodeHandler;
import tech.powerjob.server.persistence.remote.model.JobInfoDO;
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
import tech.powerjob.server.persistence.remote.repository.JobInfoRepository;
import java.util.Optional;
/**
 * Handler for plain JOB nodes: creates a job instance for the node and later
 * dispatches it for execution.
 *
 * @author Echo009
 * @since 2021/12/9
 */
@Slf4j
@Component
@RequiredArgsConstructor
public class JobNodeHandler implements TaskNodeHandler {
    private final JobInfoRepository jobInfoRepository;
    @Override
    public void createTaskInstance(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) {
        // The workflow instance's wfContext is handed down as the new job instance's instanceParams.
        InstanceService instanceService = SpringUtils.getBean(InstanceService.class);
        Long instanceId = instanceService
                .create(node.getJobId(), wfInstanceInfo.getAppId(), node.getNodeParams(), wfInstanceInfo.getWfContext(), wfInstanceInfo.getWfInstanceId(), System.currentTimeMillis(), null, null)
                .getInstanceId();
        // Record the spawned instance on the node and mark it as running.
        node.setInstanceId(instanceId);
        node.setStatus(InstanceStatus.RUNNING.getV());
        log.info("[Workflow-{}|{}] create readyNode(JOB) instance(nodeId={},jobId={},instanceId={}) successfully~", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), node.getJobId(), instanceId);
    }
    @Override
    public void startTaskInstance(PEWorkflowDAG.Node node) {
        JobInfoDO jobInfo = jobInfoRepository.findById(node.getJobId()).orElseGet(JobInfoDO::new);
        // Overwrite the time-expression type so the dispatcher treats this as a workflow-triggered run.
        jobInfo.setTimeExpressionType(TimeExpressionType.WORKFLOW.getV());
        DispatchService dispatchService = SpringUtils.getBean(DispatchService.class);
        dispatchService.dispatch(jobInfo, node.getInstanceId(), Optional.empty(), Optional.empty());
    }
    @Override
    public WorkflowNodeType matchingType() {
        return WorkflowNodeType.JOB;
    }
}

View File

@ -0,0 +1,96 @@
package tech.powerjob.server.core.workflow.hanlder.impl;
import com.alibaba.fastjson.JSON;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import tech.powerjob.common.SystemInstanceResult;
import tech.powerjob.common.enums.InstanceStatus;
import tech.powerjob.common.enums.WorkflowInstanceStatus;
import tech.powerjob.common.enums.WorkflowNodeType;
import tech.powerjob.common.exception.PowerJobException;
import tech.powerjob.common.model.PEWorkflowDAG;
import tech.powerjob.common.utils.CommonUtils;
import tech.powerjob.common.enums.SwitchableStatus;
import tech.powerjob.server.common.utils.SpringUtils;
import tech.powerjob.server.core.workflow.WorkflowInstanceManager;
import tech.powerjob.server.core.workflow.algorithm.WorkflowDAGUtils;
import tech.powerjob.server.core.workflow.hanlder.TaskNodeHandler;
import tech.powerjob.server.persistence.remote.model.WorkflowInfoDO;
import tech.powerjob.server.persistence.remote.model.WorkflowInstanceInfoDO;
import tech.powerjob.server.persistence.remote.repository.WorkflowInfoRepository;
import tech.powerjob.server.persistence.remote.repository.WorkflowInstanceInfoRepository;
import java.util.Date;
/**
 * Handler for NESTED_WORKFLOW nodes: a node whose jobId actually refers to another
 * workflow. On first run it creates a sub-workflow instance; on retry it resets the
 * existing sub-workflow instance instead of creating a new one.
 *
 * @author Echo009
 * @since 2021/12/13
 */
@Component
@Slf4j
@RequiredArgsConstructor
public class NestedWorkflowNodeHandler implements TaskNodeHandler {
    private final WorkflowInfoRepository workflowInfoRepository;
    private final WorkflowInstanceInfoRepository workflowInstanceInfoRepository;
    /**
     * Create (or, for retries, reset) the sub-workflow instance backing this node.
     *
     * @throws PowerJobException if the referenced workflow does not exist / is deleted,
     *                           or a retried sub-workflow instance is missing or has an invalid DAG
     */
    @Override
    public void createTaskInstance(PEWorkflowDAG.Node node, PEWorkflowDAG dag, WorkflowInstanceInfoDO wfInstanceInfo) {
        // Sanity check: for a nested-workflow node, jobId holds the target workflow's id.
        Long wfId = node.getJobId();
        WorkflowInfoDO targetWf = workflowInfoRepository.findById(wfId).orElse(null);
        if (targetWf == null || targetWf.getStatus() == SwitchableStatus.DELETED.getV()) {
            if (targetWf == null) {
                log.error("[Workflow-{}|{}] invalid nested workflow node({}),target workflow({}) is not exist!", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), node.getJobId());
            } else {
                log.error("[Workflow-{}|{}] invalid nested workflow node({}),target workflow({}) has been deleted!", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), node.getJobId());
            }
            throw new PowerJobException("invalid nested workflow node," + node.getNodeId());
        }
        if (node.getInstanceId() != null) {
            // Retry case: no new instance is created; only the existing sub-workflow instance's
            // status (and the states of its failed nodes) are reset.
            WorkflowInstanceInfoDO wfInstance = workflowInstanceInfoRepository.findByWfInstanceId(node.getInstanceId()).orElse(null);
            if (wfInstance == null) {
                log.error("[Workflow-{}|{}] invalid nested workflow node({}),target workflow instance({}) is not exist!", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), node.getInstanceId());
                throw new PowerJobException("invalid nested workflow instance id " + node.getInstanceId());
            }
            // No need to inspect the instance's current status: only FAILED nested-workflow
            // nodes are ever reset by the engine.
            // Failed nodes inside the sub-workflow must be reset to "waiting for dispatch".
            try {
                PEWorkflowDAG nodeDag = JSON.parseObject(wfInstance.getDag(), PEWorkflowDAG.class);
                if (!WorkflowDAGUtils.valid(nodeDag)) {
                    // NOTE(review): this throw is caught by the catch below and re-wrapped — presumably intentional; verify.
                    throw new PowerJobException(SystemInstanceResult.INVALID_DAG);
                }
                WorkflowDAGUtils.resetRetryableNode(nodeDag);
                wfInstance.setDag(JSON.toJSONString(nodeDag));
                wfInstance.setStatus(WorkflowInstanceStatus.WAITING.getV());
                wfInstance.setGmtModified(new Date());
                workflowInstanceInfoRepository.saveAndFlush(wfInstance);
            } catch (Exception e) {
                log.error("[Workflow-{}|{}] invalid nested workflow node({}),target workflow instance({})'s DAG is illegal!", wfInstanceInfo.getWorkflowId(), wfInstanceInfo.getWfInstanceId(), node.getNodeId(), node.getInstanceId(),e);
                throw new PowerJobException("illegal nested workflow instance, id : "+ node.getInstanceId());
            }
        } else {
            // First run: create a new sub-workflow instance, passing through the current
            // workflow context and recording the parent instance id.
            String wfContext = wfInstanceInfo.getWfContext();
            Long instanceId = SpringUtils.getBean(WorkflowInstanceManager.class).create(targetWf, wfContext, System.currentTimeMillis(), wfInstanceInfo.getWfInstanceId());
            node.setInstanceId(instanceId);
        }
        node.setStartTime(CommonUtils.formatTime(System.currentTimeMillis()));
        node.setStatus(InstanceStatus.RUNNING.getV());
    }
    /**
     * Start the sub-workflow instance previously created/reset for this node.
     */
    @Override
    public void startTaskInstance(PEWorkflowDAG.Node node) {
        Long wfId = node.getJobId();
        // NOTE(review): targetWf may be null here if the workflow was deleted concurrently —
        // presumably WorkflowInstanceManager.start handles that; confirm.
        WorkflowInfoDO targetWf = workflowInfoRepository.findById(wfId).orElse(null);
        SpringUtils.getBean(WorkflowInstanceManager.class).start(targetWf, node.getInstanceId());
    }
    @Override
    public WorkflowNodeType matchingType() {
        return WorkflowNodeType.NESTED_WORKFLOW;
    }
}