|
|
@@ -0,0 +1,322 @@
|
|
|
+package com.inspur.device.service.impl;
|
|
|
+
|
|
|
+import cn.hutool.http.HttpRequest;
|
|
|
+import cn.hutool.http.HttpResponse;
|
|
|
+import cn.hutool.json.JSONArray;
|
|
|
+import cn.hutool.json.JSONObject;
|
|
|
+import cn.hutool.json.JSONUtil;
|
|
|
+import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
|
|
+import com.baomidou.mybatisplus.core.toolkit.Wrappers;
|
|
|
+import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
|
|
+import com.inspur.device.domain.SmsbDifyDatasetsFile;
|
|
|
+import com.inspur.device.domain.bo.SmsbDifyDatasetsFileBo;
|
|
|
+import com.inspur.device.domain.vo.DifyDatasetsFileRspData;
|
|
|
+import com.inspur.device.domain.vo.SmsbDifyDatasetsFileVo;
|
|
|
+import com.inspur.device.domain.vo.SmsbDifyDatasetsVo;
|
|
|
+import com.inspur.device.mapper.SmsbDifyDatasetsFileMapper;
|
|
|
+import com.inspur.device.mapper.SmsbDifyDatasetsMapper;
|
|
|
+import com.inspur.device.service.ISmsbDifyDatasetsFileService;
|
|
|
+import lombok.RequiredArgsConstructor;
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.dromara.common.core.utils.MapstructUtils;
|
|
|
+import org.dromara.common.core.utils.StringUtils;
|
|
|
+import org.dromara.common.mybatis.core.page.PageQuery;
|
|
|
+import org.dromara.common.mybatis.core.page.TableDataInfo;
|
|
|
+import org.dromara.system.domain.vo.SysOssVo;
|
|
|
+import org.dromara.system.mapper.SysOssMapper;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.beans.factory.annotation.Value;
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.util.CollectionUtils;
|
|
|
+
|
|
|
+import java.io.File;
|
|
|
+import java.io.IOException;
|
|
|
+import java.io.InputStream;
|
|
|
+import java.net.URL;
|
|
|
+import java.nio.file.Files;
|
|
|
+import java.nio.file.Path;
|
|
|
+import java.nio.file.Paths;
|
|
|
+import java.nio.file.StandardCopyOption;
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.Collection;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Map;
|
|
|
+
|
|
|
+/**
|
|
|
+ * Dify知识库文件Service业务层处理
|
|
|
+ *
|
|
|
+ * @author Lion Li
|
|
|
+ * @date 2025-05-14
|
|
|
+ */
|
|
|
+@RequiredArgsConstructor
|
|
|
+@Service
|
|
|
+@Slf4j
|
|
|
+public class SmsbDifyDatasetsFileServiceImpl implements ISmsbDifyDatasetsFileService {
|
|
|
+
|
|
|
+ private final SmsbDifyDatasetsFileMapper baseMapper;
|
|
|
+
|
|
|
+ private final SmsbDifyDatasetsMapper smsbDifyDatasetsMapper;
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ private SysOssMapper sysOssMapper;
|
|
|
+
|
|
|
+ @Value("${server.tempDir}")
|
|
|
+ private String tempDir;
|
|
|
+
|
|
|
+ @Value("${dify.url}")
|
|
|
+ private String difyUrl;
|
|
|
+
|
|
|
+ @Value("${dify.datasets.apiKey}")
|
|
|
+ private String datasetsApiKey;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 知识库API
|
|
|
+ */
|
|
|
+ private final static String API_DATASETS_COMMON = "/v1/datasets";
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 查询Dify知识库文件
|
|
|
+ *
|
|
|
+ * @param id 主键
|
|
|
+ * @return Dify知识库文件
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ public SmsbDifyDatasetsFileVo queryById(Long id) {
|
|
|
+ return baseMapper.selectVoById(id);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 分页查询Dify知识库文件列表
|
|
|
+ *
|
|
|
+ * @param bo 查询条件
|
|
|
+ * @param pageQuery 分页参数
|
|
|
+ * @return Dify知识库文件分页列表
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ public TableDataInfo<SmsbDifyDatasetsFileVo> queryPageList(SmsbDifyDatasetsFileBo bo, PageQuery pageQuery) {
|
|
|
+ LambdaQueryWrapper<SmsbDifyDatasetsFile> lqw = buildQueryWrapper(bo);
|
|
|
+ Page<SmsbDifyDatasetsFileVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
|
|
|
+ return TableDataInfo.build(result);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 查询符合条件的Dify知识库文件列表
|
|
|
+ *
|
|
|
+ * @param bo 查询条件
|
|
|
+ * @return Dify知识库文件列表
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ public List<SmsbDifyDatasetsFileVo> queryList(SmsbDifyDatasetsFileBo bo) {
|
|
|
+ LambdaQueryWrapper<SmsbDifyDatasetsFile> lqw = buildQueryWrapper(bo);
|
|
|
+ return baseMapper.selectVoList(lqw);
|
|
|
+ }
|
|
|
+
|
|
|
+ private LambdaQueryWrapper<SmsbDifyDatasetsFile> buildQueryWrapper(SmsbDifyDatasetsFileBo bo) {
|
|
|
+ Map<String, Object> params = bo.getParams();
|
|
|
+ LambdaQueryWrapper<SmsbDifyDatasetsFile> lqw = Wrappers.lambdaQuery();
|
|
|
+ lqw.like(StringUtils.isNotBlank(bo.getName()), SmsbDifyDatasetsFile::getName, bo.getName());
|
|
|
+ lqw.eq(StringUtils.isNotBlank(bo.getDatasetDifyId()), SmsbDifyDatasetsFile::getDatasetsDifyId, bo.getDatasetDifyId());
|
|
|
+ return lqw;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 新增Dify知识库文件
|
|
|
+ *
|
|
|
+ * @param bo Dify知识库文件
|
|
|
+ * @return 是否新增成功
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ public Boolean insertByBo(SmsbDifyDatasetsFileBo bo) {
|
|
|
+ SmsbDifyDatasetsFile add = new SmsbDifyDatasetsFile();
|
|
|
+ add.setOssId(bo.getOssId());
|
|
|
+ add.setDatasetsDifyId(bo.getDatasetsId());
|
|
|
+ // 1、 获取oss info,将文件保存至临时路径
|
|
|
+ Long ossId = add.getOssId();
|
|
|
+ SysOssVo sysOssVo = sysOssMapper.selectVoById(ossId);
|
|
|
+ String tempLocalPath = "";
|
|
|
+ try {
|
|
|
+ tempLocalPath = saveToLocal(sysOssVo);
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("save file to local error: {}", e.getMessage());
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ // 2、 调用dify接口,将文件上传至dify
|
|
|
+ String requestUrl = difyUrl + API_DATASETS_COMMON + "/" + add.getDatasetsDifyId() + "/document/create-by-file";
|
|
|
+ String requestBody = createAddFileRequestBody();
|
|
|
+
|
|
|
+ // 发送请求
|
|
|
+ HttpResponse response = HttpRequest.post(requestUrl)
|
|
|
+ .header("Authorization", "Bearer " + datasetsApiKey)
|
|
|
+ .form("data", requestBody)
|
|
|
+ .form("file", new File(tempLocalPath))
|
|
|
+ .execute();
|
|
|
+ // 处理响应
|
|
|
+ if (!response.isOk()) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ // 3、 获取接口返回文件ID
|
|
|
+ JSONObject document = JSONUtil.parseObj(response.body()).get("document", JSONObject.class);
|
|
|
+ String difyId = document.getStr("id");
|
|
|
+ // 4、 保存至数据库
|
|
|
+ add.setDifyId(difyId);
|
|
|
+ // 获取知识库信息
|
|
|
+ SmsbDifyDatasetsVo datasetsVo = smsbDifyDatasetsMapper.selectVoByDifyId(add.getDatasetsDifyId());
|
|
|
+ add.setDatasetsId(datasetsVo.getId());
|
|
|
+ add.setDatasetsName(datasetsVo.getName());
|
|
|
+ add.setName(sysOssVo.getOriginalName());
|
|
|
+ add.setIndexingStatus(document.getStr("indexing_status"));
|
|
|
+ add.setPosition(document.getInt("position"));
|
|
|
+ add.setTokens(document.getInt("tokens"));
|
|
|
+ add.setWordCount(document.getInt("word_count"));
|
|
|
+ add.setFileUrl(sysOssVo.getUrl());
|
|
|
+ add.setFilePath(tempLocalPath);
|
|
|
+ baseMapper.insert(add);
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+
|
|
|
+ private String createAddFileRequestBody() {
|
|
|
+ // 构建 JSON 格式的 data 参数
|
|
|
+ JSONObject dataJson = new JSONObject();
|
|
|
+ // 1-索引方式
|
|
|
+ dataJson.put("indexing_technique", "high_quality");
|
|
|
+
|
|
|
+ // 2-处理规则
|
|
|
+ JSONObject processRule = new JSONObject();
|
|
|
+ // 2-1 清洗、分段模式
|
|
|
+ processRule.put("mode", "custom");
|
|
|
+ // 2-2 自定义规则
|
|
|
+ JSONObject rules = new JSONObject();
|
|
|
+ // 2-2-1 预处理规则
|
|
|
+ JSONArray preProcessingRules = new JSONArray();
|
|
|
+ JSONObject rule1 = new JSONObject();
|
|
|
+ rule1.put("id", "remove_extra_spaces");
|
|
|
+ rule1.put("enabled", true);
|
|
|
+ preProcessingRules.add(rule1);
|
|
|
+ JSONObject rule2 = new JSONObject();
|
|
|
+ rule2.put("id", "remove_urls_emails");
|
|
|
+ rule2.put("enabled", true);
|
|
|
+ preProcessingRules.add(rule2);
|
|
|
+ // 2-2-1 分段规则
|
|
|
+ JSONObject segmentation = new JSONObject();
|
|
|
+ segmentation.put("separator", "###");
|
|
|
+ segmentation.put("max_tokens", 800);
|
|
|
+ rules.put("pre_processing_rules", preProcessingRules);
|
|
|
+ rules.put("segmentation", segmentation);
|
|
|
+ processRule.put("rules", rules);
|
|
|
+
|
|
|
+ dataJson.put("process_rule", processRule);
|
|
|
+ return dataJson.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ private String saveToLocal(SysOssVo sysOssVo) throws IOException {
|
|
|
+ String originalName = sysOssVo.getOriginalName();
|
|
|
+ // 创建一个临时路径
|
|
|
+ String localPath = tempDir + "/" + System.currentTimeMillis();
|
|
|
+ Path path = Paths.get(localPath);
|
|
|
+ Files.createDirectories(path);
|
|
|
+ // 将文件保存到临时路径
|
|
|
+ String filePath = localPath + "/" + originalName;
|
|
|
+ String fileUrl = sysOssVo.getUrl();
|
|
|
+ URL url = new URL(fileUrl);
|
|
|
+ InputStream in = url.openStream();
|
|
|
+ Path tempFilePath = Paths.get(filePath);
|
|
|
+ Files.copy(in, tempFilePath, StandardCopyOption.REPLACE_EXISTING);
|
|
|
+ return filePath;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 修改Dify知识库文件
|
|
|
+ *
|
|
|
+ * @param bo Dify知识库文件
|
|
|
+ * @return 是否修改成功
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ public Boolean updateByBo(SmsbDifyDatasetsFileBo bo) {
|
|
|
+ SmsbDifyDatasetsFile update = MapstructUtils.convert(bo, SmsbDifyDatasetsFile.class);
|
|
|
+ return baseMapper.updateById(update) > 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 校验并批量删除Dify知识库文件信息
|
|
|
+ *
|
|
|
+ * @param ids 待删除的主键集合
|
|
|
+ * @param isValid 是否进行有效性校验
|
|
|
+ * @return 是否删除成功
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
|
|
|
+ return baseMapper.deleteByIds(ids) > 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 同步知识库文件列表
|
|
|
+ *
|
|
|
+ * @param datasetsDifyId
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ public boolean syncFileList(String datasetsDifyId) {
|
|
|
+ String requestUrl = difyUrl + API_DATASETS_COMMON + "/" + datasetsDifyId + "/documents?page=1&limit=100";
|
|
|
+ HttpRequest request = HttpRequest.get(requestUrl)
|
|
|
+ .header("Authorization", "Bearer " + datasetsApiKey);
|
|
|
+ HttpResponse response = request.execute();
|
|
|
+ if (!response.isOk()) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ String responseBody = response.body();
|
|
|
+ JSONObject jsonObject = JSONUtil.parseObj(responseBody);
|
|
|
+ // 1 如果结果为0 直接返回
|
|
|
+ Integer total = jsonObject.getInt("total");
|
|
|
+ if (total == 0) {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ // 知识库信息
|
|
|
+ SmsbDifyDatasetsVo smsbDifyDatasetsVo = smsbDifyDatasetsMapper.selectVoByDifyId(datasetsDifyId);
|
|
|
+
|
|
|
+ // data中取出知识库列表
|
|
|
+ List<DifyDatasetsFileRspData> datasetsFiles = jsonObject.getJSONArray("data").toList(DifyDatasetsFileRspData.class);
|
|
|
+ // 获取数据库中的所有文件
|
|
|
+ List<SmsbDifyDatasetsFileVo> datasetsFileVoList = baseMapper.selectVoList(new LambdaQueryWrapper<SmsbDifyDatasetsFile>()
|
|
|
+ .eq(SmsbDifyDatasetsFile::getDatasetsDifyId, datasetsDifyId));
|
|
|
+ List<SmsbDifyDatasetsFile> insertList = new ArrayList<>();
|
|
|
+ // 数据库为空 则全部新增
|
|
|
+ if (CollectionUtils.isEmpty(datasetsFileVoList)) {
|
|
|
+ for (DifyDatasetsFileRspData datasetFile : datasetsFiles) {
|
|
|
+ SmsbDifyDatasetsFile oneInsert = buildDatasetFileByDifyData(datasetFile, smsbDifyDatasetsVo);
|
|
|
+ insertList.add(oneInsert);
|
|
|
+ }
|
|
|
+ baseMapper.insertBatch(insertList);
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ List<String> dbDifyIds = datasetsFileVoList.stream().map(SmsbDifyDatasetsFileVo::getDifyId).toList();
|
|
|
+ // 数据库不为空 则遍历数据库中的知识库文件
|
|
|
+ for (DifyDatasetsFileRspData datasetFile : datasetsFiles) {
|
|
|
+ // 数据库中不包含这个id 则进行新增
|
|
|
+ if (!dbDifyIds.contains(datasetFile.getId())) {
|
|
|
+ SmsbDifyDatasetsFile oneInsert = buildDatasetFileByDifyData(datasetFile, smsbDifyDatasetsVo);
|
|
|
+ insertList.add(oneInsert);
|
|
|
+ } else {
|
|
|
+ // 数据库中包含这个id 则进行更新
|
|
|
+ SmsbDifyDatasetsFile oneUpdate = buildDatasetFileByDifyData(datasetFile, smsbDifyDatasetsVo);
|
|
|
+ oneUpdate.setId(datasetsFileVoList.stream().filter(vo -> vo.getDifyId().equals(datasetFile.getId())).findFirst().get().getId());
|
|
|
+ baseMapper.updateById(oneUpdate);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ baseMapper.insertBatch(insertList);
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+
|
|
|
+ private SmsbDifyDatasetsFile buildDatasetFileByDifyData(DifyDatasetsFileRspData datasetFile, SmsbDifyDatasetsVo smsbDifyDatasetsVo) {
|
|
|
+ SmsbDifyDatasetsFile oneInsert = new SmsbDifyDatasetsFile();
|
|
|
+ oneInsert.setDatasetsName(smsbDifyDatasetsVo.getName());
|
|
|
+ oneInsert.setDatasetsId(smsbDifyDatasetsVo.getId());
|
|
|
+ oneInsert.setDatasetsDifyId(smsbDifyDatasetsVo.getDifyId());
|
|
|
+ oneInsert.setDifyId(datasetFile.getId());
|
|
|
+ oneInsert.setName(datasetFile.getName());
|
|
|
+ oneInsert.setIndexingStatus(datasetFile.getIndexing_status());
|
|
|
+ oneInsert.setPosition(datasetFile.getPosition());
|
|
|
+ oneInsert.setTokens(datasetFile.getTokens());
|
|
|
+ oneInsert.setWordCount(datasetFile.getWord_count());
|
|
|
+ return oneInsert;
|
|
|
+ }
|
|
|
+}
|