zero 10 months ago
parent
commit
aede1b1a4b

+ 0 - 4
src/main/java/cn/cslg/wdc/entity/DiscrepancyDetail.java

@@ -8,7 +8,6 @@ import lombok.Data;
 @TableName("discrepancy_detail")
 public class DiscrepancyDetail extends BaseEntity<DiscrepancyDetail>{
 
-    //案件id
     @TableField(value = "discrepancy_id")
     private Integer discrepancyId;
 
@@ -24,9 +23,6 @@ public class DiscrepancyDetail extends BaseEntity<DiscrepancyDetail>{
     @TableField(value = "del_world_count")
     private Integer delWorldCount;
 
-    @TableField(value = "diff_degree")
-    private String diffDegree;
-
     @TableField(value = "diff_type")
     private Integer diffType;
 

+ 447 - 73
src/main/java/cn/cslg/wdc/service/CaseFileService.java

@@ -6,10 +6,17 @@ import cn.cslg.wdc.common.utils.ReadExcelUtils;
 import cn.cslg.wdc.dto.common.GetBaseInfoByWDDTO;
 import cn.cslg.wdc.dto.common.GetCaseInfoByWDDTO;
 import cn.cslg.wdc.dto.common.GetPatentActionByWDDTO;
+import cn.cslg.wdc.dto.common.SectionDiffCommandVisitor;
 import cn.cslg.wdc.entity.AssoCaseFile;
 import cn.cslg.wdc.entity.CaseFile;
+import cn.cslg.wdc.entity.Discrepancy;
+import cn.cslg.wdc.entity.DiscrepancyDetail;
 import cn.cslg.wdc.entity.common.PatentData;
+import cn.cslg.wdc.mapper.AssoCaseFileMapper;
 import cn.cslg.wdc.mapper.CaseFileMapper;
+import cn.cslg.wdc.mapper.DiscrepancyDetailMapper;
+import cn.cslg.wdc.mapper.DiscrepancyMapper;
+import cn.cslg.wdc.service.common.CosineSimilarityService;
 import cn.cslg.wdc.service.common.FileManagerService;
 import cn.cslg.wdc.service.common.GetReportInfoFromWDService;
 import cn.cslg.wdc.vo.common.GetBaseInfoByWDVO;
@@ -19,6 +26,7 @@ import cn.hutool.core.io.FileUtil;
 import cn.hutool.core.util.IdUtil;
 import cn.hutool.core.util.ZipUtil;
 import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson2.JSON;
 import com.alibaba.fastjson2.JSONObject;
 import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
 import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
@@ -27,7 +35,18 @@ import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.compress.utils.IOUtils;
 import org.apache.commons.lang3.ObjectUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.text.diff.EditScript;
+import org.apache.commons.text.diff.StringsComparator;
+import org.apache.poi.hwpf.extractor.WordExtractor;
 import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.select.Elements;
 import org.openqa.selenium.*;
 import org.openqa.selenium.chrome.ChromeDriver;
 import org.openqa.selenium.chrome.ChromeOptions;
@@ -36,10 +55,13 @@ import org.openqa.selenium.support.ui.WebDriverWait;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.context.annotation.Lazy;
+import org.springframework.scheduling.annotation.Scheduled;
 import org.springframework.stereotype.Service;
 import org.springframework.util.CollectionUtils;
 
 import java.io.*;
+import java.math.BigDecimal;
+import java.math.RoundingMode;
 import java.nio.charset.Charset;
 import java.nio.file.*;
 import java.nio.file.attribute.BasicFileAttributes;
@@ -73,15 +95,32 @@ public class CaseFileService extends ServiceImpl<CaseFileMapper, CaseFile> {
     private CaseFileMapper caseFileMapper;
 
     @Autowired
+    private AssoCaseFileMapper assoCaseFileMapper;
+
+    @Autowired
     private FileManagerService fileManagerService;
 
+    @Autowired
+    private CosineSimilarityService cosineSimilarityService;
+
+    @Autowired
+    private DiscrepancyMapper discrepancyMapper;
+
+    @Autowired
+    private DiscrepancyDetailMapper discrepancyDetailMapper;
+
     /**
      * 根据新申请类型,状态为递交中条件去查询当前时间的案件
+     * 获取案件信息后进行保存
      * 每日定时01:00启动方法
+     *
      * @return
      */
-//    @Scheduled(cron = "*****")
-    public void getCaseInfoByHanding() throws Exception{
+//    @Scheduled(cron = "0 0 1 * * ?")
+    public void getCaseInfoByHanding() throws Exception {
+        //重试机制
+        int retry = 0;
+        int retrySum = 3;
         //初始化谷歌浏览器
         ChromeOptions options = new ChromeOptions();
         options.addArguments("user-data-dir=" + strFileSavePath);
@@ -204,8 +243,10 @@ public class CaseFileService extends ServiceImpl<CaseFileMapper, CaseFile> {
                 return null;
             });
         } catch (Exception e) {
-            e.printStackTrace();
-            throw new RuntimeException("An error occurred while retrieving case info", e);
+            retry += 1;
+            if (retry < retrySum) {
+                this.getCaseInfoByFinish();
+            }
         } finally {
             if (driver != null) {
                 driver.quit();
@@ -215,18 +256,27 @@ public class CaseFileService extends ServiceImpl<CaseFileMapper, CaseFile> {
             try {
                 Thread.sleep(1000);
             } catch (InterruptedException e) {
-                e.printStackTrace();
+                retry += 1;
+                if (retry < retrySum) {
+                    this.getCaseInfoByFinish();
+                }
             }
             this.getCaseFile(caseNo);
+            this.addDiscrepancy(caseNo);
+
         }
     }
 
     /**
      * 根据新申请类型,状态为递交中条件去查询当前时间的案件
      * 每日定时02:00启动方法
+     *
      * @return
      */
-    public void getCaseInfoByFinish() {
+//    @Scheduled(cron = "0 0 3 * * ?")
+    public void getCaseInfoByFinish() throws Exception {
+        int retry = 0;
+        int retrySum = 3;
         //初始化谷歌浏览器
         ChromeOptions options = new ChromeOptions();
         options.addArguments("user-data-dir=" + strFileSavePath);
@@ -364,8 +414,10 @@ public class CaseFileService extends ServiceImpl<CaseFileMapper, CaseFile> {
                 return null;
             });
         } catch (Exception e) {
-            e.printStackTrace();
-            throw new RuntimeException("An error occurred while retrieving case info", e);
+            retry += 1;
+            if (retry < retrySum) {
+                this.getCaseInfoByFinish();
+            }
         } finally {
             if (driver != null) {
                 driver.quit();
@@ -375,14 +427,19 @@ public class CaseFileService extends ServiceImpl<CaseFileMapper, CaseFile> {
             try {
                 Thread.sleep(1000);
             } catch (InterruptedException e) {
-                e.printStackTrace();
-            }
+                retry += 1;
+                if (retry < retrySum) {
+                    this.getCaseInfoByFinish();
+                }
+        }
             this.getCaseFile(caseNo);
+            this.addDiscrepancy(caseNo);
         }
     }
 
     /**
      * 获取案件信息并保存
+     *
      * @param cookie
      * @param uuid
      * @param number
@@ -390,7 +447,7 @@ public class CaseFileService extends ServiceImpl<CaseFileMapper, CaseFile> {
      * @throws IOException
      */
     public List<String> getCaseInfo(String cookie, String uuid, String number) throws Exception {
-        byte[] bytes = getReportInfoFromWDService.exportFileFromWD(cookie,uuid,number);
+        byte[] bytes = getReportInfoFromWDService.exportFileFromWD(cookie, uuid, number);
         //创建临时文件tempFile,并将文件读取到tempFile
         File tempFile = File.createTempFile(IdUtil.simpleUUID() + "temp", ".xlsx");
         try (
@@ -443,6 +500,11 @@ public class CaseFileService extends ServiceImpl<CaseFileMapper, CaseFile> {
         return list;
     }
 
+    /**
+     * 根据案号获取该案件的文件信息
+     * 初稿、内部定稿、定稿分别进行保存到文件关联表中
+     * @param caseNo
+     */
     public void getCaseFile(String caseNo) {
         ChromeOptions options = new ChromeOptions();
         options.addArguments("user-data-dir=" + strFileSavePath);
@@ -550,7 +612,7 @@ public class CaseFileService extends ServiceImpl<CaseFileMapper, CaseFile> {
             JSONObject jsonObject1 = JSONObject.parseObject(caseInfo);
             List<GetCaseInfoByWDVO> caseInfoRows = JSONArray.parseArray(jsonObject1.getString("TableRows"), GetCaseInfoByWDVO.class);
             if (!CollectionUtils.isEmpty(caseInfoRows)) {
-                GetCaseInfoByWDVO caseInfoByWDVO = caseInfoRows.stream().filter(i -> i.getCtrl_proc().equals("新申请")).findFirst().orElse(null);
+                GetCaseInfoByWDVO caseInfoByWDVO = caseInfoRows.stream().filter(i -> i.getCtrl_proc().equals("新申请")).findFirst().orElse(new GetCaseInfoByWDVO());
                 if (ObjectUtils.isNotEmpty(caseInfoByWDVO)) {
                     String procId = caseInfoByWDVO.getProc_id();
                     GetPatentActionByWDDTO vo = new GetPatentActionByWDDTO();
@@ -575,100 +637,124 @@ public class CaseFileService extends ServiceImpl<CaseFileMapper, CaseFile> {
         }
     }
 
-    public void getCaseFile(String result,String cookieStr,String caseNo) throws Exception {
+    /**
+     * 精确区分出初稿、内部定稿、定稿文件
+     * @param result
+     * @param cookieStr
+     * @param caseNo
+     * @throws Exception
+     */
+    public void getCaseFile(String result, String cookieStr, String caseNo) throws Exception {
         JSONObject jsonObject = JSONObject.parseObject(result);
-        List<TableRowsVO> rowsVOS = JSONArray.parseArray(jsonObject.getString("TableRows"), TableRowsVO.class);
+        List<TableRowsVO> rowsVOS = JSON.parseArray(jsonObject.getString("TableRows"), TableRowsVO.class);
         if (!CollectionUtils.isEmpty(rowsVOS)) {
-            TableRowsVO rowsVO = rowsVOS.stream().filter(i -> i.getFile_desc().equals("新申请第一次内审")).findFirst().orElse(null);
+            TableRowsVO rowsVO = rowsVOS.stream().filter(i -> i.getFile_desc().equals("新申请第一次内审")).findFirst().orElse(new TableRowsVO());
             if (ObjectUtils.isNotEmpty(rowsVO)) {
                 GetBaseInfoByWDDTO getBaseInfoByWDDTO = new GetBaseInfoByWDDTO();
                 getBaseInfoByWDDTO.setFile_no(rowsVO.getFile_no());
                 getBaseInfoByWDDTO.setFile_name(rowsVO.getFile_name());
                 String res = getReportInfoFromWDService.getBaseInfoByWD(getBaseInfoByWDDTO, cookieStr);
                 GetBaseInfoByWDVO baseInfoByWDVO = JSONObject.parseObject(res, GetBaseInfoByWDVO.class);
-                System.out.println(baseInfoByWDVO);
                 this.uploadCaseFile(cookieStr, baseInfoByWDVO.getFile_guid(), baseInfoByWDVO.getFile_size(),
-                        rowsVO.getFile_name(),caseNo,1);
+                        rowsVO.getFile_name(), caseNo, 1);
             }
-            TableRowsVO rowsVO1 = rowsVOS.stream().filter(i -> i.getFile_desc().equals("新申请第一次返稿")).findFirst().orElse(null);
+            TableRowsVO rowsVO1 = rowsVOS.stream().filter(i -> i.getFile_desc().equals("新申请第一次返稿")).findFirst().orElse(new TableRowsVO());
             if (ObjectUtils.isNotEmpty(rowsVO1)) {
                 GetBaseInfoByWDDTO getBaseInfoByWDDTO = new GetBaseInfoByWDDTO();
                 getBaseInfoByWDDTO.setFile_no(rowsVO1.getFile_no());
                 getBaseInfoByWDDTO.setFile_name(rowsVO1.getFile_name());
                 String res = getReportInfoFromWDService.getBaseInfoByWD(getBaseInfoByWDDTO, cookieStr);
                 GetBaseInfoByWDVO baseInfoByWDVO = JSONObject.parseObject(res, GetBaseInfoByWDVO.class);
-                System.out.println(baseInfoByWDVO);
                 this.uploadCaseFile(cookieStr, baseInfoByWDVO.getFile_guid(), baseInfoByWDVO.getFile_size(),
-                        rowsVO1.getFile_name(),caseNo,2);
+                        rowsVO1.getFile_name(), caseNo, 2);
             }
-            TableRowsVO rowsVO2 = rowsVOS.stream().filter(i -> i.getFile_desc().equals("新申请文档")).findFirst().orElse(null);
+            TableRowsVO rowsVO2 = rowsVOS.stream().filter(i -> i.getFile_desc().equals("新申请文档")).findFirst().orElse(new TableRowsVO());
             if (ObjectUtils.isNotEmpty(rowsVO2)) {
                 GetBaseInfoByWDDTO getBaseInfoByWDDTO = new GetBaseInfoByWDDTO();
                 getBaseInfoByWDDTO.setFile_no(rowsVO2.getFile_no());
                 getBaseInfoByWDDTO.setFile_name(rowsVO2.getFile_name());
                 String res = getReportInfoFromWDService.getBaseInfoByWD(getBaseInfoByWDDTO, cookieStr);
                 GetBaseInfoByWDVO baseInfoByWDVO = JSONObject.parseObject(res, GetBaseInfoByWDVO.class);
-                System.out.println(baseInfoByWDVO);
                 this.uploadCaseFile(cookieStr, baseInfoByWDVO.getFile_guid(), baseInfoByWDVO.getFile_size(),
-                        rowsVO2.getFile_name(),caseNo,3);
+                        rowsVO2.getFile_name(), caseNo, 3);
             }
         }
     }
 
-    public void uploadCaseFile(String cookie, String guid, String number,String fileName,String caseNo,Integer type) throws Exception {
-        byte[] bytes = getReportInfoFromWDService.exportFileFromWD(cookie, guid, number);//创建临时文件tempFile,并将文件读取到tempFile
-        File tempFile = null;
-        if (fileName.endsWith(".docx")) {
-            tempFile = File.createTempFile("temp_", ".docx");
-        } else if (fileName.endsWith(".zip")) {
-            tempFile = File.createTempFile("temp_", ".zip");
-        }
-        List<String> ids = new ArrayList<>();
-        try (
-                InputStream inputStream = new ByteArrayInputStream(bytes);
-                FileOutputStream outputStream = new FileOutputStream(tempFile)
-        ) {
-            IOUtils.copy(inputStream, outputStream); // 将输入流复制到临时文件
-            if (fileName.endsWith(".docx")) {
-                ids = fileManagerService.uploadFileGetGuid2(Arrays.asList(tempFile));
-            } else if (fileName.endsWith(".zip")) {
-                String tempDirectoryName = IdUtil.simpleUUID();
-                String tempPath = null;
-                try {
-                    tempPath = FileUtils.getSystemPath2(tempDirectoryName);
-                } catch (Exception e) {
-                    tempPath = "F:\\file\\"  + tempDirectoryName;
+    /**
+     * 将文件直接保存
+     *
+     * @param cookie
+     * @param guid
+     * @param number
+     * @param fileName
+     * @param caseNo
+     * @param type
+     * @throws Exception
+     */
+    public void uploadCaseFile(String cookie, String guid, String number, String fileName, String caseNo, Integer type) throws Exception {
+        CaseFile caseFile = caseFileMapper.selectOne(new LambdaQueryWrapper<CaseFile>()
+                .eq(CaseFile::getCaseNo, caseNo));
+        if (ObjectUtils.isNotEmpty(caseFile)) {
+            AssoCaseFile assoCaseFile1 = assoCaseFileMapper.selectOne(new LambdaQueryWrapper<AssoCaseFile>()
+                    .eq(AssoCaseFile::getCaseId, caseFile.getId())
+                    .eq(AssoCaseFile::getFileType, type));
+            if (ObjectUtils.isEmpty(assoCaseFile1)) {
+                byte[] bytes = getReportInfoFromWDService.exportFileFromWD(cookie, guid, number);//创建临时文件tempFile,并将文件读取到tempFile
+                File tempFile = null;
+                if (fileName.endsWith(".docx")) {
+                    tempFile = File.createTempFile("temp_", ".docx");
+                } else if (fileName.endsWith(".doc")) {
+                    tempFile = File.createTempFile("temp_", ".doc");
+                } else if (fileName.endsWith(".zip")) {
+                    tempFile = File.createTempFile("temp_", ".zip");
                 }
-                File tempDirectory = new File(tempPath);
-                if (!tempDirectory.exists()) {
-                    tempDirectory.mkdir();
+                List<String> ids = new ArrayList<>();
+                try (
+                        InputStream inputStream = new ByteArrayInputStream(bytes);
+                        FileOutputStream outputStream = new FileOutputStream(tempFile)
+                ) {
+                    IOUtils.copy(inputStream, outputStream); // 将输入流复制到临时文件
+                    if (fileName.endsWith(".docx") || fileName.equals(".doc")) {
+                        ids = fileManagerService.uploadFileGetGuid2(Collections.singletonList(tempFile));
+                    } else if (fileName.endsWith(".zip")) {
+                        String tempDirectoryName = IdUtil.simpleUUID();
+                        String tempPath = null;
+                        try {
+                            tempPath = FileUtils.getSystemPath2(tempDirectoryName);
+                        } catch (Exception e) {
+                            //todo
+                            tempPath = "F:\\file\\" + tempDirectoryName;
+                        }
+                        File tempDirectory = new File(tempPath);
+                        if (!tempDirectory.exists()) {
+                            tempDirectory.mkdir();
+                        }
+                        ZipFile zipFile = new ZipFile(tempFile, Charset.forName("GBK"));
+                        ZipUtil.unzip(zipFile, tempDirectory);
+                        List<File> fileList = FileUtil.loopFiles(tempPath).stream().filter(item -> item.getName().endsWith(".docx") || item.getName().endsWith(".doc")).collect(Collectors.toList());
+                        if (!CollectionUtils.isEmpty(fileList)) {
+                            File file = fileList.get(0);
+                            fileName = file.getName();
+                            ids = fileManagerService.uploadFileGetGuid2(Collections.singletonList(file));
+                        }
+                        this.deleteDirectory(Paths.get(tempPath));
+                    }
+                } finally {
+                    tempFile.delete();
                 }
-                ZipFile zipFile =new ZipFile(tempFile, Charset.forName("GBK"));
-                ZipUtil.unzip(zipFile, tempDirectory);
-                List<File> fileList = FileUtil.loopFiles(tempPath).stream().filter(item -> item.getName().endsWith(".docx")).collect(Collectors.toList());
-                if (!CollectionUtils.isEmpty(fileList)) {
-                    File file = fileList.get(0);
-                    ids = fileManagerService.uploadFileGetGuid2(Arrays.asList(file));
+                //添加案件文件关联表
+                if (!CollectionUtils.isEmpty(ids)) {
+                    Integer caseFileId = caseFile.getId();
+                    String fileGuid = ids.get(0);
+                    AssoCaseFile assoCaseFile = new AssoCaseFile();
+                    assoCaseFile.setCaseId(caseFileId);
+                    assoCaseFile.setFileName(fileName);
+                    assoCaseFile.setFileType(type);
+                    assoCaseFile.setFileGuid(fileGuid);
+                    assoCaseFile.insert();
                 }
-                this.deleteDirectory(Paths.get(tempPath));
             }
-        }finally {
-            tempFile.delete();
-        }
-
-        CaseFile caseFile = caseFileMapper.selectOne(new LambdaQueryWrapper<CaseFile>()
-                .eq(CaseFile::getCaseNo, caseNo));
-        //添加案件文件关联表
-        if (ObjectUtils.isNotEmpty(caseFile) && !CollectionUtils.isEmpty(ids)) {
-            Integer caseFileId = caseFile.getId();
-            String fileGuid = ids.get(0);
-            String nameStr = fileName.substring(0, fileName.lastIndexOf("."));
-            AssoCaseFile assoCaseFile = new AssoCaseFile();
-            assoCaseFile.setCaseId(caseFileId);
-            assoCaseFile.setFileName(nameStr);
-            assoCaseFile.setFileType(type);
-            assoCaseFile.setFileGuid(fileGuid);
-            assoCaseFile.insert();
         }
     }
 
@@ -688,4 +774,292 @@ public class CaseFileService extends ServiceImpl<CaseFileMapper, CaseFile> {
             }
         });
     }
+
+
+    /**
+     * Creates the discrepancy base records for the case with the given case number.
+     * <p>
+     * Loads the case and its associated files, then handles two file pairs: file type 1 against
+     * file type 2 (stored as discrepancy type 1) and file type 2 against file type 3 (stored as
+     * discrepancy type 2). A pair is skipped when either file is missing or when a
+     * {@link Discrepancy} row already exists for it.
+     *
+     * @param caseNo case number used to look up the {@link CaseFile}; nothing is done when no case matches
+     * @throws Exception if downloading or writing the files of a pair fails
+     */
+    public void addDiscrepancy(String caseNo) throws Exception {
+        CaseFile caseFile = caseFileMapper.selectOne(new LambdaQueryWrapper<CaseFile>()
+                .eq(CaseFile::getCaseNo, caseNo));
+        if (ObjectUtils.isEmpty(caseFile)) {
+            return;
+        }
+        List<AssoCaseFile> assoCaseFiles = assoCaseFileMapper.selectList(new LambdaQueryWrapper<AssoCaseFile>()
+                .eq(AssoCaseFile::getCaseId, caseFile.getId()));
+        AssoCaseFile assoCaseFile1 = this.findCaseFileByType(assoCaseFiles, 1);
+        AssoCaseFile assoCaseFile2 = this.findCaseFileByType(assoCaseFiles, 2);
+        AssoCaseFile assoCaseFile3 = this.findCaseFileByType(assoCaseFiles, 3);
+
+        this.saveDiscrepancyIfAbsent(caseFile.getId(), assoCaseFile1, assoCaseFile2, 1);
+        this.saveDiscrepancyIfAbsent(caseFile.getId(), assoCaseFile2, assoCaseFile3, 2);
+    }
+
+    /**
+     * Returns the first associated file with the given file type, or {@code null} when there is none.
+     */
+    private AssoCaseFile findCaseFileByType(List<AssoCaseFile> assoCaseFiles, Integer fileType) {
+        // Deliberately null rather than a placeholder entity: ObjectUtils.isNotEmpty() is true for
+        // any non-null plain object, so a placeholder would be processed as if the file existed.
+        return assoCaseFiles.stream()
+                .filter(i -> fileType.equals(i.getFileType()))
+                .findFirst()
+                .orElse(null);
+    }
+
+    /**
+     * Inserts a discrepancy row for the two files and runs the detail comparison, unless either
+     * file is missing or a row for this pair already exists.
+     */
+    private void saveDiscrepancyIfAbsent(Integer caseId, AssoCaseFile left, AssoCaseFile right, Integer discrepancyType) throws IOException {
+        if (left == null || right == null) {
+            return;
+        }
+        Discrepancy existing = discrepancyMapper.selectOne(new LambdaQueryWrapper<Discrepancy>()
+                .eq(Discrepancy::getCaseFileId1, left.getId())
+                .eq(Discrepancy::getCaseFileId2, right.getId()));
+        if (ObjectUtils.isNotEmpty(existing)) {
+            return;
+        }
+        Discrepancy discrepancy = new Discrepancy();
+        discrepancy.setCaseId(caseId);
+        discrepancy.setCaseFileId1(left.getId());
+        discrepancy.setCaseFileId2(right.getId());
+        discrepancy.setDiscrepancyType(discrepancyType);
+        discrepancy.insert();
+
+        this.getFile(left.getFileGuid(), right.getFileGuid(), discrepancy.getId(), left.getFileName(), right.getFileName());
+    }
+
+    /**
+     * Downloads two stored Word files, extracts their non-empty paragraphs and saves the
+     * comparison results.
+     * <p>
+     * Four comparisons are stored for the discrepancy: the whole text (type 1) and the sections
+     * returned by {@link #getText(List)} under the keys "abstract" (type 2), "claims" (type 3) and
+     * "description" (type 4). Failures while parsing or comparing are printed and swallowed
+     * (best effort); the temporary files are always deleted.
+     *
+     * @param guid1         file-manager guid of the first file
+     * @param guid2         file-manager guid of the second file
+     * @param discrepancyId id of the discrepancy the details belong to
+     * @param fileName1     name of the first file; must end with ".docx" or ".doc"
+     * @param fileName2     name of the second file; must end with ".docx" or ".doc"
+     * @throws IOException              if downloading a file or writing it to a temporary file fails
+     * @throws IllegalArgumentException if a file name has no supported Word extension
+     */
+    public void getFile(String guid1, String guid2, Integer discrepancyId, String fileName1, String fileName2) throws IOException {
+        // Validate both names up front so nothing is downloaded or created for unsupported files.
+        String suffix1 = this.wordSuffixOf(fileName1);
+        String suffix2 = this.wordSuffixOf(fileName2);
+        File tempFile = null;
+        File tempFile1 = null;
+        try {
+            tempFile = this.writeWordTempFile(fileManagerService.downloadSystemFileFromFMS(guid1), "temp1_", suffix1);
+            tempFile1 = this.writeWordTempFile(fileManagerService.downloadSystemFileFromFMS(guid2), "temp2_", suffix2);
+            try {
+                List<String> list = this.readWordParagraphs(tempFile, suffix1);
+                List<String> list1 = this.readWordParagraphs(tempFile1, suffix2);
+                String text = StringUtils.join(list, "\n");
+                String text1 = StringUtils.join(list1, "\n");
+                Map<String, String> firstMap = this.getText(list);
+                Map<String, String> secondMap = this.getText(list1);
+                compareText(text, text1, 1, discrepancyId);
+                compareText(firstMap.get("abstract"), secondMap.get("abstract"), 2, discrepancyId);
+                compareText(firstMap.get("claims"), secondMap.get("claims"), 3, discrepancyId);
+                compareText(firstMap.get("description"), secondMap.get("description"), 4, discrepancyId);
+            } catch (Exception e) {
+                // Best effort: a failed comparison must not abort the caller's batch.
+                e.printStackTrace();
+            }
+        } finally {
+            if (tempFile != null) {
+                tempFile.delete();
+            }
+            if (tempFile1 != null) {
+                tempFile1.delete();
+            }
+        }
+    }
+
+    /**
+     * Returns ".docx" or ".doc" depending on the file name's extension; rejects anything else.
+     */
+    private String wordSuffixOf(String fileName) {
+        if (fileName != null && fileName.endsWith(".docx")) {
+            return ".docx";
+        }
+        if (fileName != null && fileName.endsWith(".doc")) {
+            return ".doc";
+        }
+        throw new IllegalArgumentException("Unsupported Word file name: " + fileName);
+    }
+
+    /**
+     * Writes the bytes to a new temporary file; the file is removed again when writing fails.
+     */
+    private File writeWordTempFile(byte[] bytes, String prefix, String suffix) throws IOException {
+        File file = File.createTempFile(prefix, suffix);
+        try (
+                InputStream inputStream = new ByteArrayInputStream(bytes);
+                FileOutputStream outputStream = new FileOutputStream(file)
+        ) {
+            IOUtils.copy(inputStream, outputStream);
+        } catch (IOException | RuntimeException e) {
+            file.delete();
+            throw e;
+        }
+        return file;
+    }
+
+    /**
+     * Reads the trimmed, non-empty paragraphs of a ".docx" or ".doc" file in document order.
+     */
+    private List<String> readWordParagraphs(File file, String suffix) throws IOException {
+        List<String> result = new ArrayList<>();
+        if (".docx".equals(suffix)) {
+            try (
+                    InputStream inputStream = new FileInputStream(file);
+                    XWPFDocument document = new XWPFDocument(inputStream)
+            ) {
+                for (XWPFParagraph paragraph : document.getParagraphs()) {
+                    String s = paragraph.getText().trim();
+                    if (StringUtils.isNotEmpty(s)) {
+                        result.add(s);
+                    }
+                }
+            }
+        } else {
+            try (
+                    InputStream inputStream = new FileInputStream(file);
+                    WordExtractor wordExtractor = new WordExtractor(inputStream)
+            ) {
+                for (String s : wordExtractor.getParagraphText()) {
+                    String s1 = s.trim();
+                    if (StringUtils.isNotEmpty(s1)) {
+                        result.add(s1);
+                    }
+                }
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Splits the paragraph list into abstract, claims and description by looking for the exact
+     * heading paragraphs of those three sections.
+     * <p>
+     * Each section starts at its heading (heading included). The abstract ends at the claims
+     * heading and the claims end at the description heading when that heading comes later in the
+     * list; otherwise the section runs to the end of the list. The description always runs to the
+     * end. A section whose heading is not found is returned as an empty string.
+     *
+     * @param list document paragraphs in order
+     * @return map with the keys "abstract", "claims" and "description", each holding the section's
+     *         paragraphs joined with "\n"
+     */
+    public Map<String, String> getText(List<String> list) {
+        int abstractStart = list.indexOf("说   明   书   摘   要");
+        int claimsStart = list.indexOf("权   利   要   求   书");
+        int descriptionStart = list.indexOf("说    明    书");
+
+        Map<String, String> sections = new HashMap<>();
+        sections.put("abstract", this.joinSection(list, abstractStart, claimsStart));
+        sections.put("claims", this.joinSection(list, claimsStart, descriptionStart));
+        // -1 as "next heading" means the description is never cut short.
+        sections.put("description", this.joinSection(list, descriptionStart, -1));
+        return sections;
+    }
+
+    /**
+     * Joins the paragraphs from {@code start} up to {@code nextStart} (exclusive) when the next
+     * heading lies after {@code start}, otherwise up to the end of the list; empty when
+     * {@code start} is negative.
+     */
+    private String joinSection(List<String> list, int start, int nextStart) {
+        if (start < 0) {
+            return "";
+        }
+        int stop = list.size();
+        if (nextStart > start) {
+            stop = nextStart;
+        }
+        return StringUtils.join(list.subList(start, stop), "\n");
+    }
+
+    /**
+     * Compares two texts and stores the result as {@link DiscrepancyDetail} rows.
+     * <p>
+     * Computes the cosine similarity of the texts, runs a character-level diff and wraps the
+     * marked-up output of {@link SectionDiffCommandVisitor} in a single {@code <p>} element.
+     * For every {@code <p>} element of the parsed HTML the direct {@code <em>} and {@code <del>}
+     * children are collected and one detail row is inserted with the counts derived from them.
+     *
+     * @param text          first (older) text; {@code null} is treated as empty
+     * @param text1         second (newer) text; {@code null} is treated as empty
+     * @param type          diff type stored on the detail row (getFile uses 1 = whole text,
+     *                      2 = abstract, 3 = claims, 4 = description)
+     * @param discrepancyId id of the discrepancy the detail rows belong to
+     */
+    public void compareText(String text, String text1, Integer type, Integer discrepancyId) {
+        String left = text == null ? "" : text;
+        String right = text1 == null ? "" : text1;
+        int total = left.length();
+        double similarity = cosineSimilarityService.calculateCosineSimilarity(left, right);
+        // An empty first text, or an undefined (NaN) similarity, is stored as 0 instead of "NaN".
+        if (total == 0 || Double.isNaN(similarity)) {
+            similarity = 0D;
+        }
+
+        StringsComparator comparator = new StringsComparator(left, right);
+        EditScript<Character> script = comparator.getScript();
+        SectionDiffCommandVisitor commandVisitor = new SectionDiffCommandVisitor();
+        script.visit(commandVisitor);
+        commandVisitor.finish();
+        String secondDoc = commandVisitor.getRightTemp().toString();
+        String secondDocReplace = secondDoc.replaceAll("(\\r\\n|\\n)", "<br>");
+        String html = "<p>" + secondDocReplace + "</p>";
+        // Parse the HTML with Jsoup
+        Document doc = Jsoup.parse(html);
+        Elements paragraphs = doc.select("p");
+        String html3 = doc.html();
+
+        for (Element paragraph : paragraphs) {
+            List<String> emList = new ArrayList<>();
+            List<String> delList = new ArrayList<>();
+            // Only direct children of the paragraph are inspected
+            for (Node node : paragraph.childNodes()) {
+                if (node instanceof Element element) {
+                    String s = element.text();
+                    if (StringUtils.isEmpty(s)) {
+                        continue;
+                    }
+                    if ("em".equalsIgnoreCase(element.tagName())) {
+                        emList.add(s);
+                    } else if ("del".equalsIgnoreCase(element.tagName())) {
+                        delList.add(s);
+                    }
+                }
+            }
+            // NOTE(review): identical fragments are counted once (distinct), as in the original
+            // implementation - confirm this is the intended meaning of "number of edits".
+            List<String> distinctEm = emList.stream().distinct().collect(Collectors.toList());
+            List<String> distinctDel = delList.stream().distinct().collect(Collectors.toList());
+            // Number of <em> fragments and their total length
+            int emNum = distinctEm.size();
+            int emSum = distinctEm.stream().mapToInt(String::length).sum();
+            // Number of <del> fragments and their total length
+            int delNum = distinctDel.size();
+            int delSum = distinctDel.stream().mapToInt(String::length).sum();
+            // Totals over both kinds of fragments
+            int editSum = emNum + delNum;
+            int editWordNum = emSum + delSum;
+            // Rate = edited characters / characters of the first text; 1 when the first text is empty
+            BigDecimal diff = BigDecimal.ONE;
+            if (total != 0) {
+                diff = new BigDecimal(editWordNum).divide(new BigDecimal(total), 6, RoundingMode.HALF_UP);
+            }
+            DiscrepancyDetail detail = new DiscrepancyDetail();
+            detail.setDiscrepancyId(discrepancyId);
+            detail.setTotalWorldCount(total);
+            detail.setEditCount(editSum);
+            detail.setEditWorldCount(editWordNum);
+            detail.setDelWorldCount(delSum);
+            detail.setDiffType(type);
+            detail.setDiffContent(html3);
+            detail.setRate(diff.toString());
+            detail.setSimilarity(String.format("%.6f", similarity));
+            detail.insert();
+        }
+    }
+
+
 }

+ 1 - 0
src/main/java/cn/cslg/wdc/service/DiscrepancyService.java

@@ -8,6 +8,7 @@ import lombok.extern.slf4j.Slf4j;
 import org.springframework.context.annotation.Lazy;
 import org.springframework.stereotype.Service;
 
+
 @Slf4j
 @Service
 @RequiredArgsConstructor(onConstructor_ = {@Lazy})

+ 67 - 0
src/main/java/cn/cslg/wdc/service/common/CosineSimilarityService.java

@@ -0,0 +1,67 @@
+package cn.cslg.wdc.service.common;
+
+import com.hankcs.hanlp.HanLP;
+import com.hankcs.hanlp.seg.Segment;
+import com.hankcs.hanlp.seg.common.Term;
+import org.apache.commons.math3.linear.MatrixUtils;
+import org.apache.commons.math3.linear.RealVector;
+import org.springframework.stereotype.Service;
+
+import java.util.*;
+
+/**
+ * Computes bag-of-words cosine similarity between texts that are segmented with HanLP.
+ */
+@Service
+public class CosineSimilarityService {
+
+    /**
+     * Returns the cosine similarity of the term-frequency vectors of two texts.
+     *
+     * @param text1 first text; {@code null} or empty yields no tokens
+     * @param text2 second text; {@code null} or empty yields no tokens
+     * @return the cosine of the angle between the two term-frequency vectors, or {@code 0.0}
+     *         when either text yields no tokens (the cosine is undefined for a zero vector
+     *         and the plain division would produce {@code NaN})
+     */
+    public double calculateCosineSimilarity(String text1, String text2) {
+        // Tokenize both texts
+        List<String> words1 = tokenize(text1);
+        List<String> words2 = tokenize(text2);
+        if (words1.isEmpty() || words2.isEmpty()) {
+            return 0.0;
+        }
+
+        // Build the shared vocabulary so both vectors use the same dimensions
+        Set<String> vocabulary = new HashSet<>(words1);
+        vocabulary.addAll(words2);
+
+        // Convert each text into a term-frequency vector over the vocabulary
+        RealVector vector1 = convertTextToVector(words1, vocabulary);
+        RealVector vector2 = convertTextToVector(words2, vocabulary);
+
+        // Cosine similarity = dot product divided by the product of the norms
+        double dotProduct = vector1.dotProduct(vector2);
+        double norm1 = vector1.getNorm();
+        double norm2 = vector2.getNorm();
+        return dotProduct / (norm1 * norm2);
+    }
+
+    /**
+     * Segments text with HanLP (custom dictionary disabled, place and organization
+     * recognition enabled) and returns the words in order.
+     *
+     * @param text text to segment; {@code null} or empty returns an empty list
+     */
+    private static List<String> tokenize(String text) {
+        List<String> words = new ArrayList<>();
+        if (text == null || text.isEmpty()) {
+            return words;
+        }
+        Segment segment = HanLP.newSegment().enableCustomDictionary(false).enablePlaceRecognize(true).enableOrganizationRecognize(true);
+        for (Term term : segment.seg(text)) {
+            words.add(term.word); // the surface form of the segmented term
+        }
+        return words;
+    }
+
+    /**
+     * Builds the term-frequency vector of {@code words} over {@code vocabulary}.
+     * Both vectors of a comparison must be built from the same, unmodified vocabulary set
+     * so that its iteration order assigns identical dimensions.
+     */
+    private static RealVector convertTextToVector(List<String> words, Set<String> vocabulary) {
+        // Count every word once up front instead of rescanning the list per vocabulary entry
+        Map<String, Integer> frequencies = new HashMap<>();
+        for (String word : words) {
+            frequencies.merge(word, 1, Integer::sum);
+        }
+
+        double[] vector = new double[vocabulary.size()];
+        int index = 0;
+        for (String word : vocabulary) {
+            vector[index++] = frequencies.getOrDefault(word, 0);
+        }
+
+        // Wrap the array in an Apache Commons Math RealVector
+        return MatrixUtils.createRealVector(vector);
+    }
+
+
+    /**
+     * Averages the pairwise cosine similarity of {@code referenceTexts.get(i)} and
+     * {@code hypothesisTexts.get(i)} over all reference texts.
+     *
+     * @param referenceTexts  reference texts; an empty list yields {@code 0.0}
+     * @param hypothesisTexts texts compared index-by-index with the references; entries
+     *                        beyond the reference count are ignored
+     * @return the mean similarity, or {@code 0.0} when there are no reference texts
+     * @throws IllegalArgumentException if there are fewer hypothesis texts than reference texts
+     */
+    public double calculateAverageCSCWV(List<String> referenceTexts, List<String> hypothesisTexts) {
+        if (referenceTexts.isEmpty()) {
+            return 0.0;
+        }
+        if (hypothesisTexts.size() < referenceTexts.size()) {
+            throw new IllegalArgumentException("hypothesisTexts has " + hypothesisTexts.size()
+                    + " entries but referenceTexts has " + referenceTexts.size());
+        }
+        double totalSimilarity = 0;
+        for (int i = 0; i < referenceTexts.size(); i++) {
+            totalSimilarity += calculateCosineSimilarity(referenceTexts.get(i), hypothesisTexts.get(i));
+        }
+
+        return totalSimilarity / referenceTexts.size();
+    }
+}

+ 5 - 3
src/test/java/cn/cslg/wdc/GetReportInfoFromWDTest.java

@@ -314,8 +314,8 @@ public class GetReportInfoFromWDTest {
 
     @Test
     public void test1() {
-        Map<String, String> map = getCaseFile("S2418519-测试卷");
-//        Map<String, String> map = getCaseFile("S2435631-测试卷-压缩包");
+//        Map<String, String> map = getCaseFile("S2418519-测试卷");
+        Map<String, String> map = getCaseFile("S2435631-测试卷-压缩包");
 //        Map<String, String> map = dynamicCaseInfo("P");
         System.out.println(map);
     }
@@ -327,7 +327,7 @@ public class GetReportInfoFromWDTest {
         options.addArguments("--disable-popup-blocking");
         options.addArguments("--disable-images");
         options.addArguments("--remote-allow-origins=*");
-//        options.addArguments("--headless");
+        options.addArguments("--headless");
         WebDriver driver = null;
         Map<String, String> retObject = new HashMap<>();
 
@@ -570,6 +570,8 @@ public class GetReportInfoFromWDTest {
                 List<File> fileList = FileUtil.loopFiles(tempPath).stream().filter(item -> item.getName().endsWith(".docx")).collect(Collectors.toList());
                 if (!CollectionUtils.isEmpty(fileList)) {
                     File file = fileList.get(0);
+                    fileName = file.getName();
+                    System.out.println(fileName);
 //                    ids = fileManagerService.uploadFileGetGuid2(Arrays.asList(file));
                 }
                 this.deleteDirectory(Paths.get(tempPath));

+ 421 - 21
src/test/java/cn/cslg/wdc/WdcApplicationTests.java

@@ -1,11 +1,22 @@
 package cn.cslg.wdc;
 
 import cn.cslg.wdc.common.exception.XiaoShiException;
+import cn.cslg.wdc.common.utils.SimilarityUtils;
 import cn.cslg.wdc.dto.common.SectionDiffCommandVisitor;
+import cn.cslg.wdc.entity.AssoCaseFile;
+import cn.cslg.wdc.entity.CaseFile;
 import cn.cslg.wdc.entity.Discrepancy;
+import cn.cslg.wdc.entity.DiscrepancyDetail;
+import cn.cslg.wdc.mapper.AssoCaseFileMapper;
+import cn.cslg.wdc.mapper.CaseFileMapper;
 import cn.cslg.wdc.service.DiscrepancyService;
+import cn.cslg.wdc.service.common.CosineSimilarityService;
+import cn.cslg.wdc.service.common.FileManagerService;
 import cn.cslg.wdc.service.common.GetReportInfoFromWDService;
 import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
+import org.apache.commons.compress.utils.IOUtils;
+import org.apache.commons.lang3.ObjectUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.text.diff.EditScript;
 import org.apache.commons.text.diff.StringsComparator;
 import org.apache.poi.hwpf.extractor.WordExtractor;
@@ -23,17 +34,32 @@ import org.jsoup.select.Elements;
 import org.junit.jupiter.api.Test;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.util.CollectionUtils;
 
 import java.io.*;
 import java.math.BigDecimal;
 import java.math.RoundingMode;
+import java.util.*;
+import java.util.stream.Collectors;
 
 @SpringBootTest
 class WdcApplicationTests {
 
     @Autowired
+    private AssoCaseFileMapper assoCaseFileMapper;
+
+    @Autowired
+    private CaseFileMapper caseFileMapper;
+
+    @Autowired
     private DiscrepancyService discrepancyService;
 
+    @Autowired
+    private CosineSimilarityService cosineSimilarityService;
+
+    @Autowired
+    private FileManagerService fileManagerService;
+
     @Test
     void contextLoads() {
 
@@ -111,6 +137,10 @@ class WdcApplicationTests {
             }
             if (path1.endsWith(".docx")) {
                 XWPFDocument document1 = new XWPFDocument(new FileInputStream(path1));
+//                List<XWPFParagraph> xwpfParagraphs= document1.getParagraphs();
+//                xwpfParagraphs.forEach(item->{
+//                   System.out.println(item.getText());
+//                });
                 XWPFWordExtractor extractor1 = new XWPFWordExtractor(document1);
                 text1 = extractor1.getText();
 //                System.out.println(text1);
@@ -199,7 +229,7 @@ class WdcApplicationTests {
                         run.setStrike(true);
                     } else if ("br".equalsIgnoreCase(element.tagName())) {
                         run = wordParagraph.createRun();
-                        run.setText("<br>");
+                        run.addBreak();
                     } else {
                         // 递归处理其他标签(如果有)
                         for (Node childNode : element.childNodes()) {
@@ -234,16 +264,266 @@ class WdcApplicationTests {
         }
     }
 
+    /**
+     * Looks up the case with the given case number and inserts Discrepancy records for its
+     * associated files: a type-1 record pairing the first file of type 1 with the first file
+     * of type 3 (followed by {@link #getFile}), and a type-2 record for the second pair.
+     * Nothing is inserted for a pair when either file is missing.
+     *
+     * @param caseNo case number used to find the CaseFile
+     * @throws IOException propagated from {@link #getFile}
+     */
+    public void addDiff(String caseNo) throws IOException {
+        CaseFile caseFile = caseFileMapper.selectOne(new LambdaQueryWrapper<CaseFile>()
+                .eq(CaseFile::getCaseNo, caseNo));
+        if (caseFile == null) {
+            return;
+        }
+        List<AssoCaseFile> assoCaseFiles = assoCaseFileMapper.selectList(new LambdaQueryWrapper<AssoCaseFile>()
+                .eq(AssoCaseFile::getCaseId, caseFile.getId()));
+        // Use null (not an empty placeholder entity) for a missing file so the guards below
+        // really skip the pair instead of inserting records with null file ids.
+        AssoCaseFile caseFile1 = assoCaseFiles.stream().filter(i -> i.getFileType() == 1).findFirst().orElse(null);
+        AssoCaseFile caseFile2 = assoCaseFiles.stream().filter(i -> i.getFileType() == 3).findFirst().orElse(null);
+        // NOTE(review): this filter is identical to caseFile2's (file type 3), so the type-2
+        // discrepancy below pairs a file with itself — confirm the intended file type.
+        AssoCaseFile caseFile3 = assoCaseFiles.stream().filter(i -> i.getFileType() == 3).findFirst().orElse(null);
+        if (caseFile1 != null && caseFile2 != null) {
+            Discrepancy discrepancy = new Discrepancy();
+            discrepancy.setCaseId(caseFile.getId());
+            discrepancy.setCaseFileId1(caseFile1.getId());
+            discrepancy.setCaseFileId2(caseFile2.getId());
+            discrepancy.setDiscrepancyType(1);
+            discrepancy.insert();
+
+            this.getFile(caseFile1.getFileGuid(), caseFile2.getFileGuid(), discrepancy.getId());
+        }
+        if (caseFile2 != null && caseFile3 != null) {
+            Discrepancy discrepancy = new Discrepancy();
+            discrepancy.setCaseId(caseFile.getId());
+            discrepancy.setCaseFileId1(caseFile2.getId());
+            discrepancy.setCaseFileId2(caseFile3.getId());
+            discrepancy.setDiscrepancyType(2);
+            discrepancy.insert();
+        }
+    }
+
+    /**
+     * Downloads two files from the file manager service into temporary .docx files and
+     * opens the first one as an XWPFDocument. The document is only parsed, not yet used.
+     * Both temporary files are deleted before returning, including on failure.
+     *
+     * @param guid1         file GUID of the first document
+     * @param guid2         file GUID of the second document
+     * @param discrepancyId id of the related Discrepancy record (currently unused here)
+     * @throws IOException if writing a temporary file or parsing the document fails
+     */
+    public void getFile(String guid1, String guid2, Integer discrepancyId) throws IOException {
+        File tempFile = null;
+        File tempFile1 = null;
+        try {
+            byte[] bytes = fileManagerService.downloadSystemFileFromFMS(guid1);
+            tempFile = File.createTempFile("temp1_", ".docx");
+            try (
+                    InputStream inputStream = new ByteArrayInputStream(bytes);
+                    FileOutputStream outputStream = new FileOutputStream(tempFile)
+            ) {
+                IOUtils.copy(inputStream, outputStream);
+            }
+            byte[] bytes1 = fileManagerService.downloadSystemFileFromFMS(guid2);
+            tempFile1 = File.createTempFile("temp2_", ".docx");
+            try (
+                    InputStream inputStream = new ByteArrayInputStream(bytes1);
+                    FileOutputStream outputStream = new FileOutputStream(tempFile1)
+            ) {
+                IOUtils.copy(inputStream, outputStream);
+            }
+            // Close the stream and the document before deleting the file; an open handle
+            // leaks and can also prevent the delete from succeeding.
+            try (
+                    InputStream documentStream = new FileInputStream(tempFile);
+                    XWPFDocument document = new XWPFDocument(documentStream)
+            ) {
+                // The document is only opened to confirm it parses; nothing is read from it yet.
+            }
+        } finally {
+            if (tempFile != null) {
+                tempFile.delete();
+            }
+            if (tempFile1 != null) {
+                tempFile1.delete();
+            }
+        }
+    }
+
+    // Final version of the comparison test.
+    // Reads two Word documents paragraph by paragraph, splits each into abstract / claims /
+    // description sections via getText, and runs compareText1 on the full text (type 1) and
+    // on each section pair (types 2-4).
+    // NOTE(review): the input paths are hard-coded to a local F: drive, so this only runs
+    // on a machine that has those files.
     @Test
     public void test118() {
-        String path = "F:\\file\\测试\\word对比\\AAA.docx";
-        String path1 = "F:\\file\\测试\\word对比\\BBBB.docx";
-        if (!path.endsWith(".doc") && !path.endsWith(".docx")) {
-            throw new XiaoShiException("请上传Word文件");
+        String path = "F:\\file\\测试\\word对比\\240722-S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-申请全文-v1F.docx";
+//        String path = "F:\\file\\测试\\word对比\\S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-权利要求书-v1r01-sq.docx";
+//        String path = "F:\\file\\测试\\word对比\\AAA.docx";
+        String path1 = "F:\\file\\测试\\word对比\\240805-S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-申请全文-v3F-清洁版.docx";
+//        String path1 = "F:\\file\\测试\\word对比\\240722-S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-申请全文-v1F.docx";
+        try {
+//            String text = "";
+//            String text1 = "";
+            // Trimmed, non-empty paragraphs of the first and second document
+            List<String> list = new ArrayList<>();
+            List<String> list1 = new ArrayList<>();
+            if (path.endsWith(".docx")) {
+                // NOTE(review): the FileInputStream is never closed explicitly here.
+                XWPFDocument document = new XWPFDocument(new FileInputStream(path));
+                List<XWPFParagraph> paragraphs = document.getParagraphs();
+                for (XWPFParagraph paragraph : paragraphs) {
+                    final String s = paragraph.getText().trim();
+                    if (StringUtils.isNotEmpty(s)) {
+                        list.add(s);
+                    }
+                }
+//                XWPFWordExtractor extractor = new XWPFWordExtractor(document);
+//                text = extractor.getText();
+                document.close();
+            } else if (path.endsWith(".doc")) {
+                // NOTE(review): neither the stream nor the extractor is closed in this branch.
+                InputStream inputStream = new FileInputStream(path);
+                WordExtractor wordExtractor = new WordExtractor(inputStream);
+                String[] paragraphText = wordExtractor.getParagraphText();
+                for (String s : paragraphText) {
+                    String trim = s.trim();
+                    if (StringUtils.isNotEmpty(trim.trim())) {
+                        list.add(trim);
+                    }
+                }
+//                list = Arrays.asList(paragraphText);
+//                text = wordExtractor.getText();
+            }
+            if (path1.endsWith(".docx")) {
+                XWPFDocument document1 = new XWPFDocument(new FileInputStream(path1));
+                List<XWPFParagraph> paragraphs = document1.getParagraphs();
+                for (XWPFParagraph paragraph : paragraphs) {
+                    final String s = paragraph.getText().trim();
+                    if (StringUtils.isNotEmpty(s)) {
+                        list1.add(s);
+                    }
+                }
+//                XWPFWordExtractor extractor1 = new XWPFWordExtractor(document1);
+//                text1 = extractor1.getText();
+                // Close the stream
+                document1.close();
+            } else if (path1.endsWith(".doc")) {
+                InputStream inputStream = new FileInputStream(path1);
+                WordExtractor wordExtractor = new WordExtractor(inputStream);
+                String[] paragraphText = wordExtractor.getParagraphText();
+                // NOTE(review): unlike the first document's .doc branch, the paragraphs are
+                // neither trimmed nor filtered for emptiness here, so the section headings
+                // searched by getText may not match exactly.
+                list1 = Arrays.asList(paragraphText);
+//                text1 = wordExtractor.getText();
+            }
+            // Section name ("abstract" / "claims" / "description") -> section text
+            Map<String, String> firstMap = this.getText(list);
+            Map<String, String> secondMap = this.getText(list1);
+
+            final String text = StringUtils.join(list, "\n");
+            final String text1 = StringUtils.join(list1, "\n");
+
+
+            System.out.println("text:" + text);
+            System.out.println("text1:" + text1);
+            // Type 1 = full text, 2 = abstract, 3 = claims, 4 = description
+            compareText1(text, text1, 1);
+            compareText1(firstMap.get("abstract"), secondMap.get("abstract"), 2);
+            compareText1(firstMap.get("claims"), secondMap.get("claims"), 3);
+            compareText1(firstMap.get("description"), secondMap.get("description"), 4);
+        } catch (Exception e) {
+            e.printStackTrace();
         }
-        if (!path1.endsWith(".doc") && !path1.endsWith(".docx")) {
-            throw new XiaoShiException("请上传Word文件");
+    }
+
+    /**
+     * Splits a list of paragraphs into the abstract, claims and description sections.
+     * A section starts at the paragraph that exactly equals its heading and runs up to the
+     * next section's heading when that heading appears later in the list, otherwise to the
+     * end of the list. A section whose heading is absent is returned as an empty string.
+     *
+     * @param list paragraphs of the document, in order
+     * @return map with keys "abstract", "claims" and "description"; each value is the
+     *         section's paragraphs joined with newlines
+     */
+    public Map<String, String> getText(List<String> list) {
+        final int abstractStart = list.indexOf("说   明   书   摘   要");
+        final int claimsStart = list.indexOf("权   利   要   求   书");
+        final int descriptionStart = list.indexOf("说    明    书");
+        final int size = list.size();
+
+        // Every section defaults to empty when its heading is not present
+        List<String> abstractPart = new ArrayList<>();
+        List<String> claimsPart = new ArrayList<>();
+        List<String> descriptionPart = new ArrayList<>();
+
+        if (abstractStart >= 0) {
+            abstractPart = list.subList(abstractStart, claimsStart > abstractStart ? claimsStart : size);
+        }
+        if (claimsStart >= 0) {
+            claimsPart = list.subList(claimsStart, descriptionStart > claimsStart ? descriptionStart : size);
+        }
+        if (descriptionStart >= 0) {
+            descriptionPart = list.subList(descriptionStart, size);
         }
+
+        Map<String, String> sections = new HashMap<>();
+        sections.put("abstract", StringUtils.join(abstractPart, "\n"));
+        sections.put("claims", StringUtils.join(claimsPart, "\n"));
+        sections.put("description", StringUtils.join(descriptionPart, "\n"));
+        return sections;
+    }
+
+    /**
+     * Diffs two texts character by character and prints edit statistics for the result.
+     * The right-hand output of SectionDiffCommandVisitor is wrapped in a {@code <p>} element
+     * (line breaks become {@code <br>}), parsed with Jsoup, and for every {@code <p>} the
+     * distinct texts of its direct {@code <em>} (modified) and {@code <del>} (deleted)
+     * children are counted. Nothing is persisted; results only go to standard output.
+     *
+     * @param text  first (original) text; its length is the denominator of the diff rate
+     * @param text1 second text
+     * @param type  section type passed by the caller; not used by this printing variant
+     */
+    public void compareText1(String text, String text1, Integer type) {
+        double similarity = cosineSimilarityService.calculateCosineSimilarity(text, text1);
+        int total = text.length();
+        System.out.println("firstDoc:" + total);
+        StringsComparator comparator = new StringsComparator(text, text1);
+        EditScript<Character> script = comparator.getScript();
+        SectionDiffCommandVisitor commandVisitor = new SectionDiffCommandVisitor();
+        script.visit(commandVisitor);
+        commandVisitor.finish();
+        String secondDoc = commandVisitor.getRightTemp().toString();
+        String secondDocReplace = secondDoc.replaceAll("(\\r\\n|\\n)", "<br>");
+        String html = "<p>" + secondDocReplace + "</p>";
+        // Parse the marked-up diff with Jsoup
+        Document doc = Jsoup.parse(html);
+        Elements paragraphs = doc.select("p");
+        String html3 = doc.html();
+        System.out.println(html3);
+        // Walk every paragraph of the parsed diff
+        for (Element paragraph : paragraphs) {
+            List<String> emList = new ArrayList<>();
+            List<String> delList = new ArrayList<>();
+            // Only direct child elements of the paragraph are inspected
+            for (Node node : paragraph.childNodes()) {
+                if (node instanceof Element element) {
+                    String s = element.text();
+                    if (StringUtils.isEmpty(s)) {
+                        continue;
+                    }
+                    if ("em".equalsIgnoreCase(element.tagName())) {
+                        emList.add(s);
+                    } else if ("del".equalsIgnoreCase(element.tagName())) {
+                        delList.add(s);
+                    }
+                }
+            }
+            // Identical fragments are counted once, for both the number of edits and the characters
+            List<String> distinctEm = emList.stream().distinct().collect(Collectors.toList());
+            List<String> distinctDel = delList.stream().distinct().collect(Collectors.toList());
+            // Number of modifications
+            int emNum = distinctEm.size();
+            // Number of modified characters
+            int emSum = distinctEm.stream().mapToInt(String::length).sum();
+            // Number of deletions
+            int delNum = distinctDel.size();
+            // Number of deleted characters
+            int delSum = distinctDel.stream().mapToInt(String::length).sum();
+
+            System.out.println("Em:" + emNum);
+            System.out.println("Del:" + delNum);
+            System.out.println("Total:" + total);
+            // Total number of edited characters
+            int editWordNum = emSum + delSum;
+            BigDecimal diff;
+            if (total != 0) {
+                diff = new BigDecimal(editWordNum).divide(new BigDecimal(total), 6, RoundingMode.HALF_UP);
+            } else {
+                // An empty first text is reported as fully different with zero similarity
+                diff = new BigDecimal(1);
+                similarity = 0.0;
+            }
+            System.out.println("Diff:" + diff);
+            System.out.println("similarity:" + String.format("%.6f", similarity));
+            System.out.println("AAAAAAAAAAAAAA");
+        }
+    }
+
+    @Test
+    public void test119() {
+//        String path = "F:\\file\\测试\\word对比\\AAA-copy.doc";
+        String path = "F:\\file\\测试\\word对比\\AAA.docx";
+        String path1 = "F:\\file\\测试\\word对比\\BBBB.docx";
         try {
             String text = "";
             String text1 = "";
@@ -251,18 +531,17 @@ class WdcApplicationTests {
                 XWPFDocument document = new XWPFDocument(new FileInputStream(path));
                 XWPFWordExtractor extractor = new XWPFWordExtractor(document);
                 text = extractor.getText();
-//                System.out.println(text);
                 document.close();
             } else if (path.endsWith(".doc")) {
                 InputStream inputStream = new FileInputStream(path);
                 WordExtractor wordExtractor = new WordExtractor(inputStream);
+                final String[] paragraphText = wordExtractor.getParagraphText();
                 text = wordExtractor.getText();
             }
             if (path1.endsWith(".docx")) {
                 XWPFDocument document1 = new XWPFDocument(new FileInputStream(path1));
                 XWPFWordExtractor extractor1 = new XWPFWordExtractor(document1);
                 text1 = extractor1.getText();
-//                System.out.println(text1);
                 // 关闭流
                 document1.close();
             } else if (path1.endsWith(".doc")) {
@@ -270,13 +549,17 @@ class WdcApplicationTests {
                 WordExtractor wordExtractor = new WordExtractor(inputStream);
                 text1 = wordExtractor.getText();
             }
-            compareText1(text, text1);
+            compareText2(text, text1);
         } catch (Exception e) {
             e.printStackTrace();
         }
     }
 
-    public static void compareText1(String text, String text1) {
+    public void compareText2(String text, String text1) {
+        double su = cosineSimilarityService.calculateCosineSimilarity(text1, text);
+        int total = text.length();
+        int total1 = text1.length();
+        System.out.println("firstDoc:" + total);
         // commons-text
         StringsComparator comparator = new StringsComparator(text, text1);
         EditScript<Character> script = comparator.getScript();
@@ -285,21 +568,138 @@ class WdcApplicationTests {
         commandVisitor.finish();
 //        System.out.println(commandVisitor.getLeftTemp());
 //        System.out.println(commandVisitor.getRightTemp());
-        String firstDoc = commandVisitor.getLeftTemp().toString();
-        int firstDocLen = firstDoc.length();
-        System.out.println("firstDoc:" + firstDocLen);
         String secondDoc = commandVisitor.getRightTemp().toString();
-        String secondDocReplace = secondDoc.replace("\n", "<br>");
-        String html = "<p>" + secondDocReplace + "</p>";
-        String html1 = "<html><head><title>First parse</title></head>"
-                + "<body><p>" +secondDocReplace + "</p></body></html>";
-        System.out.println(html1);
+        String html = "<p>" + secondDoc + "</p>";
 //        System.out.println(html);
         // 使用Jsoup解析HTML
         Document doc = Jsoup.parse(html);
-        final String text2 = doc.text();
-        System.out.println(text2);
         Elements paragraphs = doc.select("p");
+        String html3 = doc.html();
+        String secondDocReplace = html3.replace("(\r\n|\n)", "<br>");
+        System.out.println(secondDocReplace);
+        // 创建Word文档
+        XWPFDocument wordDocument = new XWPFDocument();
+        // 遍历每个段落
+        for (Element paragraph : paragraphs) {
+            List<String> emList = new ArrayList<>();
+            List<String> delList = new ArrayList<>();
+            // 遍历段落中的每个节点
+            for (Node node : paragraph.childNodes()) {
+                if (node instanceof Element element) {
+                    // 处理HTML元素
+                    if ("em".equalsIgnoreCase(element.tagName())) {
+                        String s = element.text();
+                        emList.add(s);
+                    } else if ("del".equalsIgnoreCase(element.tagName())) {
+                        String s = element.text();
+                        delList.add(s);
+                    }
+                }
+            }
+            //修改处数
+            int emNum = 0;
+            //修改字数
+            int emSum = 0;
+            //删除处数
+            int delNum = 0;
+            //删除字数
+            int delSum = 0;
+            if (!CollectionUtils.isEmpty(emList)) {
+                emSum = emList.stream().mapToInt(String::length).sum();
+                emNum = (int) emList.stream().distinct().count();
+            }
+            if (!CollectionUtils.isEmpty(delList)) {
+                delSum = delList.stream().mapToInt(String::length).sum();
+                delNum = (int) delList.stream().distinct().count();
+            }
+
+            System.out.println("Em:" + emNum);
+            System.out.println("Del:" + delNum);
+            System.out.println("Total:" + total);
+            //修改总处数
+            int editSum = emNum + delNum;
+            int editWordNum = emSum + delSum;
+            BigDecimal sumBig = new BigDecimal(editWordNum);
+            BigDecimal totalBig = new BigDecimal(total);
+            BigDecimal diff = sumBig.divide(totalBig, 4, RoundingMode.HALF_UP);
+            System.out.println("Diff:" + diff);
+            System.out.println("similar:" + String.format("%.4f", su));
 
+        }
+
+        // 关闭文档
+        try {
+            wordDocument.close();
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+
+    /**
+     * Reads two Word documents into lists of trimmed, non-empty paragraphs, splits both into
+     * sections with getText and prints the sections of the first document.
+     * The input paths are hard-coded local files.
+     */
+    @Test
+    public void test120() throws IOException {
+//        String path = "F:\\file\\测试\\word对比\\AAA-copy.doc";
+//        String path = "F:\\file\\测试\\word对比\\AAA.docx";
+//        String path1 = "F:\\file\\测试\\word对比\\BBBB.docx";
+        String path = "F:\\file\\测试\\word对比\\S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-权利要求书-v1r01-sq.docx";
+        String path1 = "F:\\file\\测试\\word对比\\240722-S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-申请全文-v1F.docx";
+        List<String> list = new ArrayList<>();
+        List<String> list1 = new ArrayList<>();
+        try {
+            if (path.endsWith(".docx")) {
+                // try-with-resources closes both the stream and the document
+                try (InputStream inputStream = new FileInputStream(path);
+                     XWPFDocument document = new XWPFDocument(inputStream)) {
+                    for (XWPFParagraph paragraph : document.getParagraphs()) {
+                        final String s = paragraph.getText().trim();
+                        if (StringUtils.isNotEmpty(s)) {
+                            list.add(s);
+                        }
+                    }
+                }
+            } else if (path.endsWith(".doc")) {
+                try (InputStream inputStream = new FileInputStream(path)) {
+                    WordExtractor wordExtractor = new WordExtractor(inputStream);
+                    // Keep the filtered list; do not overwrite it with the raw paragraph array
+                    for (String s : wordExtractor.getParagraphText()) {
+                        String trimmed = s.trim();
+                        if (StringUtils.isNotEmpty(trimmed)) {
+                            list.add(trimmed);
+                        }
+                    }
+                }
+            }
+            if (path1.endsWith(".docx")) {
+                try (InputStream inputStream = new FileInputStream(path1);
+                     XWPFDocument document1 = new XWPFDocument(inputStream)) {
+                    for (XWPFParagraph paragraph : document1.getParagraphs()) {
+                        String s = paragraph.getText().trim();
+                        if (StringUtils.isNotEmpty(s)) {
+                            list1.add(s);
+                        }
+                    }
+                }
+            } else if (path1.endsWith(".doc")) {
+                try (InputStream inputStream = new FileInputStream(path1)) {
+                    WordExtractor wordExtractor = new WordExtractor(inputStream);
+                    // Fill list1 the same way as list so getText sees comparable input
+                    for (String s : wordExtractor.getParagraphText()) {
+                        String trimmed = s.trim();
+                        if (StringUtils.isNotEmpty(trimmed)) {
+                            list1.add(trimmed);
+                        }
+                    }
+                }
+            }
+            Map<String, String> firstMap = this.getText(list);
+            Map<String, String> secondMap = this.getText(list1);
+            System.out.println(firstMap);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
     }
+
 }