// gaochangkui / WDC
							package cn.cslg.wdc;

import cn.cslg.wdc.common.exception.XiaoShiException;
import cn.cslg.wdc.dto.SelectCaseInfoDTO;
import cn.cslg.wdc.dto.common.SectionDiffCommandVisitor;
import cn.cslg.wdc.entity.AssoCaseFile;
import cn.cslg.wdc.entity.CaseFile;
import cn.cslg.wdc.entity.Discrepancy;
import cn.cslg.wdc.mapper.AssoCaseFileMapper;
import cn.cslg.wdc.mapper.CaseFileMapper;
import cn.cslg.wdc.service.CaseFileService;
import cn.cslg.wdc.service.DiscrepancyService;
import cn.cslg.wdc.service.common.CosineSimilarityService;
import cn.cslg.wdc.service.common.FileManagerService;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.diff.EditScript;
import org.apache.commons.text.diff.StringsComparator;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.checkerframework.checker.units.qual.A;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.util.CollectionUtils;

import java.io.*;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.*;

@SpringBootTest
class WdcApplicationTests {

    // Mapper used by addDiff to list the AssoCaseFile rows belonging to a case.
    @Autowired
    private AssoCaseFileMapper assoCaseFileMapper;

    // Mapper used by addDiff to look up a CaseFile by its case number.
    @Autowired
    private CaseFileMapper caseFileMapper;

    // Only referenced from commented-out code in contextLoads at the moment.
    @Autowired
    private DiscrepancyService discrepancyService;

    // Used by compareText1 / compareText2 to compute the cosine similarity of two texts.
    @Autowired
    private CosineSimilarityService cosineSimilarityService;

    // Used by getFile to download file bytes by GUID.
    @Autowired
    private FileManagerService fileManagerService;

    // Used by contextLoads to run getDiscrepancyByFile.
    @Autowired
    private CaseFileService caseFileService;

    /**
     * Invokes {@code caseFileService.getDiscrepancyByFile} once with fixed sample arguments
     * (two 32-character hex strings, the number 8 and two .docx file names) and prints a marker.
     * NOTE(review): the hex strings look like file GUIDs - confirm against the service signature.
     */
    @Test
    void contextLoads() throws IOException {
        final String firstKey = "46283d7fdae3413491da50dfd3b92364";
        final String secondKey = "f8d77ae1ce2f4d298064ecaecf5ff301";
        final Integer number = 8;
        final String firstName =
                "241023-P20241102-PACN2417839-翻译方法、翻译装置、电子设备以及计算机可读存储介质-专利申请文件-v1F.docx";
        final String secondName =
                "241024-S2435631-测试卷-压缩包-语音翻译方法及装置、电子设备以及计算机可读存储介质-新申请文档-定稿.docx";

        caseFileService.getDiscrepancyByFile(firstKey, secondKey, number, firstName, secondName);
        System.out.println("AAAAAA");

//        final Long count = discrepancyService.getBaseMapper().selectCount(new LambdaQueryWrapper<Discrepancy>());
//        System.out.println(count);
//        Discrepancy discrepancy = new Discrepancy();
//        discrepancy.setCaseNo("sajdsak");
//        discrepancy.setFirstDiscrepancy("0.36");
//        discrepancy.setSecondDiscrepancy("0.66");
//        discrepancy.insert();
    }


    /**
     * Appends the content of one jsoup node to a Word paragraph.
     * <ul>
     *   <li>text node: one plain run holding the node text;</li>
     *   <li>{@code <em>}: one italic run holding the element text;</li>
     *   <li>{@code <del>}: one struck-through run holding the element text;</li>
     *   <li>any other element: its children are processed recursively;</li>
     *   <li>any other node kind: ignored.</li>
     * </ul>
     * A run is created only when there is something to put in it, so container elements
     * and non-text nodes no longer leave empty runs behind in the paragraph.
     *
     * @param paragraph paragraph that receives the new runs
     * @param node      jsoup node to render
     */
    private static void processNode(XWPFParagraph paragraph, Node node) {
        if (node instanceof TextNode textNode) {
            paragraph.createRun().setText(textNode.text());
            return;
        }
        if (!(node instanceof Element element)) {
            // Neither text nor element: nothing to render.
            return;
        }
        String tagName = element.tagName();
        if ("em".equalsIgnoreCase(tagName)) {
            XWPFRun run = paragraph.createRun();
            run.setText(element.text());
            run.setItalic(true);
        } else if ("del".equalsIgnoreCase(tagName)) {
            XWPFRun run = paragraph.createRun();
            run.setText(element.text());
            run.setStrike(true);
        } else {
            // Recurse into any other tag.
            for (Node childNode : element.childNodes()) {
                processNode(paragraph, childNode);
            }
        }
    }

    /**
     * Reads the plain text of a fixed local .docx file, prints it and parses it with jsoup.
     * The input stream and the document are closed via try-with-resources, also when
     * extraction fails.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    @Test
    public void test1() throws Exception {
        String path = "F:\\file\\测试\\word对比\\CCC.docx";
        String text;
        try (InputStream in = new FileInputStream(path);
             XWPFDocument document = new XWPFDocument(in)) {
            XWPFWordExtractor extractor = new XWPFWordExtractor(document);
            text = extractor.getText();
        }
        System.out.println(text);
        Document doc = Jsoup.parse(text);
        // The selection result is not inspected; the call only exercises the parser.
        Elements paragraphs = doc.select("p");
    }

    /**
     * Extracts the full text of two fixed local Word files (.doc or .docx) and passes both
     * texts to {@link #compareText(String, String)}.
     * <p>
     * Both paths are validated before any file is opened. Every stream, document and
     * extractor is closed via try-with-resources. Failures while reading or comparing are
     * printed and not rethrown, as before.
     *
     * @throws XiaoShiException if either path does not end in .doc or .docx
     */
    @Test
    public void test117() {
//        String path = "F:\\file\\测试\\word对比\\S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-权利要求书-v1r01-sq.docx";
//        String path1 = "F:\\file\\测试\\word对比\\240722-S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-申请全文-v1F.docx";
//        String path = "F:\\file\\测试\\word对比\\240722-S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-申请全文-v1F.docx";
//        String path1 = "F:\\file\\测试\\word对比\\240805-S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-申请全文-v3F-清洁版.docx";
        String path = "F:\\file\\测试\\word对比\\AAA.docx";
        String path1 = "F:\\file\\测试\\word对比\\BBBB.docx";
        String[] paths = {path, path1};
        for (String candidate : paths) {
            if (!candidate.endsWith(".doc") && !candidate.endsWith(".docx")) {
                throw new XiaoShiException("请上传Word文件");
            }
        }
        try {
            String[] texts = {"", ""};
            for (int i = 0; i < paths.length; i++) {
                try (InputStream in = new FileInputStream(paths[i])) {
                    if (paths[i].endsWith(".docx")) {
                        try (XWPFDocument document = new XWPFDocument(in)) {
                            texts[i] = new XWPFWordExtractor(document).getText();
                        }
                    } else {
                        // Validation above guarantees this is a .doc file.
                        try (WordExtractor wordExtractor = new WordExtractor(in)) {
                            texts[i] = wordExtractor.getText();
                        }
                    }
                }
            }
            compareText(texts[0], texts[1]);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Diffs two texts character by character, renders the marked-up second text into a Word
     * document and prints per-paragraph statistics.
     * <p>
     * The diff markup comes from {@code SectionDiffCommandVisitor#getRightTemp()}; this method
     * reacts to {@code <em>}, {@code <del>} and {@code <br>} elements in it (line feeds are
     * turned into {@code <br>} first). Rendering rules:
     * <ul>
     *   <li>plain text: unstyled run, counted in {@code Total};</li>
     *   <li>{@code <em>}: red, bold, italic run, counted in {@code Em};</li>
     *   <li>{@code <del>}: blue, bold, struck-through run, counted in {@code Del};</li>
     *   <li>{@code <br>}: line break;</li>
     *   <li>anything else: delegated to {@code processNode}.</li>
     * </ul>
     * {@code Diff} is {@code (Em + Del) / Total} as a percentage. When {@code Total} is 0 the
     * division is skipped and 0.00 (no edits) or 100.00 (edits only) is reported instead of
     * failing with an {@link ArithmeticException}.
     * <p>
     * Each node gets its own run, so styling of an em/del run can never carry over to the
     * plain text that follows it. The result is written to a fixed local path; I/O errors are
     * printed and not rethrown.
     *
     * @param text  first (left) text
     * @param text1 second (right) text
     */
    public static void compareText(String text, String text1) {
        // commons-text diff
        StringsComparator comparator = new StringsComparator(text, text1);
        EditScript<Character> script = comparator.getScript();
        SectionDiffCommandVisitor commandVisitor = new SectionDiffCommandVisitor();
        script.visit(commandVisitor);
        commandVisitor.finish();

        String firstDoc = commandVisitor.getLeftTemp().toString();
        System.out.println("firstDoc:" + firstDoc.length());
        String secondDoc = commandVisitor.getRightTemp().toString();
        String html = "<p>" + secondDoc.replace("\n", "<br>") + "</p>";

        // Parse the markup with jsoup.
        Document doc = Jsoup.parse(html);
        Elements paragraphs = doc.select("p");

        try (XWPFDocument wordDocument = new XWPFDocument()) {
            for (Element paragraph : paragraphs) {
                XWPFParagraph wordParagraph = wordDocument.createParagraph();
                int emNum = 0;
                int delNum = 0;
                int total = 0;

                for (Node node : paragraph.childNodes()) {
                    if (node instanceof TextNode textNode) {
                        // Plain text always goes into a fresh, unstyled run.
                        String s = textNode.text();
                        wordParagraph.createRun().setText(s);
                        total += s.length();
                    } else if (node instanceof Element element) {
                        String tag = element.tagName();
                        if ("em".equalsIgnoreCase(tag)) {
                            String s = element.text();
                            XWPFRun run = wordParagraph.createRun();
                            run.setText(s);
                            emNum += s.length();
                            run.setColor("FF0000");
                            run.setBold(true);
                            run.setItalic(true);
                        } else if ("del".equalsIgnoreCase(tag)) {
                            String s = element.text();
                            XWPFRun run = wordParagraph.createRun();
                            run.setText(s);
                            delNum += s.length();
                            run.setColor("0000FF");
                            run.setBold(true);
                            run.setStrike(true);
                        } else if ("br".equalsIgnoreCase(tag)) {
                            wordParagraph.createRun().addBreak();
                        } else {
                            // Recurse into any other tag.
                            for (Node childNode : element.childNodes()) {
                                processNode(wordParagraph, childNode);
                            }
                        }
                    }
                }

                System.out.println("Em:" + emNum);
                System.out.println("Del:" + delNum);
                System.out.println("Total:" + total);
                int sum = emNum + delNum;
                BigDecimal diff;
                if (total == 0) {
                    // No unchanged text to divide by.
                    diff = new BigDecimal(sum == 0 ? "0.00" : "100.00");
                } else {
                    diff = new BigDecimal(sum)
                            .divide(new BigDecimal(total), 2, RoundingMode.HALF_UP)
                            .multiply(new BigDecimal(100));
                }
                System.out.println("Diff:" + diff + "%");
            }

            // Write the document to disk.
            try (FileOutputStream out = new FileOutputStream("F:\\file\\测试\\word对比\\CCC.docx")) {
                wordDocument.write(out);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates {@code Discrepancy} rows for the case identified by {@code caseNo}.
     * <p>
     * The case's associated files are loaded and three of them are picked by file type
     * (1, 3 and 3). For the first pair a discrepancy of type 1 is inserted and
     * {@link #getFile(String, String, Integer)} is called; for the second pair a discrepancy
     * of type 2 is inserted.
     * <p>
     * A missing file is now represented by {@code null}. Previously an empty
     * {@code AssoCaseFile} was substituted, which {@code ObjectUtils.isNotEmpty} always treats
     * as present, so rows with null file ids were inserted when files were missing.
     *
     * @param caseNo case number to look up; nothing happens when no case matches
     * @throws IOException propagated from {@link #getFile(String, String, Integer)}
     */
    public void addDiff(String caseNo) throws IOException {
        CaseFile caseFile = caseFileMapper.selectOne(new LambdaQueryWrapper<CaseFile>()
                .eq(CaseFile::getCaseNo, caseNo));
        if (caseFile == null) {
            return;
        }
        List<AssoCaseFile> assoCaseFiles = assoCaseFileMapper.selectList(new LambdaQueryWrapper<AssoCaseFile>()
                .eq(AssoCaseFile::getCaseId, caseFile.getId()));
        AssoCaseFile caseFile1 = assoCaseFiles.stream().filter(i -> i.getFileType() == 1).findFirst().orElse(null);
        AssoCaseFile caseFile2 = assoCaseFiles.stream().filter(i -> i.getFileType() == 3).findFirst().orElse(null);
        // NOTE(review): same filter as caseFile2, so the second discrepancy pairs a file with
        // itself. This looks like a copy-paste slip; confirm the intended file type.
        AssoCaseFile caseFile3 = assoCaseFiles.stream().filter(i -> i.getFileType() == 3).findFirst().orElse(null);

        if (caseFile1 != null && caseFile2 != null) {
            Discrepancy discrepancy = new Discrepancy();
            discrepancy.setCaseId(caseFile.getId());
            discrepancy.setCaseFileId1(caseFile1.getId());
            discrepancy.setCaseFileId2(caseFile2.getId());
            discrepancy.setDiscrepancyType(1);
            discrepancy.insert();

            this.getFile(caseFile1.getFileGuid(), caseFile2.getFileGuid(), discrepancy.getId());
        }
        if (caseFile2 != null && caseFile3 != null) {
            Discrepancy discrepancy = new Discrepancy();
            discrepancy.setCaseId(caseFile.getId());
            discrepancy.setCaseFileId1(caseFile2.getId());
            discrepancy.setCaseFileId2(caseFile3.getId());
            discrepancy.setDiscrepancyType(2);
            discrepancy.insert();
        }
    }

    /**
     * Downloads two files by GUID into temporary .docx files and opens the first one as an
     * {@code XWPFDocument}. Nothing is done with the parsed document or the second file yet,
     * and {@code discrepancyId} is currently unused.
     * <p>
     * The document and its stream are closed before the temp files are removed, and removal
     * happens in a {@code finally} block so the files do not linger when a step fails. A file
     * that cannot be deleted immediately is scheduled for deletion at JVM exit.
     *
     * @param guid1         GUID of the first file
     * @param guid2         GUID of the second file
     * @param discrepancyId id of the related discrepancy row (not used yet)
     * @throws IOException if a temp file cannot be created or written, or the document cannot be read
     */
    public void getFile(String guid1, String guid2, Integer discrepancyId) throws IOException {
        File tempFile = null;
        File tempFile1 = null;
        try {
            byte[] bytes = fileManagerService.downloadSystemFileFromFMS(guid1);
            tempFile = File.createTempFile("temp1_", ".docx");
            try (FileOutputStream outputStream = new FileOutputStream(tempFile)) {
                outputStream.write(bytes);
            }

            byte[] bytes1 = fileManagerService.downloadSystemFileFromFMS(guid2);
            tempFile1 = File.createTempFile("temp2_", ".docx");
            try (FileOutputStream outputStream = new FileOutputStream(tempFile1)) {
                outputStream.write(bytes1);
            }

            try (InputStream inputStream = new FileInputStream(tempFile);
                 XWPFDocument document = new XWPFDocument(inputStream)) {
                // Opening the document is all that happens for now.
            }
        } finally {
            for (File temp : new File[]{tempFile, tempFile1}) {
                if (temp != null && !temp.delete()) {
                    temp.deleteOnExit();
                }
            }
        }
    }

    /**
     * Final version of the comparison flow.
     * <p>
     * Reads the paragraphs of two fixed local Word files, splits each into abstract / claims /
     * description via {@link #getText(List)} and runs {@link #compareText1} on the full text
     * (type 1) and on each section (types 2-4).
     * <p>
     * Both files go through the same steps: every paragraph is trimmed and blank ones are
     * dropped, for .docx and .doc alike. Previously the .doc branch of the second file kept
     * the raw paragraphs, so the exact-match heading lookup in {@code getText} could miss.
     * All streams, documents and extractors are closed via try-with-resources. Failures are
     * printed and not rethrown, as before.
     */
    @Test
    public void test118() {
//        String path = "F:\\file\\测试\\word对比\\240722-S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-申请全文-v1F.docx";
        String path = "F:\\file\\测试\\word对比\\S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-权利要求书-v1r01-sq.docx";
//        String path = "F:\\file\\测试\\word对比\\AAA.docx";
//        String path1 = "F:\\file\\测试\\word对比\\240805-S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-申请全文-v3F-清洁版.docx";
        String path1 = "F:\\file\\测试\\word对比\\240722-S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-申请全文-v1F.docx";
        try {
            List<List<String>> documents = new ArrayList<>();
            for (String source : new String[]{path, path1}) {
                List<String> lines = new ArrayList<>();
                if (source.endsWith(".docx")) {
                    try (InputStream in = new FileInputStream(source);
                         XWPFDocument document = new XWPFDocument(in)) {
                        for (XWPFParagraph paragraph : document.getParagraphs()) {
                            String s = paragraph.getText().trim();
                            if (StringUtils.isNotEmpty(s)) {
                                lines.add(s);
                            }
                        }
                    }
                } else if (source.endsWith(".doc")) {
                    try (InputStream in = new FileInputStream(source);
                         WordExtractor wordExtractor = new WordExtractor(in)) {
                        for (String raw : wordExtractor.getParagraphText()) {
                            String s = raw.trim();
                            if (StringUtils.isNotEmpty(s)) {
                                lines.add(s);
                            }
                        }
                    }
                }
                documents.add(lines);
            }
            List<String> list = documents.get(0);
            List<String> list1 = documents.get(1);

            Map<String, String> firstMap = this.getText(list);
            Map<String, String> secondMap = this.getText(list1);

            final String text = StringUtils.join(list, "\n");
            final String text1 = StringUtils.join(list1, "\n");

            System.out.println("text:" + text);
            System.out.println("text1:" + text1);
            compareText1(text, text1, 1);
            compareText1(firstMap.get("abstract"), secondMap.get("abstract"), 2);
            compareText1(firstMap.get("claims"), secondMap.get("claims"), 3);
            compareText1(firstMap.get("description"), secondMap.get("description"), 4);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Splits a list of paragraphs into the three sections of a patent document.
     * <p>
     * A section starts at the paragraph that exactly equals its heading
     * ({@code "说   明   书   摘   要"}, {@code "权   利   要   求   书"} or
     * {@code "说    明    书"}) and runs up to, but not including, the nearest following
     * heading of another section, or to the end of the list. The heading line itself is part
     * of the section. For the order abstract, claims, description this matches the previous
     * behaviour. For any other order a section no longer swallows the sections behind it,
     * e.g. an abstract placed after the description.
     *
     * @param list paragraphs in document order; elements are expected to be non-null
     * @return a mutable map with the keys {@code "abstract"}, {@code "claims"} and
     *         {@code "description"}; each value is the section's paragraphs joined with
     *         {@code "\n"}, or an empty string when the heading is absent
     */
    public Map<String, String> getText(List<String> list) {
        int abstractIndex = list.indexOf("说   明   书   摘   要");
        int claimsIndex = list.indexOf("权   利   要   求   书");
        int descriptionIndex = list.indexOf("说    明    书");
        int[] headingIndexes = {abstractIndex, claimsIndex, descriptionIndex};

        Map<String, String> map = new HashMap<>();
        map.put("abstract", joinSection(list, abstractIndex, headingIndexes));
        map.put("claims", joinSection(list, claimsIndex, headingIndexes));
        map.put("description", joinSection(list, descriptionIndex, headingIndexes));
        return map;
    }

    /** Joins the paragraphs from {@code start} up to the nearest later heading (or the list end). */
    private static String joinSection(List<String> list, int start, int[] headingIndexes) {
        if (start < 0) {
            return "";
        }
        int end = list.size();
        for (int heading : headingIndexes) {
            if (heading > start && heading < end) {
                end = heading;
            }
        }
        return String.join("\n", list.subList(start, end));
    }

    /**
     * Compares two texts and prints edit statistics plus their cosine similarity.
     * <p>
     * The texts are diffed character by character. The marked-up second text from
     * {@code SectionDiffCommandVisitor#getRightTemp()} has its line breaks replaced by
     * {@code <br>}, is wrapped in {@code <p>} and parsed with jsoup. For each resulting
     * paragraph the non-empty texts of its direct {@code <em>} and {@code <del>} children are
     * collected and printed as:
     * <ul>
     *   <li>{@code Em} / {@code Del}: number of distinct fragments;</li>
     *   <li>{@code Total}: length of {@code text};</li>
     *   <li>{@code Diff}: summed length of the distinct fragments divided by {@code Total}
     *       (6 decimals), or 1 with similarity forced to 0 when {@code text} is empty;</li>
     *   <li>{@code similarity}: cosine similarity with 6 decimals.</li>
     * </ul>
     * NOTE(review): identical fragments are counted once for both the count and the length,
     * whereas compareText2 sums the length of every fragment - confirm which is intended.
     * <p>
     * No Word document is produced here, so the unused {@code XWPFDocument} that used to be
     * allocated and closed has been removed.
     *
     * @param text  first (left) text; must not be null
     * @param text1 second (right) text
     * @param type  section type tag; only used by the disabled persistence code below
     */
    public void compareText1(String text, String text1, Integer type) {
        double similarity = cosineSimilarityService.calculateCosineSimilarity(text, text1);
        int total = text.length();
        System.out.println("firstDoc:" + total);

        StringsComparator comparator = new StringsComparator(text, text1);
        EditScript<Character> script = comparator.getScript();
        SectionDiffCommandVisitor commandVisitor = new SectionDiffCommandVisitor();
        script.visit(commandVisitor);
        commandVisitor.finish();

        String secondDoc = commandVisitor.getRightTemp().toString();
        String secondDocReplace = secondDoc.replaceAll("(\\r\\n|\\n)", "<br>");
        // Parse the markup with jsoup.
        Document doc = Jsoup.parse("<p>" + secondDocReplace + "</p>");
        Elements paragraphs = doc.select("p");
        System.out.println(doc.html());

        for (Element paragraph : paragraphs) {
            List<String> emList = new ArrayList<>();
            List<String> delList = new ArrayList<>();
            for (Node node : paragraph.childNodes()) {
                if (node instanceof Element element) {
                    if ("em".equalsIgnoreCase(element.tagName())) {
                        String s = element.text();
                        if (StringUtils.isNotEmpty(s)) {
                            emList.add(s);
                        }
                    } else if ("del".equalsIgnoreCase(element.tagName())) {
                        String s = element.text();
                        if (StringUtils.isNotEmpty(s)) {
                            delList.add(s);
                        }
                    }
                }
            }

            // De-duplicate once, then derive both the count and the character sum from it.
            List<String> distinctEm = emList.stream().distinct().toList();
            List<String> distinctDel = delList.stream().distinct().toList();
            // number of modified places / modified characters
            int emNum = distinctEm.size();
            int emSum = distinctEm.stream().mapToInt(String::length).sum();
            // number of deleted places / deleted characters
            int delNum = distinctDel.size();
            int delSum = distinctDel.stream().mapToInt(String::length).sum();

            System.out.println("Em:" + emNum);
            System.out.println("Del:" + delNum);
            System.out.println("Total:" + total);

            int editWordNum = emSum + delSum;
            BigDecimal diff;
            if (total != 0) {
                diff = new BigDecimal(editWordNum).divide(new BigDecimal(total), 6, RoundingMode.HALF_UP);
            } else {
                // Nothing to compare against: report a full difference and no similarity.
                diff = BigDecimal.ONE;
                similarity = 0.0;
            }
            System.out.println("Diff:" + diff);
            System.out.println("similarity:" + String.format("%.6f", similarity));
            System.out.println("AAAAAAAAAAAAAA");
//            int editSum = emNum + delNum; // total number of edited places
//            DiscrepancyDetail detail = new DiscrepancyDetail();
//            detail.setDiscrepancyId(0);
//            detail.setTotalWorldCount(total);
//            detail.setEditCount(editSum);
//            detail.setEditWorldCount(editWordNum);
//            detail.setDelWorldCount(delSum);
//            detail.setDiffType(type);
//            detail.setDiffContent(secondDocReplace);
//            detail.setRate(diff.toString());
//            detail.setSimilarity(String.format("%.4f", similarity));
//            detail.insert();
        }
    }

    /**
     * Extracts the full text of two fixed local Word files and passes both texts to
     * {@link #compareText2(String, String)}.
     * <p>
     * A path that ends in neither .docx nor .doc contributes an empty text. All streams,
     * documents and extractors are closed via try-with-resources. Failures are printed and
     * not rethrown, as before.
     */
    @Test
    public void test119() {
//        String path = "F:\\file\\测试\\word对比\\AAA-copy.doc";
        String path = "F:\\file\\测试\\word对比\\AAA.docx";
        String path1 = "F:\\file\\测试\\word对比\\BBBB.docx";
        try {
            String[] paths = {path, path1};
            String[] texts = {"", ""};
            for (int i = 0; i < paths.length; i++) {
                if (paths[i].endsWith(".docx")) {
                    try (InputStream in = new FileInputStream(paths[i]);
                         XWPFDocument document = new XWPFDocument(in)) {
                        texts[i] = new XWPFWordExtractor(document).getText();
                    }
                } else if (paths[i].endsWith(".doc")) {
                    try (InputStream in = new FileInputStream(paths[i]);
                         WordExtractor wordExtractor = new WordExtractor(in)) {
                        texts[i] = wordExtractor.getText();
                    }
                }
            }
            compareText2(texts[0], texts[1]);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Compares two texts and prints edit statistics plus their cosine similarity (4 decimals).
     * <p>
     * The texts are diffed character by character and the marked-up second text from
     * {@code SectionDiffCommandVisitor#getRightTemp()} is parsed with jsoup. For each
     * resulting paragraph the texts of its direct {@code <em>} and {@code <del>} children are
     * collected; the count uses distinct fragments, the character sum uses every fragment.
     * <p>
     * Fixes compared with the earlier version:
     * <ul>
     *   <li>Line breaks are converted to {@code <br>} with a real regex replacement before
     *       parsing, the same way compareText1 does it. The old code passed the pattern to
     *       {@code String.replace}, which matches literally and therefore never replaced
     *       anything.</li>
     *   <li>An empty {@code text} no longer causes a division by zero; as in compareText1 the
     *       difference is reported as 1 and the similarity as 0.</li>
     *   <li>The unused {@code XWPFDocument} and the unused length of {@code text1} are gone.</li>
     * </ul>
     *
     * @param text  first (left) text; must not be null
     * @param text1 second (right) text
     */
    public void compareText2(String text, String text1) {
        double su = cosineSimilarityService.calculateCosineSimilarity(text1, text);
        int total = text.length();
        System.out.println("firstDoc:" + total);

        // commons-text diff
        StringsComparator comparator = new StringsComparator(text, text1);
        EditScript<Character> script = comparator.getScript();
        SectionDiffCommandVisitor commandVisitor = new SectionDiffCommandVisitor();
        script.visit(commandVisitor);
        commandVisitor.finish();

        String secondDoc = commandVisitor.getRightTemp().toString();
        String secondDocReplace = secondDoc.replaceAll("(\\r\\n|\\n)", "<br>");
        // Parse the markup with jsoup.
        Document doc = Jsoup.parse("<p>" + secondDocReplace + "</p>");
        Elements paragraphs = doc.select("p");
        System.out.println(doc.html());

        for (Element paragraph : paragraphs) {
            List<String> emList = new ArrayList<>();
            List<String> delList = new ArrayList<>();
            for (Node node : paragraph.childNodes()) {
                if (node instanceof Element element) {
                    if ("em".equalsIgnoreCase(element.tagName())) {
                        emList.add(element.text());
                    } else if ("del".equalsIgnoreCase(element.tagName())) {
                        delList.add(element.text());
                    }
                }
            }

            // number of modified places (distinct) / modified characters (all fragments)
            int emNum = (int) emList.stream().distinct().count();
            int emSum = emList.stream().mapToInt(String::length).sum();
            // number of deleted places (distinct) / deleted characters (all fragments)
            int delNum = (int) delList.stream().distinct().count();
            int delSum = delList.stream().mapToInt(String::length).sum();

            System.out.println("Em:" + emNum);
            System.out.println("Del:" + delNum);
            System.out.println("Total:" + total);

            int editWordNum = emSum + delSum;
            BigDecimal diff;
            if (total != 0) {
                diff = new BigDecimal(editWordNum).divide(new BigDecimal(total), 4, RoundingMode.HALF_UP);
            } else {
                // Nothing to compare against: report a full difference and no similarity.
                diff = BigDecimal.ONE;
                su = 0.0;
            }
            System.out.println("Diff:" + diff);
            System.out.println("similar：" + String.format("%.4f", su));
        }
    }


    /**
     * Reads the paragraphs of two fixed local Word files, splits each into sections via
     * {@link #getText(List)} and prints the section map of the first file.
     * <p>
     * Both files go through the same steps: every paragraph is trimmed and blank ones are
     * dropped, for .docx and .doc alike. Previously the .doc branch of the first file threw
     * away its filtered list by overwriting it with the raw paragraph array, and the .doc
     * branch of the second file never filled its list at all. The full-text extraction that
     * nothing consumed has been removed. All streams, documents and extractors are closed via
     * try-with-resources. Failures are printed and not rethrown, as before.
     *
     * @throws IOException declared for compatibility; I/O failures are caught and printed
     */
    @Test
    public void test120() throws IOException {
//        String path = "F:\\file\\测试\\word对比\\AAA-copy.doc";
//        String path = "F:\\file\\测试\\word对比\\AAA.docx";
//        String path1 = "F:\\file\\测试\\word对比\\BBBB.docx";
        String path = "F:\\file\\测试\\word对比\\S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-权利要求书-v1r01-sq.docx";
        String path1 = "F:\\file\\测试\\word对比\\240722-S2418519-测试卷-PACN2414802-一种用于降低GIDL的混合型Gate结构及制备-申请全文-v1F.docx";
        try {
            List<List<String>> documents = new ArrayList<>();
            for (String source : new String[]{path, path1}) {
                List<String> lines = new ArrayList<>();
                if (source.endsWith(".docx")) {
                    try (InputStream in = new FileInputStream(source);
                         XWPFDocument document = new XWPFDocument(in)) {
                        for (XWPFParagraph paragraph : document.getParagraphs()) {
                            String s = paragraph.getText().trim();
                            if (StringUtils.isNotEmpty(s)) {
                                lines.add(s);
                            }
                        }
                    }
                } else if (source.endsWith(".doc")) {
                    try (InputStream in = new FileInputStream(source);
                         WordExtractor wordExtractor = new WordExtractor(in)) {
                        for (String raw : wordExtractor.getParagraphText()) {
                            String s = raw.trim();
                            if (StringUtils.isNotEmpty(s)) {
                                lines.add(s);
                            }
                        }
                    }
                }
                documents.add(lines);
            }
            Map<String, String> firstMap = this.getText(documents.get(0));
            // Computed for parity with the first file; not printed.
            Map<String, String> secondMap = this.getText(documents.get(1));
            System.out.println(firstMap);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}