|
@@ -0,0 +1,864 @@
|
|
|
|
|
+package com.cslg.ids.service;
|
|
|
|
|
+
|
|
|
|
|
+import cn.hutool.core.codec.Base64;
|
|
|
|
|
+import com.alibaba.fastjson2.JSON;
|
|
|
|
|
+import com.alibaba.fastjson2.JSONArray;
|
|
|
|
|
+import com.alibaba.fastjson2.JSONObject;
|
|
|
|
|
+import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
|
|
|
|
+import com.cslg.ids.common.exception.ExceptionEnum;
|
|
|
|
|
+import com.cslg.ids.common.exception.XiaoShiException;
|
|
|
|
|
+import com.cslg.ids.dto.*;
|
|
|
|
|
+import com.cslg.ids.entity.FileInfo;
|
|
|
|
|
+import com.cslg.ids.service.common.FileManagerService;
|
|
|
|
|
+import com.cslg.ids.vo.*;
|
|
|
|
|
+import com.google.gson.Gson;
|
|
|
|
|
+import com.itextpdf.html2pdf.HtmlConverter;
|
|
|
|
|
+import com.vladsch.flexmark.ast.*;
|
|
|
|
|
+import com.vladsch.flexmark.ext.tables.TableBlock;
|
|
|
|
|
+import com.vladsch.flexmark.ext.tables.TableCell;
|
|
|
|
|
+import com.vladsch.flexmark.ext.tables.TableRow;
|
|
|
|
|
+import com.vladsch.flexmark.html.HtmlRenderer;
|
|
|
|
|
+import com.vladsch.flexmark.parser.Parser;
|
|
|
|
|
+import com.vladsch.flexmark.util.ast.Node;
|
|
|
|
|
+import com.vladsch.flexmark.util.data.MutableDataSet;
|
|
|
|
|
+import com.vladsch.flexmark.util.sequence.BasedSequence;
|
|
|
|
|
+import lombok.RequiredArgsConstructor;
|
|
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
|
|
+import okhttp3.MediaType;
|
|
|
|
|
+import okhttp3.OkHttpClient;
|
|
|
|
|
+import okhttp3.Request;
|
|
|
|
|
+import okhttp3.RequestBody;
|
|
|
|
|
+import org.apache.commons.lang3.ObjectUtils;
|
|
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
|
|
+import org.apache.pdfbox.pdmodel.PDDocument;
|
|
|
|
|
+import org.apache.pdfbox.pdmodel.PDPage;
|
|
|
|
|
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
|
|
|
|
+import org.apache.pdfbox.pdmodel.font.PDFont;
|
|
|
|
|
+import org.apache.pdfbox.pdmodel.font.PDType0Font;
|
|
|
|
|
+import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
|
|
|
|
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
|
|
|
|
+import org.apache.pdfbox.text.PDFTextStripper;
|
|
|
|
|
+import org.apache.poi.util.Units;
|
|
|
|
|
+import org.apache.poi.xwpf.usermodel.*;
|
|
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
|
|
+import org.springframework.context.annotation.Lazy;
|
|
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
|
|
+import org.springframework.util.CollectionUtils;
|
|
|
|
|
+
|
|
|
|
|
+import java.io.*;
|
|
|
|
|
+import java.net.ConnectException;
|
|
|
|
|
+import java.nio.file.Files;
|
|
|
|
|
+import java.nio.file.Path;
|
|
|
|
|
+import java.util.*;
|
|
|
|
|
+import java.util.concurrent.TimeUnit;
|
|
|
|
|
+import java.util.regex.Matcher;
|
|
|
|
|
+import java.util.regex.Pattern;
|
|
|
|
|
+
|
|
|
|
|
+@Slf4j
|
|
|
|
|
+@Service
|
|
|
|
|
+@RequiredArgsConstructor(onConstructor_ = {@Lazy})
|
|
|
|
|
+public class TranslateService {
|
|
|
|
|
+ private static final String BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1";
|
|
|
|
|
+ private static final String API_KEY = "sk-4f9fdd48c67a41d18d2817aed393e854";
|
|
|
|
|
+// private static final String MODEL_ID = "qwen3-max";
|
|
|
|
|
+ private static final String MODEL_ID = "qwen-plus";
|
|
|
|
|
+ private static final String TARGET_LANGUAGE = "English";
|
|
|
|
|
+ private static final String THINKING_MODE = "disable";
|
|
|
|
|
+ private static final String CONVERT_ENGINE = "mineru";
|
|
|
|
|
+ private static final long CHUNK_SIZE = 5000L;
|
|
|
|
|
+ private static final long CONCURRENT = 40L;
|
|
|
|
|
+ private static final float TEMPERATURE = 0.8f;
|
|
|
|
|
+ private static final long RETRY_COUNT = 2L;
|
|
|
|
|
+ private static final String CUSTOM_PROMPT = "你是一位资深专业的英文翻译者,擅长根据文件内容将非英文文件内容翻译成英文文件内容,翻译误差率仅有1%。\\n\\n**翻译规则**\\n- 先根据文件内容判断出是否为英文文件内容,如果不是才需要翻译,反之不需要翻译。\\n- 翻译内容需要符合原文内容以及格式\\n- 翻译内容必须为英文";
|
|
|
|
|
+ private static final String MINERU_TOKEN = "eyJ0eXBlIjoiSldUIiwiYWxnIjoiSFM1MTIifQ.eyJqdGkiOiI2OTgwMDQyMSIsInJvbCI6IlJPTEVfUkVHSVNURVIiLCJpc3MiOiJPcGVuWExhYiIsImlhdCI6MTc2NDcyOTQ2OSwiY2xpZW50SWQiOiJsa3pkeDU3bnZ5MjJqa3BxOXgydyIsInBob25lIjoiMTU3MDUyMjA1MzMiLCJvcGVuSWQiOm51bGwsInV1aWQiOiIwZDlkYmUwZS04MjI0LTQyMWQtOGM5My0yOTlhNGFhNjA2YTgiLCJlbWFpbCI6IiIsImV4cCI6MTc2NTkzOTA2OX0.nAOIssRaG5HSYDvbxtH0CtAjCf7nNWaJImA2EdwkDxkTK3_6Gh7Kl_3wldfrOoNvik9h33bOUspXSiFiz1meaw";
|
|
|
|
|
+ private static final String DOCUTRANSLATE_URL = "http://192.168.2.24:8010";
|
|
|
|
|
+
|
|
|
|
|
+ @Autowired
|
|
|
|
|
+ private FileManagerService fileManagerService;
|
|
|
|
|
+ @Autowired
|
|
|
|
|
+ private FileInfoService fileInfoService;
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 递归处理 Markdown 节点并添加到 Word 文档
|
|
|
|
|
+ */
|
|
|
|
|
+ private static void processNode(Node node, XWPFDocument document) {
|
|
|
|
|
+ if (node instanceof Heading) {
|
|
|
|
|
+ Heading heading = (Heading) node;
|
|
|
|
|
+ XWPFParagraph paragraph = document.createParagraph();
|
|
|
|
|
+ XWPFRun run = paragraph.createRun();
|
|
|
|
|
+ run.setText(heading.getText().toString());
|
|
|
|
|
+ run.setBold(true);
|
|
|
|
|
+ run.setFontSize(20 - (2 * heading.getLevel()));
|
|
|
|
|
+ } else if (node instanceof Paragraph) {
|
|
|
|
|
+ XWPFParagraph paragraph = document.createParagraph();
|
|
|
|
|
+ processChildren(node, document, paragraph);
|
|
|
|
|
+ } else if (node instanceof Text) {
|
|
|
|
|
+ Text text = (Text) node;
|
|
|
|
|
+ XWPFParagraph paragraph = document.createParagraph();
|
|
|
|
|
+ XWPFRun run = paragraph.createRun();
|
|
|
|
|
+ run.setText(text.getChars().toString());
|
|
|
|
|
+ } else if (node instanceof StrongEmphasis) {
|
|
|
|
|
+ StrongEmphasis strong = (StrongEmphasis) node;
|
|
|
|
|
+ XWPFParagraph paragraph = document.createParagraph();
|
|
|
|
|
+ XWPFRun run = paragraph.createRun();
|
|
|
|
|
+ run.setText(strong.getText().toString());
|
|
|
|
|
+ run.setBold(true);
|
|
|
|
|
+ } else if (node instanceof Emphasis) {
|
|
|
|
|
+ Emphasis emphasis = (Emphasis) node;
|
|
|
|
|
+ XWPFParagraph paragraph = document.createParagraph();
|
|
|
|
|
+ XWPFRun run = paragraph.createRun();
|
|
|
|
|
+ run.setText(emphasis.getText().toString());
|
|
|
|
|
+ run.setItalic(true);
|
|
|
|
|
+ } else if (node instanceof Image) {
|
|
|
|
|
+ // 处理图片(示例:占位符,实际需下载图片)
|
|
|
|
|
+ Image image = (Image) node;
|
|
|
|
|
+ processImage(image.getUrl().toString(), document, null);
|
|
|
|
|
+ } else if (node instanceof HtmlInline) {
|
|
|
|
|
+ HtmlInline htmlInline = (HtmlInline) node;
|
|
|
|
|
+ String htmlContent = htmlInline.getChars().toString();
|
|
|
|
|
+ processHtmlContent(htmlContent, document, null);
|
|
|
|
|
+ } else if (node instanceof BulletList) {
|
|
|
|
|
+ node.getChildren().forEach(child -> {
|
|
|
|
|
+ if (child instanceof ListItem) {
|
|
|
|
|
+ XWPFParagraph paragraph = document.createParagraph();
|
|
|
|
|
+ paragraph.setStyle("ListBullet");
|
|
|
|
|
+ processChildren(child, document, paragraph);
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+ } else if (node instanceof OrderedList) {
|
|
|
|
|
+ node.getChildren().forEach(child -> {
|
|
|
|
|
+ if (child instanceof ListItem) {
|
|
|
|
|
+ XWPFParagraph paragraph = document.createParagraph();
|
|
|
|
|
+ paragraph.setStyle("ListNumber");
|
|
|
|
|
+ processChildren(child, document, paragraph);
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+ } else if (node instanceof HtmlBlock) {
|
|
|
|
|
+ HtmlBlock htmlBlock = (HtmlBlock) node;
|
|
|
|
|
+ processHtmlContent(htmlBlock.getContentChars().toString(), document, null);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private static void processChildren(Node node, XWPFDocument document, XWPFParagraph paragraph) {
|
|
|
|
|
+ node.getChildren().forEach(child -> {
|
|
|
|
|
+ if (child instanceof Text) {
|
|
|
|
|
+ XWPFRun run = paragraph.createRun();
|
|
|
|
|
+ run.setText(((Text) child).getChars().toString());
|
|
|
|
|
+ } else if (child instanceof StrongEmphasis) {
|
|
|
|
|
+ XWPFRun run = paragraph.createRun();
|
|
|
|
|
+ run.setText(((StrongEmphasis) child).getText().toString());
|
|
|
|
|
+ run.setBold(true);
|
|
|
|
|
+ } else if (child instanceof Emphasis) {
|
|
|
|
|
+ XWPFRun run = paragraph.createRun();
|
|
|
|
|
+ run.setText(((Emphasis) child).getText().toString());
|
|
|
|
|
+ run.setItalic(true);
|
|
|
|
|
+ } else if (child instanceof HtmlInline) {
|
|
|
|
|
+ HtmlInline htmlInline = (HtmlInline) child;
|
|
|
|
|
+ processHtmlContent(htmlInline.getChars().toString(), document, paragraph);
|
|
|
|
|
+ } else if (child instanceof Image) {
|
|
|
|
|
+ // 处理图片(示例:占位符,实际需下载图片)
|
|
|
|
|
+ Image image = (Image) child;
|
|
|
|
|
+ // 居中对齐
|
|
|
|
|
+ processImage(image.getUrl().toString(), document, paragraph);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ processChildren(child, document, paragraph);
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private static void processImage(String imageUrl, XWPFDocument document, XWPFParagraph paragraph) {
|
|
|
|
|
+ if (paragraph == null) {
|
|
|
|
|
+ paragraph = document.createParagraph();
|
|
|
|
|
+ paragraph.setAlignment(ParagraphAlignment.CENTER);
|
|
|
|
|
+ }
|
|
|
|
|
+ XWPFRun run = paragraph.createRun();
|
|
|
|
|
+ // 检查是否是base64编码的图片
|
|
|
|
|
+ if (imageUrl.startsWith("data:image")) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ // 提取base64数据
|
|
|
|
|
+ String base64Data = imageUrl.substring(imageUrl.indexOf(",") + 1);
|
|
|
|
|
+ byte[] imageBytes = Base64.decode(base64Data);
|
|
|
|
|
+
|
|
|
|
|
+ // 确定图片类型
|
|
|
|
|
+ int pictureType = org.apache.poi.xwpf.usermodel.Document.PICTURE_TYPE_PNG;
|
|
|
|
|
+ String extension = "png";
|
|
|
|
|
+ if (imageUrl.startsWith("data:image/jpeg") || imageUrl.startsWith("data:image/jpg")) {
|
|
|
|
|
+ pictureType = org.apache.poi.xwpf.usermodel.Document.PICTURE_TYPE_JPEG;
|
|
|
|
|
+ extension = "jpg";
|
|
|
|
|
+ } else if (imageUrl.startsWith("data:image/gif")) {
|
|
|
|
|
+ pictureType = org.apache.poi.xwpf.usermodel.Document.PICTURE_TYPE_GIF;
|
|
|
|
|
+ extension = "gif";
|
|
|
|
|
+ } else if (imageUrl.startsWith("data:image/bmp")) {
|
|
|
|
|
+ pictureType = org.apache.poi.xwpf.usermodel.Document.PICTURE_TYPE_BMP;
|
|
|
|
|
+ extension = "bmp";
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 将图片添加到Word文档
|
|
|
|
|
+ run.addPicture(new ByteArrayInputStream(imageBytes),
|
|
|
|
|
+ pictureType,
|
|
|
|
|
+ "image." + extension,
|
|
|
|
|
+ Units.toEMU(400), // 宽度
|
|
|
|
|
+ Units.toEMU(300)); // 高度
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("处理图片时发生错误: ", e);
|
|
|
|
|
+ run.setText("[图片]");
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ run.setText("[图片: " + imageUrl + "]");
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private static void processHtmlContent(String htmlContent, XWPFDocument document, XWPFParagraph paragraph) {
|
|
|
|
|
+ // 处理HTML表格,包括带有colspan属性的情况
|
|
|
|
|
+ if (htmlContent.startsWith("<table>")) {
|
|
|
|
|
+ if (paragraph != null) {
|
|
|
|
|
+ paragraph.createRun().setText("[表格内容]");
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ XWPFTable table = document.createTable();
|
|
|
|
|
+ // 使用正则表达式或HTML解析器来更准确地解析HTML表格
|
|
|
|
|
+ // 这里简化处理,假设表格结构相对简单且colspan值已知或可推断
|
|
|
|
|
+ String[] rows = htmlContent.split("<tr>|</tr>");
|
|
|
|
|
+ boolean firstRow = true;
|
|
|
|
|
+ int maxColumns = 0; // 用于记录最大列数,以处理colspan
|
|
|
|
|
+
|
|
|
|
|
+ // 第一遍遍历确定最大列数
|
|
|
|
|
+ for (String row : rows) {
|
|
|
|
|
+ if (row.trim().isEmpty() || !row.contains("<td")) continue;
|
|
|
|
|
+ String[] cells = row.split("<td[^>]*>|</td>"); // 匹配<td>及其属性(如colspan)和</td>
|
|
|
|
|
+ int columnCount = 0;
|
|
|
|
|
+ for (String cell : cells) {
|
|
|
|
|
+ if (cell.trim().isEmpty() || cell.trim().equalsIgnoreCase("<td>") || cell.trim().equalsIgnoreCase("</td>")) continue;
|
|
|
|
|
+ // 检查是否有colspan属性
|
|
|
|
|
+ if (cell.contains("colspan=")) {
|
|
|
|
|
+ Pattern pattern = Pattern.compile("colspan\\s*=\\s*[\"']?(\\d+)[\"']?");
|
|
|
|
|
+ Matcher matcher = pattern.matcher(cell);
|
|
|
|
|
+ if (matcher.find()) {
|
|
|
|
|
+ columnCount += Integer.parseInt(matcher.group(1)) - 1; // 减去1,因为下面的循环会加1
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ columnCount++;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (columnCount > maxColumns) {
|
|
|
|
|
+ maxColumns = columnCount;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 第二遍遍历创建表格行和单元格
|
|
|
|
|
+ int currentColumn = 0;
|
|
|
|
|
+ for (String row : rows) {
|
|
|
|
|
+ if (row.trim().isEmpty() || !row.contains("<td")) continue;
|
|
|
|
|
+ XWPFTableRow tableRow = firstRow ? table.getRow(0) : table.createRow();
|
|
|
|
|
+ if (firstRow && tableRow.getTableCells().size() < maxColumns) {
|
|
|
|
|
+ // 如果第一行单元格数不足,则补充
|
|
|
|
|
+ for (int i = tableRow.getTableCells().size(); i < maxColumns; i++) {
|
|
|
|
|
+ tableRow.addNewTableCell();
|
|
|
|
|
+ }
|
|
|
|
|
+ } else if (!firstRow && tableRow.getTableCells().isEmpty()) {
|
|
|
|
|
+ // 对于非第一行,如果单元格为空,则根据maxColumns创建
|
|
|
|
|
+ for (int i = 0; i < maxColumns; i++) {
|
|
|
|
|
+ tableRow.createCell();
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ String[] cells = row.split("<td[^>]*>|</td>"); // 重新分割以处理单元格内容
|
|
|
|
|
+ currentColumn = 0;
|
|
|
|
|
+ for (String cell : cells) {
|
|
|
|
|
+ if (cell.trim().isEmpty() || cell.trim().equalsIgnoreCase("<td>") || cell.trim().equalsIgnoreCase("</td>")) continue;
|
|
|
|
|
+ int colspan = 1; // 默认colspan为1
|
|
|
|
|
+ if (cell.contains("colspan=")) {
|
|
|
|
|
+ Pattern pattern = Pattern.compile("colspan\\s*=\\s*[\"']?(\\d+)[\"']?");
|
|
|
|
|
+ Matcher matcher = pattern.matcher(cell);
|
|
|
|
|
+ if (matcher.find()) {
|
|
|
|
|
+ colspan = Integer.parseInt(matcher.group(1));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ // 确保不超出当前行的单元格范围(考虑colspan)
|
|
|
|
|
+ if (currentColumn < tableRow.getTableCells().size()) {
|
|
|
|
|
+ XWPFTableCell xwpfTableCell = currentColumn < tableRow.getTableCells().size() ?
|
|
|
|
|
+ tableRow.getTableCells().get(currentColumn) : tableRow.createCell();
|
|
|
|
|
+ // 处理colspan,可能需要合并单元格(这里简化处理,不实际合并,只是跳过后续单元格)
|
|
|
|
|
+ processHtmlCellContent(cell.replaceAll("<[^>]*>", ""), xwpfTableCell); // 移除HTML标签后处理内容
|
|
|
|
|
+ currentColumn += colspan; // 跳过被colspan覆盖的单元格
|
|
|
|
|
+ } else {
|
|
|
|
|
+ // 如果超出范围,则忽略(或根据需要处理)
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ firstRow = false;
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ // 处理非表格HTML内容
|
|
|
|
|
+ if (paragraph == null) {
|
|
|
|
|
+ paragraph = document.createParagraph();
|
|
|
|
|
+ }
|
|
|
|
|
+ processHtmlInlineContent(htmlContent, paragraph);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private static void processHtmlCellContent(String cellContent, XWPFTableCell cell) {
|
|
|
|
|
+ XWPFParagraph paragraph = cell.getParagraphs().isEmpty() ?
|
|
|
|
|
+ cell.addParagraph() : cell.getParagraphs().get(0);
|
|
|
|
|
+ processHtmlInlineContent(cellContent, paragraph);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private static void processHtmlInlineContent(String htmlContent, XWPFParagraph paragraph) {
|
|
|
|
|
+ if (htmlContent.contains("<img") && htmlContent.contains("data:image")) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ Pattern pattern = Pattern.compile("src\\s*=\\s*[\"']?(data:image[^\"'>]+)[\"']?");
|
|
|
|
|
+ Matcher matcher = pattern.matcher(htmlContent);
|
|
|
|
|
+ if (matcher.find()) {
|
|
|
|
|
+ String imageUrl = matcher.group(1);
|
|
|
|
|
+ if (imageUrl.startsWith("data:image")) {
|
|
|
|
|
+ XWPFRun run = paragraph.createRun();
|
|
|
|
|
+ String base64Data = imageUrl.substring(imageUrl.indexOf(",") + 1);
|
|
|
|
|
+ byte[] imageBytes = Base64.decode(base64Data);
|
|
|
|
|
+ int pictureType = Document.PICTURE_TYPE_PNG;
|
|
|
|
|
+ String extension = "png";
|
|
|
|
|
+ if (imageUrl.startsWith("data:image/jpeg") || imageUrl.startsWith("data:image/jpg")) {
|
|
|
|
|
+ pictureType = Document.PICTURE_TYPE_JPEG;
|
|
|
|
|
+ extension = "jpg";
|
|
|
|
|
+ } else if (imageUrl.startsWith("data:image/gif")) {
|
|
|
|
|
+ pictureType = Document.PICTURE_TYPE_GIF;
|
|
|
|
|
+ extension = "gif";
|
|
|
|
|
+ } else if (imageUrl.startsWith("data:image/bmp")) {
|
|
|
|
|
+ pictureType = Document.PICTURE_TYPE_BMP;
|
|
|
|
|
+ extension = "bmp";
|
|
|
|
|
+ }
|
|
|
|
|
+ run.addPicture(new ByteArrayInputStream(imageBytes),
|
|
|
|
|
+ pictureType,
|
|
|
|
|
+ "image." + extension,
|
|
|
|
|
+ Units.toEMU(400),
|
|
|
|
|
+ Units.toEMU(300));
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ paragraph.createRun().setText("[图片处理错误]");
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ // 如果不是图片或者处理失败,按普通文本处理(移除HTML标签)
|
|
|
|
|
+ String textContent = htmlContent.replaceAll("<[^>]*>", "");
|
|
|
|
|
+ paragraph.createRun().setText(textContent);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private static File convertHtmlToPdf(String htmlContent, String outputPdfPath) throws IOException {
|
|
|
|
|
+ try (OutputStream outputStream = new FileOutputStream(outputPdfPath)) {
|
|
|
|
|
+ HtmlConverter.convertToPdf(htmlContent, outputStream);
|
|
|
|
|
+ }
|
|
|
|
|
+ return new File(outputPdfPath);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private static File convertHtmlToDocx(String htmlContent, String outputPdfPath) throws IOException {
|
|
|
|
|
+ XWPFDocument document = new XWPFDocument();
|
|
|
|
|
+ XWPFParagraph paragraph = document.createParagraph();
|
|
|
|
|
+ XWPFRun run = paragraph.createRun();
|
|
|
|
|
+ run.setText(htmlContent);
|
|
|
|
|
+ FileOutputStream out = new FileOutputStream(outputPdfPath);
|
|
|
|
|
+ document.write(out);
|
|
|
|
|
+ out.close();
|
|
|
|
|
+ document.close();
|
|
|
|
|
+ return new File(outputPdfPath);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ //提交翻译任务
|
|
|
|
|
+ public String translate(TranslateFileDTO fileDTO) throws IOException {
|
|
|
|
|
+ String param = new Gson().toJson(fileDTO);
|
|
|
|
|
+ RequestBody requestBody = RequestBody.create(MediaType.parse("application/json"), param);
|
|
|
|
|
+ OkHttpClient okHttpClient = new OkHttpClient.Builder()
|
|
|
|
|
+ .connectTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .writeTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .readTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .build();
|
|
|
|
|
+ Request request = new Request.Builder()
|
|
|
|
|
+ .url(DOCUTRANSLATE_URL + "/service/translate")
|
|
|
|
|
+ .post(requestBody)
|
|
|
|
|
+ .build();
|
|
|
|
|
+ return Objects.requireNonNull(okHttpClient.newCall(request).execute().body()).string();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ //获取翻译状态
|
|
|
|
|
+ public String getTranslateStatus(String taskId) throws IOException {
|
|
|
|
|
+ OkHttpClient okHttpClient = new OkHttpClient.Builder()
|
|
|
|
|
+ .connectTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .writeTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .readTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .build();
|
|
|
|
|
+ Request request = new Request.Builder()
|
|
|
|
|
+ .url(DOCUTRANSLATE_URL + "/service/status/" + taskId)
|
|
|
|
|
+ .get()
|
|
|
|
|
+ .build();
|
|
|
|
|
+ return Objects.requireNonNull(okHttpClient.newCall(request).execute().body()).string();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ //获取翻译任务日志
|
|
|
|
|
+ public String getTranslateLogs(String taskId) throws IOException {
|
|
|
|
|
+ OkHttpClient okHttpClient = new OkHttpClient.Builder()
|
|
|
|
|
+ .connectTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .writeTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .readTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .build();
|
|
|
|
|
+ Request request = new Request.Builder()
|
|
|
|
|
+ .url(DOCUTRANSLATE_URL + "/service/logs/" + taskId)
|
|
|
|
|
+ .get()
|
|
|
|
|
+ .build();
|
|
|
|
|
+ return Objects.requireNonNull(okHttpClient.newCall(request).execute().body()).string();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ //翻译文件下载
|
|
|
|
|
+ public String translateFileDownload(String taskId, String type) throws IOException {
|
|
|
|
|
+ OkHttpClient okHttpClient = new OkHttpClient.Builder()
|
|
|
|
|
+ .connectTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .writeTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .readTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .build();
|
|
|
|
|
+ Request request = new Request.Builder()
|
|
|
|
|
+ .url(DOCUTRANSLATE_URL + "/service/download/" + taskId + "/" + type)
|
|
|
|
|
+ .get()
|
|
|
|
|
+ .build();
|
|
|
|
|
+ return Objects.requireNonNull(okHttpClient.newCall(request).execute().body()).string();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ //释放文件资源
|
|
|
|
|
+ public String release(String taskId) throws IOException {
|
|
|
|
|
+ RequestBody requestBody = RequestBody.create(MediaType.parse("application/json"), "");
|
|
|
|
|
+ OkHttpClient okHttpClient = new OkHttpClient.Builder()
|
|
|
|
|
+ .connectTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .writeTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .readTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .build();
|
|
|
|
|
+ Request request = new Request.Builder()
|
|
|
|
|
+ .url(DOCUTRANSLATE_URL + "/service/release/" + taskId)
|
|
|
|
|
+ .post(requestBody)
|
|
|
|
|
+ .build();
|
|
|
|
|
+ return Objects.requireNonNull(okHttpClient.newCall(request).execute().body()).string();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 删除固定的image标签
|
|
|
|
|
+ */
|
|
|
|
|
+ private String removeFixedImages(String markdownContent) {
|
|
|
|
|
+// markdownContent = markdownContent.replaceAll("!\\[.*?\\]\\(data:image/[^)]+\\)", "");
|
|
|
|
|
+ // 使用正则表达式匹配并删除特定的base64图片标签
|
|
|
|
|
+// markdownContent = markdownContent.replaceAll("!\\[Image\\]\\(data:image/png;base64,iVBORw0KGgoAAA[a-zA-Z0-9+/=]*\\)", "");
|
|
|
|
|
+
|
|
|
|
|
+ // 使用正则表达式匹配并删除特定的base64图片标签
|
|
|
|
|
+ markdownContent = markdownContent.replaceAll("!\\[Image\\]\\(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA[a-zA-Z0-9+/=]*\\)", "");
|
|
|
|
|
+
|
|
|
|
|
+ // 清理多余的空行
|
|
|
|
|
+// markdownContent = markdownContent.replaceAll("\n\\s*\n\\s*", "\n\n");
|
|
|
|
|
+
|
|
|
|
|
+ return markdownContent;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public TranslateFilePayLoadDTO load() {
|
|
|
|
|
+ TranslateFilePayLoadDTO loadDTO = new TranslateFilePayLoadDTO();
|
|
|
|
|
+ loadDTO.setApi_key(API_KEY);
|
|
|
|
|
+ loadDTO.setBase_url(BASE_URL);
|
|
|
|
|
+ loadDTO.setChunk_size(CHUNK_SIZE);
|
|
|
|
|
+ loadDTO.setCode_ocr(true);
|
|
|
|
|
+ loadDTO.setConcurrent(CONCURRENT);
|
|
|
|
|
+ loadDTO.setConvert_engine(CONVERT_ENGINE);
|
|
|
|
|
+ loadDTO.setCustom_prompt(CUSTOM_PROMPT);
|
|
|
|
|
+ loadDTO.setForce_json(false);
|
|
|
|
|
+ loadDTO.setFormula_ocr(true);
|
|
|
|
|
+ loadDTO.setGlossary_generate_enable(false);
|
|
|
|
|
+ loadDTO.setMineru_deploy_backend("pipeline");
|
|
|
|
|
+ loadDTO.setMineru_deploy_base_url("http://127.0.0.1:8000");
|
|
|
|
|
+ loadDTO.setMineru_deploy_end_page_id(99999L);
|
|
|
|
|
+ loadDTO.setMineru_deploy_formula_enable(true);
|
|
|
|
|
+ loadDTO.setMineru_deploy_start_page_id(0L);
|
|
|
|
|
+ loadDTO.setMineru_token(MINERU_TOKEN);
|
|
|
|
|
+ loadDTO.setModel_version("vlm");
|
|
|
|
|
+ loadDTO.setModel_id(MODEL_ID);
|
|
|
|
|
+ loadDTO.setRetry(RETRY_COUNT);
|
|
|
|
|
+ loadDTO.setSkip_translate(false);
|
|
|
|
|
+ loadDTO.setSystem_proxy_enable(false);
|
|
|
|
|
+ loadDTO.setTemperature(TEMPERATURE);
|
|
|
|
|
+ loadDTO.setThinking(THINKING_MODE);
|
|
|
|
|
+ loadDTO.setTo_lang(TARGET_LANGUAGE);
|
|
|
|
|
+ loadDTO.setWorkflow_type("markdown_based");
|
|
|
|
|
+ return loadDTO;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public void fileTranslate2(File file) throws Exception {
|
|
|
|
|
+ TranslateFileDTO fileDTO = new TranslateFileDTO();
|
|
|
|
|
+ String fileName = file.getName();
|
|
|
|
|
+ fileDTO.setFile_name(fileName);
|
|
|
|
|
+ fileDTO.setFile_content(Base64.encode(file));
|
|
|
|
|
+ TranslateFilePayLoadDTO loadDTO = new TranslateFilePayLoadDTO();
|
|
|
|
|
+ loadDTO.setApi_key(API_KEY);
|
|
|
|
|
+ loadDTO.setBase_url(BASE_URL);
|
|
|
|
|
+ loadDTO.setChunk_size(CHUNK_SIZE);
|
|
|
|
|
+ loadDTO.setCode_ocr(true);
|
|
|
|
|
+ loadDTO.setConcurrent(CONCURRENT);
|
|
|
|
|
+ loadDTO.setConvert_engine(CONVERT_ENGINE);
|
|
|
|
|
+ loadDTO.setCustom_prompt(CUSTOM_PROMPT);
|
|
|
|
|
+ loadDTO.setForce_json(false);
|
|
|
|
|
+ loadDTO.setFormula_ocr(true);
|
|
|
|
|
+ loadDTO.setGlossary_generate_enable(false);
|
|
|
|
|
+ loadDTO.setMineru_deploy_backend("pipeline");
|
|
|
|
|
+ loadDTO.setMineru_deploy_base_url("http://127.0.0.1:8000");
|
|
|
|
|
+ loadDTO.setMineru_deploy_end_page_id(99999L);
|
|
|
|
|
+ loadDTO.setMineru_deploy_formula_enable(true);
|
|
|
|
|
+ loadDTO.setMineru_deploy_start_page_id(0L);
|
|
|
|
|
+ loadDTO.setMineru_token(MINERU_TOKEN);
|
|
|
|
|
+ loadDTO.setModel_version("vlm");
|
|
|
|
|
+ loadDTO.setModel_id(MODEL_ID);
|
|
|
|
|
+ loadDTO.setRetry(RETRY_COUNT);
|
|
|
|
|
+ loadDTO.setSkip_translate(false);
|
|
|
|
|
+ loadDTO.setSystem_proxy_enable(false);
|
|
|
|
|
+ loadDTO.setTemperature(TEMPERATURE);
|
|
|
|
|
+ loadDTO.setThinking(THINKING_MODE);
|
|
|
|
|
+ loadDTO.setTo_lang(TARGET_LANGUAGE);
|
|
|
|
|
+ loadDTO.setWorkflow_type("markdown_based");
|
|
|
|
|
+ fileDTO.setPayload(loadDTO);
|
|
|
|
|
+ String str = this.translate(fileDTO);
|
|
|
|
|
+ TranslateFileVO translateFileVO = JSONObject.parseObject(str, TranslateFileVO.class);
|
|
|
|
|
+ boolean status = false;
|
|
|
|
|
+ String taskId = "";
|
|
|
|
|
+ if (ObjectUtils.isNotEmpty(translateFileVO) && StringUtils.isNotEmpty(translateFileVO.getTask_id())) {
|
|
|
|
|
+ taskId = translateFileVO.getTask_id();
|
|
|
|
|
+ while (true) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ String res = this.getTranslateStatus(taskId);
|
|
|
|
|
+ JSONObject jsonObject = JSONObject.parseObject(res);
|
|
|
|
|
+ Boolean bool = jsonObject.getBoolean("download_ready");
|
|
|
|
|
+ if (bool) {
|
|
|
|
|
+ status = true;
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ Thread.sleep(2000);
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ if (status && StringUtils.isNotEmpty(taskId)) {
|
|
|
|
|
+ String outputPdfPath = "output.docx";
|
|
|
|
|
+ String result = translateFileDownload(taskId, "markdown");
|
|
|
|
|
+ System.out.println("--------------");
|
|
|
|
|
+
|
|
|
|
|
+ MutableDataSet options = new MutableDataSet();
|
|
|
|
|
+ Parser parser = Parser.builder(options).build();
|
|
|
|
|
+ Node document = parser.parse(result);
|
|
|
|
|
+
|
|
|
|
|
+ // 2. 创建 Word 文档
|
|
|
|
|
+ XWPFDocument wordDocument = new XWPFDocument();
|
|
|
|
|
+
|
|
|
|
|
+ // 3. 遍历 Markdown AST (抽象语法树) 并转换为 Word 内容
|
|
|
|
|
+ document.getChildren().forEach(node -> processNode(node, wordDocument));
|
|
|
|
|
+ // 4. 保存到临时文件
|
|
|
|
|
+ try (FileOutputStream out = new FileOutputStream(outputPdfPath)) {
|
|
|
|
|
+ wordDocument.write(out);
|
|
|
|
|
+ }
|
|
|
|
|
+ wordDocument.close();
|
|
|
|
|
+ File pdfFile = new File(outputPdfPath);
|
|
|
|
|
+ System.out.println(pdfFile.getAbsolutePath());
|
|
|
|
|
+ file.deleteOnExit();
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 预处理HTML中的图片标签,为图片添加适当的宽度属性以确保自适应
|
|
|
|
|
+ */
|
|
|
|
|
+ private String preprocessImages(String htmlContent) {
|
|
|
|
|
+ // 使用正则表达式查找所有<img>标签并添加宽度属性
|
|
|
|
|
+ // 设置图片最大宽度为页面宽度的90%,保持宽高比
|
|
|
|
|
+ String processedContent = htmlContent.replaceAll(
|
|
|
|
|
+ "<img([^>]*?)src=\"([^\"]*?)\"([^>]*?)>",
|
|
|
|
|
+ "<img$1src=\"$2\" width=\"100%\" style=\"max-width: 100%; height: auto; display: block; margin: 0 auto;\"$3>"
|
|
|
|
|
+ );
|
|
|
|
|
+
|
|
|
|
|
+ return processedContent;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 为HTML内容添加CSS样式,使图片自适应页面大小并居中,保持原始宽高比
|
|
|
|
|
+ */
|
|
|
|
|
+ private String addImageStyles(String htmlContent) {
|
|
|
|
|
+ // 添加CSS样式到HTML头部,确保图片按原始宽高比缩放以适应页面
|
|
|
|
|
+ String cssStyles = "<style>\n" +
|
|
|
|
|
+ "img {\n" +
|
|
|
|
|
+ " max-width: 100% !important;\n" +
|
|
|
|
|
+ " height: auto !important;\n" +
|
|
|
|
|
+ " display: block !important;\n" +
|
|
|
|
|
+ " margin-left: auto !important;\n" +
|
|
|
|
|
+ " margin-right: auto !important;\n" +
|
|
|
|
|
+ " object-fit: contain;\n" + // 保持宽高比
|
|
|
|
|
+ " page-break-inside: avoid;\n" + // 避免图片跨页分割
|
|
|
|
|
+ "}\n" +
|
|
|
|
|
+ // 添加页面大小设置,确保内容适应页面
|
|
|
|
|
+ "@page {\n" +
|
|
|
|
|
+ " size: A4;\n" +
|
|
|
|
|
+ " margin: 1cm;\n" + // 减小页边距以增加可用空间
|
|
|
|
|
+ "}\n" +
|
|
|
|
|
+ "body {\n" +
|
|
|
|
|
+ " margin: 0;\n" +
|
|
|
|
|
+ " padding: 0.5cm;\n" +
|
|
|
|
|
+ "}\n" +
|
|
|
|
|
+ "</style>\n";
|
|
|
|
|
+
|
|
|
|
|
+ // 如果HTML已有<head>标签,则在其中添加样式
|
|
|
|
|
+ if (htmlContent.contains("<head>")) {
|
|
|
|
|
+ htmlContent = htmlContent.replace("<head>", "<head>\n" + cssStyles);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ // 否则在<html>标签后添加<head>和样式
|
|
|
|
|
+ htmlContent = htmlContent.replace("<html>", "<html>\n<head>\n" + cssStyles + "</head>");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return htmlContent;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public FileTranslateVO fileTranslate(File file, Integer type) throws IOException {
|
|
|
|
|
+ String fileName = file.getName();
|
|
|
|
|
+ if (!fileName.endsWith(".pdf")) {
|
|
|
|
|
+ throw new XiaoShiException(ExceptionEnum.BUSINESS_ERROR, "请上传pdf文件");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ List<String> guids = null;
|
|
|
|
|
+ try {
|
|
|
|
|
+ guids = fileManagerService.uploadFileGetGuid2(Collections.singletonList(file));
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ throw new XiaoShiException(ExceptionEnum.BUSINESS_ERROR, "上传pdf文件失败");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (CollectionUtils.isEmpty(guids)) {
|
|
|
|
|
+ throw new XiaoShiException(ExceptionEnum.BUSINESS_ERROR, "上传pdf文件失败");
|
|
|
|
|
+ }
|
|
|
|
|
+ String guid = guids.get(0);
|
|
|
|
|
+
|
|
|
|
|
+ TranslateFileDTO fileDTO = new TranslateFileDTO();
|
|
|
|
|
+ fileDTO.setFile_name(fileName);
|
|
|
|
|
+ fileDTO.setFile_content(Base64.encode(file));
|
|
|
|
|
+
|
|
|
|
|
+ TranslateFilePayLoadDTO loadDTO = this.load();
|
|
|
|
|
+ fileDTO.setPayload(loadDTO);
|
|
|
|
|
+ FileTranslateVO translateVO = new FileTranslateVO();
|
|
|
|
|
+ try {
|
|
|
|
|
+ String str = this.translate(fileDTO);
|
|
|
|
|
+ TranslateFileVO translateFileVO = JSONObject.parseObject(str, TranslateFileVO.class);
|
|
|
|
|
+
|
|
|
|
|
+ FileInfoDTO infoDTO = new FileInfoDTO();
|
|
|
|
|
+ infoDTO.setType(type);
|
|
|
|
|
+ infoDTO.setStatus(1);
|
|
|
|
|
+ infoDTO.setOldFileName(fileName);
|
|
|
|
|
+ infoDTO.setOldFileGuid(guid);
|
|
|
|
|
+ if (ObjectUtils.isNotEmpty(translateFileVO) && StringUtils.isNotEmpty(translateFileVO.getTask_id())) {
|
|
|
|
|
+ String taskId = translateFileVO.getTask_id();
|
|
|
|
|
+ infoDTO.setTaskId(taskId);
|
|
|
|
|
+ Integer fileInfoId = fileInfoService.addOrEditFileInfo(infoDTO);
|
|
|
|
|
+ translateVO.setFileInfoId(fileInfoId);
|
|
|
|
|
+ translateVO.setTaskId(taskId);
|
|
|
|
|
+ translateVO.setOldFileName(fileName);
|
|
|
|
|
+ translateVO.setOldFileGuid(guid);
|
|
|
|
|
+ translateVO.setType(type);
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ throw new XiaoShiException(ExceptionEnum.BUSINESS_ERROR, "请求翻译服务失败");
|
|
|
|
|
+ } finally {
|
|
|
|
|
+ file.delete();
|
|
|
|
|
+ }
|
|
|
|
|
+ return translateVO;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public SelectTranslatePlanVO selectTranslatePlan(TaskIdDTO vo) throws IOException {
|
|
|
|
|
+ String taskId = vo.getTaskId();
|
|
|
|
|
+ SelectTranslatePlanVO planVO = new SelectTranslatePlanVO();
|
|
|
|
|
+
|
|
|
|
|
+ try {
|
|
|
|
|
+ // 获取日志和状态
|
|
|
|
|
+ String translateLogs = this.getTranslateLogs(taskId);
|
|
|
|
|
+ String translateStatus = this.getTranslateStatus(taskId);
|
|
|
|
|
+
|
|
|
|
|
+ // 处理日志
|
|
|
|
|
+ JSONObject logsObject = JSONObject.parseObject(translateLogs);
|
|
|
|
|
+ List<String> logs = JSON.parseArray(logsObject.getJSONArray("logs").toJSONString(), String.class);
|
|
|
|
|
+ SelectTranslateLogsVO logsVO = new SelectTranslateLogsVO();
|
|
|
|
|
+ logsVO.setLogs(logs);
|
|
|
|
|
+ planVO.setTranslateLogsVO(logsVO);
|
|
|
|
|
+
|
|
|
|
|
+ // 处理状态
|
|
|
|
|
+ SelectTranslateStatusVO statusVO = JSONObject.parseObject(translateStatus, SelectTranslateStatusVO.class);
|
|
|
|
|
+ planVO.setTranslateStatusVO(statusVO);
|
|
|
|
|
+
|
|
|
|
|
+ // 更新状态逻辑
|
|
|
|
|
+ if (statusVO != null && Boolean.FALSE.equals(statusVO.getIs_processing())) {
|
|
|
|
|
+ if (Boolean.TRUE.equals(statusVO.getDownload_ready())) {
|
|
|
|
|
+ updateStatus(taskId, 2);
|
|
|
|
|
+ } else if (Boolean.TRUE.equals(statusVO.getError_flag())) {
|
|
|
|
|
+ updateStatus(taskId, 4);
|
|
|
|
|
+ this.release(taskId);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ // 异常处理
|
|
|
|
|
+ updateStatus(taskId, 4);
|
|
|
|
|
+ this.release(taskId);
|
|
|
|
|
+ SelectTranslateStatusVO statusVO = new SelectTranslateStatusVO();
|
|
|
|
|
+ statusVO.setIs_processing(false);
|
|
|
|
|
+ planVO.setTranslateStatusVO(statusVO);
|
|
|
|
|
+ planVO.setTranslateLogsVO(new SelectTranslateLogsVO());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return planVO;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public void updateStatus(String taskId, Integer status) {
|
|
|
|
|
+ LambdaQueryWrapper<FileInfo> queryWrapper = new LambdaQueryWrapper<>();
|
|
|
|
|
+ queryWrapper.eq(FileInfo::getTaskId, taskId);
|
|
|
|
|
+ FileInfo fileInfo = fileInfoService.getOne(queryWrapper, false);
|
|
|
|
|
+ fileInfo.setStatus(status);
|
|
|
|
|
+ fileInfo.setId(fileInfo.getId());
|
|
|
|
|
+ fileInfo.updateById();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public String generateTranslateFile(TaskIdDTO vo) throws IOException {
|
|
|
|
|
+ String taskId = vo.getTaskId();
|
|
|
|
|
+ LambdaQueryWrapper<FileInfo> queryWrapper = new LambdaQueryWrapper<>();
|
|
|
|
|
+ queryWrapper.eq(FileInfo::getTaskId, taskId);
|
|
|
|
|
+ FileInfo fileInfo = fileInfoService.getOne(queryWrapper, false);
|
|
|
|
|
+ String oldFileName = fileInfo.getOldFileName();
|
|
|
|
|
+ String fileName = null;
|
|
|
|
|
+ if (oldFileName.contains(".")) {
|
|
|
|
|
+ fileName = oldFileName.substring(0, oldFileName.indexOf("."));
|
|
|
|
|
+ } else {
|
|
|
|
|
+ fileName = oldFileName;
|
|
|
|
|
+ }
|
|
|
|
|
+ String outputPdfPath = fileName + "-译文.docx";
|
|
|
|
|
+ String result = translateFileDownload(taskId, "markdown");
|
|
|
|
|
+
|
|
|
|
|
+ MutableDataSet options = new MutableDataSet();
|
|
|
|
|
+ Parser parser = Parser.builder(options).build();
|
|
|
|
|
+ Node document = parser.parse(result);
|
|
|
|
|
+
|
|
|
|
|
+ // 2. 创建 Word 文档
|
|
|
|
|
+ XWPFDocument wordDocument = new XWPFDocument();
|
|
|
|
|
+
|
|
|
|
|
+ // 3. 遍历 Markdown AST (抽象语法树) 并转换为 Word 内容
|
|
|
|
|
+ document.getChildren().forEach(node -> processNode(node, wordDocument));
|
|
|
|
|
+ // 4. 保存到临时文件
|
|
|
|
|
+ try (FileOutputStream out = new FileOutputStream(outputPdfPath)) {
|
|
|
|
|
+ wordDocument.write(out);
|
|
|
|
|
+ }
|
|
|
|
|
+ wordDocument.close();
|
|
|
|
|
+ File pdfFile = new File(outputPdfPath);
|
|
|
|
|
+ List<String> guids = fileManagerService.uploadFileGetGuid2(Collections.singletonList(pdfFile));
|
|
|
|
|
+ if (CollectionUtils.isEmpty(guids)) {
|
|
|
|
|
+ throw new XiaoShiException(ExceptionEnum.BUSINESS_ERROR, "翻译文件保存失败");
|
|
|
|
|
+ }
|
|
|
|
|
+ String guid = guids.get(0);
|
|
|
|
|
+
|
|
|
|
|
+ fileInfo.setNewFileGuid(guid);
|
|
|
|
|
+ fileInfo.setNewFileName(outputPdfPath);
|
|
|
|
|
+ fileInfo.setStatus(3);
|
|
|
|
|
+ fileInfo.setId(fileInfo.getId());
|
|
|
|
|
+ fileInfo.updateById();
|
|
|
|
|
+
|
|
|
|
|
+ pdfFile.delete();
|
|
|
|
|
+
|
|
|
|
|
+ this.release(taskId);
|
|
|
|
|
+
|
|
|
|
|
+ return guid;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public String generateTranslatePDFFile(TaskIdDTO vo) throws IOException {
|
|
|
|
|
+ String taskId = vo.getTaskId();
|
|
|
|
|
+ LambdaQueryWrapper<FileInfo> queryWrapper = new LambdaQueryWrapper<>();
|
|
|
|
|
+ queryWrapper.eq(FileInfo::getTaskId, taskId);
|
|
|
|
|
+ FileInfo fileInfo = fileInfoService.getOne(queryWrapper, false);
|
|
|
|
|
+ String oldFileName = fileInfo.getOldFileName();
|
|
|
|
|
+ String fileName = null;
|
|
|
|
|
+ if (oldFileName.contains(".")) {
|
|
|
|
|
+ fileName = oldFileName.substring(0, oldFileName.indexOf("."));
|
|
|
|
|
+ } else {
|
|
|
|
|
+ fileName = oldFileName;
|
|
|
|
|
+ }
|
|
|
|
|
+ String outputPdfPath = fileName + "-译文.pdf";
|
|
|
|
|
+ String result = translateFileDownload(taskId, "markdown");
|
|
|
|
|
+
|
|
|
|
|
+ if (fileInfo.getType() == 3) {
|
|
|
|
|
+ // 1. 删除特定的image标签
|
|
|
|
|
+ result = removeFixedImages(result);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 2. 解析Markdown并转换为HTML
|
|
|
|
|
+ Parser parser = Parser.builder().build();
|
|
|
|
|
+ HtmlRenderer renderer = HtmlRenderer.builder().build();
|
|
|
|
|
+ Node document = parser.parse(result);
|
|
|
|
|
+
|
|
|
|
|
+ // 3. 转换为HTML并应用CSS样式使图片自适应和居中
|
|
|
|
|
+ String htmlContent = renderer.render(document);
|
|
|
|
|
+
|
|
|
|
|
+ // 4. 预处理HTML中的图片标签,添加宽度和高度属性以确保自适应
|
|
|
|
|
+ String processedHtmlContent = preprocessImages(htmlContent);
|
|
|
|
|
+
|
|
|
|
|
+ // 5. 添加CSS样式使图片自适应页面大小并居中,保持原始宽高比
|
|
|
|
|
+ String styledHtmlContent = addImageStyles(processedHtmlContent);
|
|
|
|
|
+
|
|
|
|
|
+ // 6. 将HTML转换为PDF
|
|
|
|
|
+ File pdfFile = convertHtmlToPdf(styledHtmlContent, outputPdfPath);
|
|
|
|
|
+ List<String> guids = fileManagerService.uploadFileGetGuid2(Collections.singletonList(pdfFile));
|
|
|
|
|
+ if (CollectionUtils.isEmpty(guids)) {
|
|
|
|
|
+ throw new XiaoShiException(ExceptionEnum.BUSINESS_ERROR, "翻译文件保存失败");
|
|
|
|
|
+ }
|
|
|
|
|
+ String guid = guids.get(0);
|
|
|
|
|
+
|
|
|
|
|
+ fileInfo.setNewFileGuid(guid);
|
|
|
|
|
+ fileInfo.setNewFileName(outputPdfPath);
|
|
|
|
|
+ fileInfo.setStatus(3);
|
|
|
|
|
+ fileInfo.setId(fileInfo.getId());
|
|
|
|
|
+ fileInfo.updateById();
|
|
|
|
|
+
|
|
|
|
|
+ pdfFile.delete();
|
|
|
|
|
+
|
|
|
|
|
+ this.release(taskId);
|
|
|
|
|
+
|
|
|
|
|
+ return guid;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public FileTranslateVO againTranslate(FileInfoIdDTO infoIdDTO) throws IOException {
|
|
|
|
|
+ Integer fileInfoId = infoIdDTO.getFileInfoId();
|
|
|
|
|
+ FileInfo fileInfo = fileInfoService.getById(fileInfoId);
|
|
|
|
|
+ if (ObjectUtils.isEmpty(fileInfo)) {
|
|
|
|
|
+ throw new XiaoShiException(ExceptionEnum.BUSINESS_ERROR, "当前任务不存在");
|
|
|
|
|
+ }
|
|
|
|
|
+ Integer retry = fileInfo.getRetry();
|
|
|
|
|
+ if (retry > 5) {
|
|
|
|
|
+ throw new XiaoShiException(ExceptionEnum.BUSINESS_ERROR, "该文件无法翻译");
|
|
|
|
|
+ }
|
|
|
|
|
+ String fileName = fileInfo.getOldFileName();
|
|
|
|
|
+ String guid = fileInfo.getOldFileGuid();
|
|
|
|
|
+ File file = fileManagerService.getTempFileByGuid(guid);
|
|
|
|
|
+
|
|
|
|
|
+ TranslateFileDTO fileDTO = new TranslateFileDTO();
|
|
|
|
|
+ fileDTO.setFile_name(fileName);
|
|
|
|
|
+ fileDTO.setFile_content(Base64.encode(file));
|
|
|
|
|
+
|
|
|
|
|
+ TranslateFilePayLoadDTO loadDTO = this.load();
|
|
|
|
|
+ fileDTO.setPayload(loadDTO);
|
|
|
|
|
+ FileTranslateVO translateVO = new FileTranslateVO();
|
|
|
|
|
+ try {
|
|
|
|
|
+ String str = this.translate(fileDTO);
|
|
|
|
|
+ TranslateFileVO translateFileVO = JSONObject.parseObject(str, TranslateFileVO.class);
|
|
|
|
|
+
|
|
|
|
|
+ if (ObjectUtils.isNotEmpty(translateFileVO) && StringUtils.isNotEmpty(translateFileVO.getTask_id())) {
|
|
|
|
|
+ String taskId = translateFileVO.getTask_id();
|
|
|
|
|
+ fileInfo.setStatus(1);
|
|
|
|
|
+ fileInfo.setTaskId(taskId);
|
|
|
|
|
+ fileInfo.setRetry(retry + 1);
|
|
|
|
|
+ fileInfo.updateById();
|
|
|
|
|
+ translateVO.setFileInfoId(fileInfoId);
|
|
|
|
|
+ translateVO.setTaskId(taskId);
|
|
|
|
|
+ translateVO.setOldFileName(fileName);
|
|
|
|
|
+ translateVO.setOldFileGuid(guid);
|
|
|
|
|
+ translateVO.setType(fileInfo.getType());
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ throw new XiaoShiException(ExceptionEnum.BUSINESS_ERROR, "请求翻译失败");
|
|
|
|
|
+ } finally {
|
|
|
|
|
+ file.delete();
|
|
|
|
|
+ }
|
|
|
|
|
+ return translateVO;
|
|
|
|
|
+ }
|
|
|
|
|
+}
|