admin管理员组文章数量:1605135
java使用Apache PDFBox+POI实现PDF转Word
注:仅适用于简单的纯文本转换,只提取 PDF 中的文字内容,表格、图片、版式等复杂结构无法保留
<!-- PDFBox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.29</version> <!-- 请使用 2.0.x 系列的最新版本;3.x 已移除 PDDocument.load,需改用 Loader.loadPDF -->
</dependency>
<!-- Apache POI -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId> <!-- XWPFDocument 等 xwpf 类位于 poi-ooxml 中,仅引入 poi 无法编译 -->
<version>5.0.0</version>
</dependency>
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Minimal PDF-to-Word converter: extracts the plain text of every PDF page with
 * PDFBox and writes it into a new .docx document with Apache POI.
 *
 * <p>Only text is carried over. Each PDF page becomes one Word paragraph, and the
 * line breaks reported by the text stripper are kept as line breaks inside it.
 */
public class PdfToWord {

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the PDF input path and {@code args[1]}
     *             the .docx output path; when fewer than two arguments are given the
     *             built-in sample paths are used
     * @throws Exception if the PDF cannot be read or the Word file cannot be written
     */
    public static void main(String[] args) throws Exception {
        // Defaults: PDF input path and Word output path.
        String pdfFilePath = "D:\\test.pdf";
        String wordFilePath = "D:\\test.docx";
        if (args != null && args.length >= 2) {
            pdfFilePath = args[0];
            wordFilePath = args[1];
        }
        convertPdfToWord(pdfFilePath, wordFilePath);
    }

    /**
     * Converts the text content of a PDF file into a Word (.docx) file.
     *
     * @param pdfFilePath  path of the PDF file to read
     * @param wordFilePath path of the .docx file to create or overwrite
     * @throws IOException if the PDF cannot be loaded or parsed, or the output cannot be written
     */
    public static void convertPdfToWord(String pdfFilePath, String wordFilePath) throws IOException {
        // try-with-resources closes both documents even when extraction or writing
        // fails. The PDF is loaded directly from the file, so no separate input
        // stream is left open.
        try (PDDocument document = PDDocument.load(Paths.get(pdfFilePath).toFile());
             XWPFDocument wordDocument = new XWPFDocument()) {
            PDFTextStripper pdfTextStripper = new PDFTextStripper();
            int totalPages = document.getNumberOfPages();
            // PDFTextStripper page numbers are 1-based; extract one page at a time
            // so that every page ends up in its own paragraph.
            for (int pageNumber = 1; pageNumber <= totalPages; pageNumber++) {
                pdfTextStripper.setStartPage(pageNumber);
                pdfTextStripper.setEndPage(pageNumber);
                String text = pdfTextStripper.getText(document);
                addParagraphToWord(wordDocument, text);
            }
            try (FileOutputStream out = new FileOutputStream(wordFilePath)) {
                wordDocument.write(out);
            }
        }
    }

    /**
     * Appends one paragraph containing {@code text} to the Word document.
     *
     * <p>The text is split on line terminators and an explicit line break is inserted
     * between the pieces, because a newline character inside a single text node is
     * not shown as a line break by Word.
     *
     * @param document the Word document to append to
     * @param text     the text to add; {@code null} or empty yields an empty paragraph
     */
    private static void addParagraphToWord(XWPFDocument document, String text) {
        XWPFParagraph paragraph = document.createParagraph();
        XWPFRun run = paragraph.createRun();
        if (text == null || text.isEmpty()) {
            return;
        }
        // split() without a limit drops trailing empty strings, so the line
        // separator at the end of the page text does not add a blank last line.
        String[] lines = text.split("\\r\\n|\\r|\\n");
        for (int i = 0; i < lines.length; i++) {
            if (i > 0) {
                run.addBreak();
            }
            // setText(String) appends a new text node after what the run already holds.
            run.setText(lines[i]);
        }
    }
}
版权声明:本文标题:java使用Apache PDFBox+POI实现PDF转Word 内容由热心网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:https://m.elefans.com/dianzi/1728478526a1159873.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论