java pdf文件转图片并进行ocr识别保存文字

【代码】java pdf文件转图片并进行ocr识别保存文字。

xiaoerbuyu1233

196人浏览 · 2025-06-07 22:00:57

xiaoerbuyu1233 · 2025-06-07 22:00:57 发布

package org.me.swing;


import com.baidu.aip.ocr.AipOcr;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.json.JSONArray;
import org.json.JSONObject;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;

//        <dependency>
//            <groupId>com.baidu.aip</groupId>
//            <artifactId>java-sdk</artifactId>
//            <version>4.8.0</version>
//        </dependency>
//        <dependency>
//            <groupId>org.apache.pdfbox</groupId>
//            <artifactId>pdfbox</artifactId>
//            <version>3.0.1</version>
//        </dependency>

/**
 * Renders every page of a PDF to a JPEG image and, when enabled, runs Baidu OCR on each
 * image and appends the recognized text to a text file.
 *
 * <p>For an input {@code dir/name.pdf} the program writes:
 * <ul>
 *   <li>{@code dir/images/page_001.jpg}, {@code page_002.jpg}, ... (one image per page);</li>
 *   <li>{@code dir/name.txt} (UTF-8). It is always created; it only receives content when the
 *       OCR step is enabled.</li>
 * </ul>
 *
 * <p>The OCR step is off by default. Start the JVM with {@code -Dpdf2imgocr.ocr=true} to enable
 * it; real Baidu credentials must then be filled in inside {@link #createClient()}.
 */
public class Pdf2ImgOcr {
    /** PDF processed when no path is passed on the command line. */
    private static final String DEFAULT_PDF_PATH = "D:\\03tmp\\zz权力操作要论（扫描）.pdf";

    /** Resolution, in dots per inch, used when rasterizing each PDF page. */
    private static final float RENDER_DPI = 300;

    /** Name of the boolean system property that switches the OCR step on. */
    private static final String OCR_ENABLED_PROPERTY = "pdf2imgocr.ocr";

    /** Optional request parameters passed to the Baidu OCR call. */
    private static final HashMap<String, String> OCR_OPTIONS = buildOcrOptions();

    /** Baidu OCR client shared by all requests. */
    private static final AipOcr client = createClient();

    /**
     * Builds the OCR request options.
     *
     * <p>A plain {@link HashMap} is used because the call site passes this map to
     * {@code AipOcr.basicGeneral}; building it here avoids the anonymous subclass that
     * double-brace initialization would create.
     *
     * @return the option map sent with every OCR request
     */
    private static HashMap<String, String> buildOcrOptions() {
        HashMap<String, String> options = new HashMap<>();
        options.put("language_type", "CHN_ENG");
        options.put("detect_direction", "true");
        options.put("detect_language", "true");
        options.put("probability", "true");
        return options;
    }

    /**
     * Creates the Baidu OCR client and applies the network timeouts.
     *
     * <p>The three credential strings are placeholders and must be replaced before OCR is
     * enabled. Do not commit real keys to source control.
     *
     * @return the configured client
     */
    private static AipOcr createClient() {
        String appId = "你的 App ID";
        String apiKey = "你的 Api Key";
        String secretKey = "你的 Secret Key";
        AipOcr ocrClient = new AipOcr(appId, apiKey, secretKey);
        // Optional network settings: 60 s for both connect and socket timeouts.
        ocrClient.setConnectionTimeoutInMillis(60000);
        ocrClient.setSocketTimeoutInMillis(60000);
        return ocrClient;
    }

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the PDF to process. When absent, the built-in
     *             default path is used.
     */
    public static void main(String[] args) {
        String filePath = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;
        try {
            processPdfFile(filePath);
        } catch (IOException e) {
            System.err.println("处理PDF文件时发生错误: " + e.getMessage());
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Converts each page of the given PDF to a JPEG and, if the OCR switch is on, writes the
     * recognized text of every page to the output text file.
     *
     * @param filePath path of the PDF file to process
     * @throws IOException if the path is not an existing regular file, the output directory or
     *                     files cannot be created, the PDF cannot be loaded or rendered, or a
     *                     page image cannot be written
     */
    private static void processPdfFile(String filePath) throws IOException {
        File file = new File(filePath);
        // isFile() also rejects directories, so we fail before creating any output.
        if (!file.isFile()) {
            throw new IOException("文件不存在: " + filePath);
        }

        // Resolve against the absolute path: File.getParent() is null for a bare file name,
        // which would otherwise produce paths such as "null/images/".
        Path pdfPath = file.toPath().toAbsolutePath();
        Path outputTextPath = pdfPath.resolveSibling(getBaseName(file.getName()) + ".txt");
        Path imageOutputDir = pdfPath.resolveSibling("images");

        // Unlike File.mkdirs(), this throws if the directory cannot be created.
        Files.createDirectories(imageOutputDir);

        boolean ocrEnabled = Boolean.getBoolean(OCR_ENABLED_PROPERTY);

        // Files.newBufferedWriter(Path) always encodes as UTF-8, so the recognized Chinese text
        // does not depend on the platform default charset.
        try (PDDocument doc = Loader.loadPDF(file);
             BufferedWriter writer = Files.newBufferedWriter(outputTextPath)) {

            PDFRenderer renderer = new PDFRenderer(doc);
            int totalPages = doc.getNumberOfPages();

            System.out.println("开始处理PDF文件: " + file.getName());
            System.out.println("总页数: " + totalPages);
            System.out.println("输出文本文件: " + outputTextPath);

            for (int i = 0; i < totalPages; i++) {
                int pageNumber = i + 1;
                System.out.printf("正在处理第 %d/%d 页...\n", pageNumber, totalPages);

                // Render the PDF page to an image.
                BufferedImage image = renderer.renderImageWithDPI(i, RENDER_DPI);
                Path imagePath = imageOutputDir.resolve(String.format("page_%03d.jpg", pageNumber));

                // The File overload opens and closes its own stream, so no handle is leaked
                // per page. A false result means no JPEG writer accepted the image.
                if (!ImageIO.write(image, "jpg", imagePath.toFile())) {
                    throw new IOException("无法写入图片: " + imagePath);
                }

                if (ocrEnabled) {
                    // Run OCR on the image that was just written.
                    String text = imgOcr(imagePath.toAbsolutePath().toString());

                    // Append the result under a per-page header; flush so that progress
                    // survives a later failure.
                    writer.write(String.format("\n=== 第 %d 页 ===\n", pageNumber));
                    writer.write(text);
                    writer.flush();
                }
            }

            System.out.println("PDF处理完成!");
        }
    }

    /**
     * Runs Baidu general OCR on one image file and joins the recognized lines.
     *
     * <p>This is best-effort: any failure is reported on {@code System.err} and an empty string
     * is returned, so one bad page does not abort the whole document.
     *
     * @param imgPath path of the image file to recognize
     * @return the recognized text, one result entry per line, or {@code ""} if the response has
     *         no {@code words_result} field or an exception occurred
     */
    private static String imgOcr(String imgPath) {
        try {
            // Call the API with the optional parameters.
            JSONObject res = client.basicGeneral(imgPath, OCR_OPTIONS);
            if (!res.has("words_result")) {
                System.err.println("OCR识别失败: " + res.toString());
                return "";
            }

            JSONArray wordsResult = res.getJSONArray("words_result");
            StringBuilder sb = new StringBuilder();

            for (int i = 0; i < wordsResult.length(); i++) {
                JSONObject jo = wordsResult.getJSONObject(i);
                sb.append(jo.getString("words")).append("\n");
            }

            return sb.toString();
        } catch (Exception e) {
            // Deliberately broad: report the failure and continue with the next page.
            System.err.println("OCR处理图片时发生错误: " + imgPath);
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Strips the last extension from a file name.
     *
     * @param fileName a file name without directory components
     * @return the part before the last {@code '.'}, or the whole name if it contains no dot
     */
    private static String getBaseName(String fileName) {
        int dotIndex = fileName.lastIndexOf('.');
        return (dotIndex == -1) ? fileName : fileName.substring(0, dotIndex);
    }
}