百度 OCR 的发票识别功能比较简单,直接参考官方 API 文档即可。

每月有免费试用次数,示例代码如下:

package com.hzsmk.ocr.service;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.codec.binary.Base64;
import org.springframework.stereotype.Service;

import com.hzsmk.common.exception.BusinessException;
import com.hzsmk.common.util.RJson;
import com.hzsmk.common.util.RString;

/**
 * Client for Baidu's VAT-invoice OCR endpoint.
 *
 * <p>The flow is: obtain an OAuth access token with the application's API key
 * and secret key, Base64-encode the invoice file, then POST it as a
 * form-encoded parameter to the recognition endpoint. The raw JSON response
 * is returned to the caller unparsed.
 *
 * @author luoyang
 * @time 20220302
 */
@Service
public class BaiduOcrService implements Serializable {

    private static final long serialVersionUID = 1L;

    // API key of the application registered on the Baidu open platform.
    // NOTE(review): these are placeholder values; real credentials should come
    // from external configuration rather than being committed in source.
    private String baiduApiKey = "11111";

    // Secret key belonging to the same application.
    private String baiduSecretKey = "11111111";

    // VAT-invoice recognition endpoint.
    private String vat_url  = "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice";

    // OAuth token endpoint.
    private  String  token_url = "https://aip.baidubce.com/oauth/2.0/token";

    // Form parameter name used when the upload is an image.
    private static final String image_type = "image";

    // Form parameter name used when the upload is a PDF.
    private static final String pdf_type = "pdf_file";

    // Image extensions (lower case, without the dot) accepted by handVat.
    private static final List<String> IMAGE_EXTENSIONS = Arrays.asList("jpg", "jpeg", "png", "bmp");

    /**
     * Fetches an access token from the token endpoint.
     *
     * @return the value of the {@code access_token} field of the response
     * @throws BusinessException if no response was received, the response
     *         could not be parsed into a map, or it carries no
     *         {@code access_token}
     * @throws Exception if the HTTP call itself fails
     */
    private String getBaiduToken() throws Exception {
        // Encode the credentials so that reserved characters cannot corrupt the query string.
        String url = token_url
                + "?grant_type=client_credentials"
                + "&client_id=" + URLEncoder.encode(baiduApiKey, "UTF-8")
                + "&client_secret=" + URLEncoder.encode(baiduSecretKey, "UTF-8");
        String tokenJson = HttpUtil.get(url);
        if (tokenJson == null) {
            throw new BusinessException("百度token获取失败");
        }
        Map<String, Object> tokenMap = RJson.parseJson2Map(tokenJson);
        if (tokenMap == null || tokenMap.get("access_token") == null) {
            // An error payload has no access_token; fail here instead of
            // calling the OCR endpoint with an unusable token.
            throw new BusinessException("百度token获取失败");
        }
        return RString.toString(tokenMap.get("access_token"));
    }

    /**
     * Runs VAT-invoice recognition on a local file.
     *
     * <p>The file type is decided by the extension after the last dot,
     * compared case-insensitively. The path is validated and the file is read
     * before any network call is made, so bad input does not cost a token
     * request.
     *
     * @param path path of the invoice file; must not be {@code null}
     * @return the raw JSON response of the recognition endpoint
     * @throws BusinessException if the extension is not one of
     *         pdf/jpg/jpeg/png/bmp, or the token cannot be obtained
     * @throws IOException if the file does not exist or cannot be read
     * @throws Exception if an HTTP call fails
     */
    public String  handVat(String  path) throws Exception {
        String fileType = path.substring(path.lastIndexOf('.') + 1).toLowerCase(java.util.Locale.ROOT);
        String checkType;
        if ("pdf".equals(fileType)) {
            checkType = pdf_type;
        } else if (IMAGE_EXTENSIONS.contains(fileType)) {
            checkType = image_type;
        } else {
            throw new BusinessException("仅支持pdf/jpg/jpeg/png/bmp格式!");
        }

        String imgStr = Base64.encodeBase64String(readFileByBytes(path));
        // Base64 output contains '+', '/' and '=', which must be escaped in a
        // form-encoded body. An encoding failure propagates instead of
        // silently posting an empty payload.
        String encoded = URLEncoder.encode(imgStr, "UTF-8");

        String token = getBaiduToken();
        return HttpUtil.post(vat_url, token, checkType + "=" + encoded);
    }

    /**
     * Reads a whole file into a byte array.
     *
     * @param filePath path of the file to read
     * @return the file content
     * @throws FileNotFoundException if the path does not denote an existing
     *         regular file
     * @throws IOException if reading fails
     */
    public static byte[] readFileByBytes(String filePath) throws IOException {
        File file = new File(filePath);
        if (!file.isFile()) {
            throw new FileNotFoundException(filePath);
        }
        return java.nio.file.Files.readAllBytes(file.toPath());
    }

    /**
     * Manual smoke test: recognizes a local sample file and prints the raw response.
     */
    public static void main(String[] args) throws Exception {
        BaiduOcrService s = new BaiduOcrService();
        System.out.println(s.handVat("D:\\ccc.pdf"));
    }

}
package com.hzsmk.ocr.service;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.ProtocolException;
import java.net.URL;
import java.util.List;
import java.util.Map;

/**
 * Small HTTP helper built on {@link HttpURLConnection}.
 *
 * <p>All methods return the response body as a single string with line
 * terminators removed. Failures are reported by exception: connection and
 * read errors propagate as {@link IOException}, and a response status of 400
 * or above is turned into an {@link IOException} whose message carries the
 * status code and the error body. The request URL is deliberately kept out of
 * that message, and nothing is written to stdout/stderr, because URLs and
 * response bodies handled here may contain credentials or access tokens.
 */
public class HttpUtil {

    /** Maximum time to wait for the TCP connection to be established. */
    private static final int CONNECT_TIMEOUT_MILLIS = 10000;

    /** Maximum time to wait for response data before giving up. */
    private static final int READ_TIMEOUT_MILLIS = 60000;

    /**
     * POSTs a form-encoded body to {@code requestUrl}, authenticated by access token.
     *
     * @param requestUrl  endpoint URL
     * @param accessToken token appended as the {@code access_token} query parameter
     * @param params      request body, already form-encoded
     * @return the response body
     * @throws Exception if the URL is malformed or the request fails
     */
    public static String post(String requestUrl, String accessToken, String params)
            throws Exception {
        String contentType = "application/x-www-form-urlencoded";
        return HttpUtil.post(requestUrl, accessToken, contentType, params);
    }

    /**
     * POSTs a body with an explicit content type. The body and response are
     * handled as GBK when the URL contains {@code "nlp"}, otherwise as UTF-8.
     *
     * @param requestUrl  endpoint URL
     * @param accessToken token appended as the {@code access_token} query parameter
     * @param contentType value of the {@code Content-Type} request header
     * @param params      request body
     * @return the response body
     * @throws Exception if the URL is malformed or the request fails
     */
    public static String post(String requestUrl, String accessToken, String contentType, String params)
            throws Exception {
        String encoding = requestUrl.contains("nlp") ? "GBK" : "UTF-8";
        return HttpUtil.post(requestUrl, accessToken, contentType, params, encoding);
    }

    /**
     * POSTs a body with explicit content type and character encoding.
     *
     * @param requestUrl  endpoint URL; may already carry a query string
     * @param accessToken token appended as the {@code access_token} query parameter
     * @param contentType value of the {@code Content-Type} request header
     * @param params      request body
     * @param encoding    charset name used for the body and for decoding the response
     * @return the response body
     * @throws Exception if the URL is malformed or the request fails
     */
    public static String post(String requestUrl, String accessToken, String contentType, String params, String encoding)
            throws Exception {
        // Use '&' when the URL already has a query string so the result stays well-formed.
        String separator = requestUrl.contains("?") ? "&" : "?";
        String url = requestUrl + separator + "access_token=" + accessToken;
        return HttpUtil.postGeneralUrl(url, contentType, params, encoding);
    }

    /**
     * POSTs {@code params} to {@code generalUrl} exactly as given.
     *
     * @param generalUrl  complete request URL
     * @param contentType value of the {@code Content-Type} request header
     * @param params      request body
     * @param encoding    charset name used for the body and for decoding the response
     * @return the response body
     * @throws Exception if the URL is malformed, the encoding is unsupported,
     *                   the connection fails, or the server answers with a
     *                   status of 400 or above
     */
    public static String postGeneralUrl(String generalUrl, String contentType, String params, String encoding)
            throws Exception {
        HttpURLConnection connection = open(generalUrl, "POST");
        try {
            connection.setRequestProperty("Content-Type", contentType);
            connection.setDoOutput(true);

            byte[] body = params.getBytes(encoding);
            try (DataOutputStream out = new DataOutputStream(connection.getOutputStream())) {
                out.write(body);
            }
            return readResponse(connection, encoding);
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Issues a GET request and decodes the response as UTF-8.
     *
     * @param generalUrl complete request URL
     * @return the response body
     * @throws Exception if the URL is malformed, the connection fails, or the
     *                   server answers with a status of 400 or above
     */
    public static String get(String generalUrl)
            throws Exception {
        HttpURLConnection connection = open(generalUrl, "GET");
        try {
            return readResponse(connection, "UTF-8");
        } finally {
            connection.disconnect();
        }
    }

    /** Opens a connection with the request settings shared by GET and POST. */
    private static HttpURLConnection open(String url, String method) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
        connection.setRequestMethod(method);
        connection.setRequestProperty("Connection", "Keep-Alive");
        connection.setUseCaches(false);
        // Without timeouts an unresponsive server blocks the calling thread indefinitely.
        connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
        connection.setReadTimeout(READ_TIMEOUT_MILLIS);
        return connection;
    }

    /** Returns the body of a successful response, or throws for a status of 400 or above. */
    private static String readResponse(HttpURLConnection connection, String encoding) throws IOException {
        int status = connection.getResponseCode();
        if (status >= 400) {
            java.io.InputStream errorStream = connection.getErrorStream();
            String detail = errorStream == null ? "" : readAll(errorStream, encoding);
            throw new IOException("HTTP " + status + " response" + (detail.isEmpty() ? "" : ": " + detail));
        }
        return readAll(connection.getInputStream(), encoding);
    }

    /** Reads the stream to its end, joining lines without their terminators, and closes it. */
    private static String readAll(java.io.InputStream stream, String encoding) throws IOException {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, encoding))) {
            StringBuilder result = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                result.append(line);
            }
            return result.toString();
        }
    }

}

Logo

技术共进,成长同行——讯飞AI开发者社区

更多推荐