百度OCR-发票识别
百度OCR的发票识别功能比较简单,直接参考官方API文档即可。每月有免费试用次数,完整示例代码见下文。
·
百度OCR的发票识别功能比较简单,直接参考官方API文档即可。
每月有免费试用次数。下面给出完整的示例代码:先是调用识别接口的服务类 BaiduOcrService,然后是它依赖的 HTTP 工具类 HttpUtil。
package com.hzsmk.ocr.service;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.codec.binary.Base64;
import org.springframework.stereotype.Service;
import com.hzsmk.common.exception.BusinessException;
import com.hzsmk.common.util.RJson;
import com.hzsmk.common.util.RString;
/**
 * Client for the Baidu OCR VAT-invoice recognition endpoint.
 *
 * <p>For each recognition request the service obtains an OAuth access token with the
 * configured API key / secret key, Base64-encodes the input file and posts it as a
 * form field ({@code image} for pictures, {@code pdf_file} for PDFs) to the
 * {@code vat_invoice} endpoint. The raw JSON response is returned unparsed.
 *
 * @author luoyang
 * @time 20220302
 */
@Service
public class BaiduOcrService implements Serializable {
    private static final long serialVersionUID = 1L;

    // API key / secret key of an application registered on the Baidu open platform.
    // The values below are placeholders and must be replaced with real credentials.
    private String baiduApiKey = "11111";
    private String baiduSecretKey = "11111111";

    private String vat_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice";
    private String token_url = "https://aip.baidubce.com/oauth/2.0/token";

    // Names of the form field that carries the Base64 payload, by kind of input file.
    private static final String image_type = "image";
    private static final String pdf_type = "pdf_file";

    // Lower-case file extensions that are submitted through the "image" field.
    private static final List<String> IMAGE_EXTENSIONS = Arrays.asList("jpg", "jpeg", "png", "bmp");

    /**
     * Requests an access token from the token endpoint using the client-credentials grant.
     *
     * @return the value of the {@code access_token} field of the token response
     * @throws BusinessException if the response is missing, cannot be parsed into a map,
     *         or does not contain a non-empty {@code access_token}
     * @throws Exception if the HTTP call itself fails
     */
    private String getBaiduToken() throws Exception {
        // Encode the credentials so reserved characters cannot corrupt the query string.
        String url = token_url
                + "?grant_type=client_credentials"
                + "&client_id=" + URLEncoder.encode(baiduApiKey, "UTF-8")
                + "&client_secret=" + URLEncoder.encode(baiduSecretKey, "UTF-8");

        String tokenJson = HttpUtil.get(url);
        if (null == tokenJson) {
            throw new BusinessException("百度token获取失败");
        }
        Map<String, Object> tokenMap = RJson.parseJson2Map(tokenJson);
        // An error response parses fine but carries no access_token; treat that as a failure
        // instead of calling the OCR endpoint with a bogus token.
        Object accessToken = (null == tokenMap) ? null : tokenMap.get("access_token");
        if (null == accessToken) {
            throw new BusinessException("百度token获取失败");
        }
        String token = RString.toString(accessToken);
        if (null == token || token.isEmpty()) {
            throw new BusinessException("百度token获取失败");
        }
        return token;
    }

    /**
     * Runs VAT-invoice recognition on a local file.
     *
     * <p>The file type is derived from the extension (case-insensitively). Input validation
     * and file reading happen before any network call, so bad input fails fast.
     *
     * @param path path of a {@code pdf}, {@code jpg}, {@code jpeg}, {@code png} or
     *        {@code bmp} file
     * @return the response body returned by {@code HttpUtil.post} for the OCR request
     * @throws IllegalArgumentException if {@code path} is {@code null}
     * @throws BusinessException if the extension is not supported or no token could be obtained
     * @throws IOException if the file does not exist or cannot be read
     * @throws Exception if the HTTP calls fail
     */
    public String handVat(String path) throws Exception {
        if (path == null) {
            throw new IllegalArgumentException("path must not be null");
        }

        String fileType = path.substring(path.lastIndexOf('.') + 1).toLowerCase(Locale.ROOT);
        String checkType;
        if ("pdf".equals(fileType)) {
            checkType = pdf_type;
        } else if (IMAGE_EXTENSIONS.contains(fileType)) {
            checkType = image_type;
        } else {
            throw new BusinessException("仅支持pdf/jpg/jpeg/png/bmp格式!");
        }

        byte[] content = readFileByBytes(path);
        String token = getBaiduToken();

        // The payload is Base64 first, then URL-encoded because it travels as a form field.
        String encoded = URLEncoder.encode(Base64.encodeBase64String(content), "UTF-8");
        return HttpUtil.post(vat_url, token, checkType + "=" + encoded);
    }

    /**
     * Reads a whole file into memory.
     *
     * @param filePath path of the file to read
     * @return the file content
     * @throws FileNotFoundException if {@code filePath} does not denote an existing regular file
     * @throws IOException if reading fails
     */
    public static byte[] readFileByBytes(String filePath) throws IOException {
        File file = new File(filePath);
        if (!file.isFile()) {
            throw new FileNotFoundException(filePath);
        }
        return Files.readAllBytes(file.toPath());
    }

    /**
     * Manual smoke test: recognizes the file given as first argument (or {@code D:\ccc.pdf}
     * when no argument is passed) and prints the raw response.
     */
    public static void main(String[] args) throws Exception {
        String path = args.length > 0 ? args[0] : "D:\\ccc.pdf";
        BaiduOcrService s = new BaiduOcrService();
        System.out.println(s.handVat(path));
    }
}
package com.hzsmk.ocr.service;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.ProtocolException;
import java.net.URL;
import java.util.List;
import java.util.Map;
/**
 * Small HTTP helper built on {@link HttpURLConnection}.
 *
 * <p>All methods return the response body as one string, read line by line with the line
 * terminators dropped. Failures are reported by exception: I/O problems and HTTP status
 * codes of 400 and above raise an {@link IOException} instead of being printed and turned
 * into a {@code null} return value. Request URLs and response bodies are not logged because
 * they carry access tokens and credentials.
 */
public class HttpUtil {

    /** Maximum time to wait for the connection to be established, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Maximum time to wait for response data, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 60000;

    /**
     * Posts form-encoded {@code params} to {@code requestUrl}, authenticating with
     * {@code accessToken}.
     *
     * @param requestUrl endpoint URL without the {@code access_token} query parameter
     * @param accessToken token appended as {@code ?access_token=...}
     * @param params request body
     * @return the response body
     * @throws Exception if the request cannot be built or the call fails
     */
    public static String post(String requestUrl, String accessToken, String params)
            throws Exception {
        String contentType = "application/x-www-form-urlencoded";
        return HttpUtil.post(requestUrl, accessToken, contentType, params);
    }

    /**
     * Same as {@link #post(String, String, String)} with an explicit content type.
     *
     * <p>The body charset is GBK when the URL contains {@code "nlp"} and UTF-8 otherwise
     * (presumably because those endpoints expect GBK - confirm against the API docs).
     *
     * @param requestUrl endpoint URL without the {@code access_token} query parameter
     * @param accessToken token appended as {@code ?access_token=...}
     * @param contentType value of the {@code Content-Type} request header
     * @param params request body
     * @return the response body
     * @throws Exception if the request cannot be built or the call fails
     */
    public static String post(String requestUrl, String accessToken, String contentType, String params)
            throws Exception {
        String encoding = requestUrl.contains("nlp") ? "GBK" : "UTF-8";
        return HttpUtil.post(requestUrl, accessToken, contentType, params, encoding);
    }

    /**
     * Same as {@link #post(String, String, String, String)} with an explicit charset.
     *
     * @param requestUrl endpoint URL without the {@code access_token} query parameter
     * @param accessToken token appended as {@code ?access_token=...}
     * @param contentType value of the {@code Content-Type} request header
     * @param params request body
     * @param encoding charset name used for the request body and for decoding the response
     * @return the response body
     * @throws Exception if the request cannot be built or the call fails
     */
    public static String post(String requestUrl, String accessToken, String contentType, String params, String encoding)
            throws Exception {
        String url = requestUrl + "?access_token=" + accessToken;
        return HttpUtil.postGeneralUrl(url, contentType, params, encoding);
    }

    /**
     * Sends {@code params} as the body of a POST request to {@code generalUrl}.
     *
     * @param generalUrl complete request URL, including any query string
     * @param contentType value of the {@code Content-Type} request header
     * @param params request body
     * @param encoding charset name used for the request body and for decoding the response
     * @return the response body
     * @throws MalformedURLException if {@code generalUrl} is not a valid http/https URL
     * @throws UnsupportedEncodingException if {@code encoding} names an unknown charset
     * @throws IOException if the connection fails, times out, or the server answers with a
     *         status code of 400 or above
     */
    public static String postGeneralUrl(String generalUrl, String contentType, String params, String encoding)
            throws Exception {
        URL url = toHttpUrl(generalUrl);
        // Encode up front so a bad charset name fails before any network traffic.
        byte[] payload = params.getBytes(encoding);

        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            connection.setRequestMethod("POST");
            connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
            connection.setReadTimeout(READ_TIMEOUT_MS);
            connection.setRequestProperty("Content-Type", contentType);
            connection.setRequestProperty("Connection", "Keep-Alive");
            connection.setUseCaches(false);
            connection.setDoOutput(true);
            connection.setDoInput(true);

            try (DataOutputStream out = new DataOutputStream(connection.getOutputStream())) {
                out.write(payload);
                out.flush();
            }
            return readResponse(connection, encoding);
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Sends a GET request and decodes the response as UTF-8.
     *
     * @param generalUrl complete request URL, including any query string
     * @return the response body
     * @throws MalformedURLException if {@code generalUrl} is not a valid http/https URL
     * @throws IOException if the connection fails, times out, or the server answers with a
     *         status code of 400 or above
     */
    public static String get(String generalUrl)
            throws Exception {
        URL url = toHttpUrl(generalUrl);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            connection.setRequestMethod("GET");
            connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
            connection.setReadTimeout(READ_TIMEOUT_MS);
            connection.setRequestProperty("Connection", "Keep-Alive");
            connection.setUseCaches(false);
            // No setDoOutput here: a GET request has no body.
            return readResponse(connection, "UTF-8");
        } finally {
            connection.disconnect();
        }
    }

    /** Parses {@code spec} and rejects anything that is not an http or https URL. */
    private static URL toHttpUrl(String spec) throws MalformedURLException {
        URL url = new URL(spec);
        String protocol = url.getProtocol();
        if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)) {
            // Without this check the cast to HttpURLConnection fails with a ClassCastException.
            throw new MalformedURLException("Unsupported protocol: " + protocol);
        }
        return url;
    }

    /** Returns the response body, or throws an IOException describing an HTTP error status. */
    private static String readResponse(HttpURLConnection connection, String encoding) throws IOException {
        int status = connection.getResponseCode();
        if (status >= HttpURLConnection.HTTP_BAD_REQUEST) {
            String detail = "";
            if (connection.getErrorStream() != null) {
                detail = readLines(new InputStreamReader(connection.getErrorStream(), encoding));
            }
            // Report host and path only: the query string carries tokens and secrets.
            URL url = connection.getURL();
            throw new IOException("HTTP " + status + " from " + url.getHost() + url.getPath()
                    + (detail.isEmpty() ? "" : ": " + detail));
        }
        return readLines(new InputStreamReader(connection.getInputStream(), encoding));
    }

    /** Reads {@code source} to the end, concatenating its lines without terminators, and closes it. */
    private static String readLines(InputStreamReader source) throws IOException {
        StringBuilder body = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(source)) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line);
            }
        }
        return body.toString();
    }
}
更多推荐
所有评论(0)