1、问题:

在使用 jsoup 爬取数据时,目标网站使用了 HTTPS,请求时抛出如下异常:javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: PKIX path validation failed: java.security.cert.CertPathValidatorException: validity check failed(其中 validity check failed 表示服务器证书未通过有效期校验,即证书已过期或尚未生效)

2、解决

随后我在网上查阅了一些资料,找到了如下解决代码。注意:这段代码会在整个 JVM 范围内关闭 HTTPS 证书校验和主机名校验,存在中间人攻击风险,只适合在爬虫等对安全性要求不高的场景下使用:

package com.curtao.company.qualify.crawler.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLSession;

/**
 * <p>Bypasses HTTPS security-certificate checks.</p>
 *
 * <p>The class installs a trust manager that accepts every certificate chain and a host-name
 * verifier that accepts every host as the static defaults of {@link HttpsURLConnection}.
 * Those defaults are shared by the whole JVM, so once they are installed every
 * {@code HttpsURLConnection} that does not configure its own socket factory / verifier stops
 * validating the peer.</p>
 *
 * <p><b>Security warning:</b> with verification disabled the connection offers no protection
 * against man-in-the-middle attacks. Only use this for traffic whose authenticity does not
 * matter (e.g. crawling public pages behind an expired certificate).</p>
 *
 * @author fengjianhao@curtao.com
 * @date 2021/2/19 9:15
 */
public class HttpsUrlValidator {

    /**
     * Host-name verifier that prints the requested host and the session's peer host to
     * standard output and then accepts the connection unconditionally.
     */
    static HostnameVerifier hv = new HostnameVerifier() {
        @Override
        public boolean verify(String urlHostName, SSLSession session) {
            System.out.println("Warning: URL Host: " + urlHostName + " vs. "
                    + session.getPeerHost());
            return true;
        }
    };

    /**
     * Disables HTTPS verification JVM-wide, then fetches {@code url} with a plain GET and
     * returns the response body.
     *
     * <p>Lines are joined with {@code '\n'}; a trailing {@code '\n'} follows the last line.
     * The body is decoded with the charset announced in the response's {@code Content-Type}
     * header, falling back to UTF-8 when none (or an unusable one) is given.</p>
     *
     * @param url absolute URL to fetch
     * @return the response body, or {@code null} if the URL is malformed, the trust setup
     *         fails, or any I/O error occurs (the exception message is printed to stdout)
     */
    public final static String retrieveResponseFromServer(final String url) {
        HttpURLConnection connection = null;

        try {
            URL validationUrl = new URL(url);
            trustAllHttpsCertificates();
            HttpsURLConnection.setDefaultHostnameVerifier(hv);

            connection = (HttpURLConnection) validationUrl.openConnection();
            // getInputStream() is evaluated before responseCharset(), so a connection failure
            // surfaces here as an IOException. try-with-resources closes the stream, which
            // the previous implementation leaked.
            try (BufferedReader in = new BufferedReader(new InputStreamReader(
                    connection.getInputStream(), responseCharset(connection)))) {
                // The builder never leaves this method, so no synchronization is required.
                final StringBuilder body = new StringBuilder(255);
                String line;
                while ((line = in.readLine()) != null) {
                    body.append(line).append('\n');
                }
                return body.toString();
            }
        } catch (final Exception e) {
            // Covers MalformedURLException/IOException as well as trust-setup failures;
            // callers rely on the null return rather than on an exception.
            System.out.println(e.getMessage());
            return null;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Picks the charset used to decode the response body.
     *
     * @param connection an already connected HTTP connection
     * @return the charset named by the {@code charset=} parameter of the
     *         {@code Content-Type} header, or UTF-8 when it is absent, malformed or
     *         unsupported
     */
    private static java.nio.charset.Charset responseCharset(HttpURLConnection connection) {
        final String key = "charset=";
        String contentType = connection.getContentType();
        if (contentType != null) {
            for (String parameter : contentType.split(";")) {
                String trimmed = parameter.trim();
                if (trimmed.regionMatches(true, 0, key, 0, key.length())) {
                    String name = trimmed.substring(key.length()).replace("\"", "").trim();
                    try {
                        return java.nio.charset.Charset.forName(name);
                    } catch (IllegalArgumentException ignored) {
                        // Illegal or unsupported charset name: use the fallback below.
                        break;
                    }
                }
            }
        }
        return java.nio.charset.StandardCharsets.UTF_8;
    }

    /**
     * Replaces the JVM-wide default SSL socket factory of {@link HttpsURLConnection} with one
     * backed by {@link miTM}, i.e. one that accepts every certificate chain.
     *
     * @throws Exception if the TLS context cannot be created or initialised
     */
    public static void trustAllHttpsCertificates() throws Exception {
        javax.net.ssl.TrustManager[] trustAllCerts = {new miTM()};
        // "TLS" is the standard context name; "SSL" is a legacy name kept only for
        // compatibility.
        javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext.getInstance("TLS");
        sc.init(null, trustAllCerts, null);
        javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    }

    /**
     * Trust manager that performs no validation at all: both check methods return normally
     * for any chain, which the TLS layer interprets as "trusted".
     */
    static class miTM implements javax.net.ssl.TrustManager,
            javax.net.ssl.X509TrustManager {

        /**
         * @return an empty array; the interface contract requires a non-null result
         */
        @Override
        public java.security.cert.X509Certificate[] getAcceptedIssuers() {
            return new java.security.cert.X509Certificate[0];
        }

        /**
         * Not part of {@code X509TrustManager}; kept for callers of the original API.
         *
         * @param certs ignored
         * @return always {@code true}
         */
        public boolean isServerTrusted(
                java.security.cert.X509Certificate[] certs) {
            return true;
        }

        /**
         * Not part of {@code X509TrustManager}; kept for callers of the original API.
         *
         * @param certs ignored
         * @return always {@code true}
         */
        public boolean isClientTrusted(
                java.security.cert.X509Certificate[] certs) {
            return true;
        }

        /** Accepts every server certificate chain by never throwing. */
        @Override
        public void checkServerTrusted(
                java.security.cert.X509Certificate[] certs, String authType)
                throws java.security.cert.CertificateException {
            // Intentionally empty: returning normally means "trusted".
        }

        /** Accepts every client certificate chain by never throwing. */
        @Override
        public void checkClientTrusted(
                java.security.cert.X509Certificate[] certs, String authType)
                throws java.security.cert.CertificateException {
            // Intentionally empty: returning normally means "trusted".
        }
    }

}

方法调用

 //发送post请求(https)
    /**
     * Sends a POST request with jsoup, with HTTPS certificate and host-name verification
     * switched off, and returns the response body.
     *
     * <p>The request ignores the response content type and HTTP error statuses and uses a
     * timeout of 10000 (milliseconds in jsoup's API). When {@code address} is given, the
     * request is routed through a SOCKS proxy at that IP/port.</p>
     *
     * @param address  proxy address, or {@code null} to connect directly
     * @param url      target URL
     * @param formdata form fields to post; {@code null} is treated as "no fields"
     * @param headers  request headers; {@code null} is treated as "no headers"
     * @return the response body, or {@code null} if any exception occurred (it is logged)
     */
    public static String sendPostRequest(DynamicAddress address, String url,
                                         Map<String, String> formdata, Map<String, String> headers) {
        if (headers == null) {
            headers = new HashMap<>();
        }
        if (formdata == null) {
            // Same normalisation as for headers; otherwise a null map aborts the request.
            formdata = new HashMap<>();
        }

        String result = null;
        try {
            // Certificate checking has to be disabled before the request is made. Only the
            // JVM-wide defaults are installed here; the previous warm-up GET to the target
            // URL was unnecessary, bypassed the proxy and had no timeout.
            disableHttpsVerification();

            org.jsoup.Connection connection = Jsoup.connect(url).method(Connection.Method.POST);
            if (address != null) {
                connection = connection.proxy(new java.net.Proxy(Proxy.Type.SOCKS,
                        new InetSocketAddress(address.getIp(), address.getPort())));
            }
            result = connection
                    .headers(headers)
                    .ignoreContentType(true)
                    .data(formdata)
                    .ignoreHttpErrors(true)
                    .timeout(10000)
                    .execute()
                    .body();
        } catch (Exception e) {
            // The URL fills the placeholder; the trailing exception is logged with its
            // stack trace (SLF4J/Log4j2 convention — confirm against the LOGGER in use).
            LOGGER.info("post请求url异常是:{}", url, e);
        }
        return result;
    }

    /**
     * Installs the trust-all socket factory and an accept-all host-name verifier as the
     * JVM-wide {@code HttpsURLConnection} defaults without sending any request.
     *
     * <p>Best effort: a failure is logged and the caller proceeds with whatever defaults are
     * currently installed. NOTE(review): this relies on jsoup opening its connections via
     * {@code HttpsURLConnection} so that the defaults apply, as the original workaround did.</p>
     */
    private static void disableHttpsVerification() {
        try {
            HttpsUrlValidator.trustAllHttpsCertificates();
            javax.net.ssl.HttpsURLConnection.setDefaultHostnameVerifier(
                    new javax.net.ssl.HostnameVerifier() {
                        @Override
                        public boolean verify(String hostname, javax.net.ssl.SSLSession session) {
                            return true;
                        }
                    });
        } catch (Exception e) {
            LOGGER.info("failed to disable HTTPS verification", e);
        }
    }
Logo

技术共进,成长同行——讯飞AI开发者社区

更多推荐