1.环境

  1. DrissionPage==4.1.0.18
  2. ddddocr==1.5.6
  3. Python 3.10 及以上(3.1x)

pip安装环境,不赘述,python环境建议3.10以上

DrissionPage 基于 Chrome DevTools 协议(CDP)开发,不依赖 WebDriver,一般不会被检测。官网地址:https://drissionpage.cn

2. 关键代码

1. 启动浏览器

def exec(url, proxies, port, user_agent):
    """Load ``url`` in a fresh Chromium instance and return the captured data.

    NOTE: the name shadows the ``exec`` builtin; it is kept unchanged so that
    existing callers keep working.

    Args:
        url: Page to open. Its path component is also used to recognise the
            "normal" data request among the captured packets.
        proxies: Mapping whose ``"http"`` entry is passed to Chromium as
            ``--proxy-server``; the mapping is also forwarded to
            ``listen_verify``. ``None`` or a missing ``"http"`` entry means
            no proxy flag is set.
        port: Local debugging port for the browser, or ``None`` to let
            DrissionPage pick a free one automatically.
        user_agent: User-Agent string to set on the browser.

    Returns:
        * ``{"code": 500}`` when ``listen_verify`` reports failure;
        * whatever ``get_data(url, tab)`` returns after a passed verification;
        * ``{"code": 200, "data": body}`` when a response for the URL's path
          contains the marker text;
        * ``None`` when listening ends without any of the above.

    Raises:
        Exceptions raised while driving the page propagate unchanged, after
        the page has been closed.
    """
    # Local import: the page object was used but never created in the
    # original snippet, and no import header is visible to extend.
    from DrissionPage import ChromiumPage

    co = ChromiumOptions()
    # Use the requested debugging port, otherwise let DrissionPage choose one.
    if port is not None:
        co.set_local_port(port)
    else:
        co.auto_port(on_off=True)
    # co.headless()  # headless mode
    # Incognito (private) mode; the original enabled this twice.
    co.incognito(True)
    # Disable the Chromium sandbox.
    co.set_argument('--no-sandbox')
    # Stop loading as soon as the DOM is ready.
    co.set_load_mode('eager')
    # Initial window size.
    co.set_argument('--window-size', '1500,800')
    # Only pass a proxy flag when a proxy is actually configured.
    proxy_server = proxies.get("http") if proxies else None
    if proxy_server:
        co.set_argument('--proxy-server', proxy_server)
    co.set_user_agent(user_agent)

    page = ChromiumPage(co)
    try:
        tab = page.latest_tab
        # Start capturing network packets before navigating.
        tab.listen.start(targets=True)
        tab.get(url)
        target_path = urlparse(url).path
        for step in tab.listen.steps(timeout=10):
            # Captcha request: the site is asking for slider verification.
            if "/captcha/get" in step.url:
                # Explicit comparison kept from the original: a ``None``
                # result is not treated as a failure.
                if listen_verify(step, tab, proxies) == False:  # noqa: E712
                    return {"code": 500}
                return get_data(url, tab)
            # Normal request: the response for the page's own path.
            if target_path in step.url:
                data = step.response.body
                # The body is only searched when it is text; a parsed JSON
                # or binary body is skipped instead of raising on ``.find``.
                if isinstance(data, str) and "路径" in data:
                    return {"code": 200, "data": data}
        # Listening ended without a usable packet.
        return None
    finally:
        # Close the page on every exit path (success, failure, timeout, error).
        page.close()

无头模式(headless):浏览器不显示窗口,在后台运行,适用于没有桌面环境的服务器。

2. 识别认证图片

    # Download the two captcha images: the background and the slider piece.
    background_bytes = http.request(method="GET", url=bg_url).data
    target_bytes = http.request(method="GET", url=target_url).data

    # NOTE: the next two lines were tab-indented in the original, which breaks
    # the surrounding space-indented body; they are re-indented with spaces.
    img_width = 552  # "download width" per the original note
    # Scale factor between img_width and 800.
    # NOTE(review): presumably converts image pixels to on-screen pixels;
    # confirm what the 800 stands for.
    ratio = img_width / 800

    # Gap offset recognised by OCR, corrected by `deviation`, then scaled.
    distance = get_distance(target_bytes, background_bytes) - deviation
    distance = int(distance * ratio)

    time.sleep(1)
    tab.actions.move_to(tab.ele("@class=xxx"))
    # Locate the slider handle.
    slide_div = tab.ele("@class=xxx")
    # Move onto the handle at a random horizontal offset, then press and hold.
    tab.actions.move_to(slide_div, offset_x=random.randint(0, 20), duration=0.5)
    action_ele = tab.actions.hold(slide_div)
    current_position = 0

    time.sleep(1)
    # Replay the generated track; x values are absolute, so each move uses
    # the difference from the previous x.
    for x, y, t in get_tracks(distance):
        offset_x = (x - current_position)
        if offset_x == 0:
            # No horizontal movement for this point: just pause briefly.
            time.sleep(0.02)
            continue
        action_ele.move(offset_x=offset_x, offset_y=y, duration=t / 20000)
        current_position = x
    time.sleep(0.2)
    # Release the mouse button to drop the slider.
    tab.actions.release()
    

3. ocr识别滑块距离

def get_distance(target_bytes, background_bytes):
    """
    Return the slide distance recognised by ddddocr.

    Runs ``slide_match`` on the slider-piece image and the background image
    (both passed as raw bytes) and returns the first element of the result's
    ``"target"`` entry as an ``int``.
    NOTE(review): presumably that element is the left x of the matched box;
    confirm against the ddddocr documentation.
    """
    matcher = ddddocr.DdddOcr(det=False, ocr=False, show_ad=False)
    match_result = matcher.slide_match(target_bytes, background_bytes)
    first_coordinate = match_result["target"][0]
    return int(first_coordinate)

4.轨迹模拟算法

def get_tracks(distance):
    """
    Build a randomised drag track that overshoots ``distance`` and then
    settles back onto it.

    The first phase accelerates, then brakes, until the position reaches
    ``distance`` plus a random overshoot of 40-90. The second phase shrinks
    the overshoot in random steps until the position is exactly ``distance``.

    Args:
        distance: Target horizontal offset.

    Returns:
        A list of ``[x, y, t]`` lists. ``x`` is the absolute horizontal
        position, ``y`` a cumulative random vertical jitter, and ``t`` a
        strictly increasing timing value that the caller scales itself.

    Changes from the original behaviour:
        * While braking, the speed is floored so the position never moves
          backwards or oscillates before reaching the overshoot point.
        * The overshoot is clamped at 0, so the last point is exactly
          ``distance`` instead of up to 4 short of it.
    """
    # Lowest speed allowed while braking; with the braking acceleration of
    # -1.5 it still yields a step of at least 1, so the loop always advances.
    min_speed = 2

    tracks = []
    y, v, t, current = 0, 0, 1, 0
    mid = distance * 3 / 4
    exceed = random.randint(40, 90)
    z = random.randint(30, 150)
    target = distance + exceed

    # Phase 1: run up to the overshoot point.
    while current < target:
        if current < mid / 2:
            a = 2
        elif current < mid:
            a = 3
        else:
            a = -3
        a /= 2
        # Displacement for this tick uses the speed from before the update.
        s = v * t + 0.5 * a * (t * t)
        current += int(s)
        v += a * t
        if a < 0:
            v = max(v, min_speed)
        y += random.randint(-3, 3)
        z += random.randint(5, 10)
        tracks.append([min(current, target), y, z])

    # Phase 2: back off from the overshoot until it is used up.
    while exceed > 0:
        exceed = max(exceed - random.randint(0, 5), 0)
        y += random.randint(-5, 5)
        z += random.randint(5, 9)
        tracks.append([min(current, distance + exceed), y, z])
    return tracks

3.后记

一般来说,不算太严格的滑块验证码都能通过。轨迹模拟算法和滑块的点击、滑动逻辑可以按需自行调整,DrissionPage 的语法可在官网查阅。

Logo

技术共进,成长同行——讯飞AI开发者社区

更多推荐