新建项目

打开终端输入命令

mkdir midscene-demo
cd midscene-demo/

设置API密钥

touch .env

在 .env 文件中写入以下内容:
OPENAI_BASE_URL="https://generativelanguage.googleapis.com/v1beta/openai/"
# 请替换为你自己的 Gemini API Key;切勿公开或提交真实密钥
OPENAI_API_KEY="your-gemini-api-key-here"
MIDSCENE_MODEL_NAME="gemini-2.5-pro-preview-05-06"
MIDSCENE_USE_GEMINI=1

安装库

npm install @midscene/android dotenv tsx --save-dev

编写脚本

touch demo.ts

在 demo.ts 中写入以下代码:
import 'dotenv/config';
import {
  AndroidAgent,
  AndroidDevice,
  getConnectedDevices,
  overrideAIConfig,
} from '@midscene/android';

// Model settings for the Gemini vision-language model, applied before any agent is created.
// The API key is read from the environment (presumably populated from .env by the
// dotenv import at the top of the script); a placeholder is used when it is missing or empty.
const geminiApiKey = process.env.OPENAI_API_KEY || 'your-gemini-api-key-here';

const geminiConfig = {
  OPENAI_BASE_URL: 'https://generativelanguage.googleapis.com/v1beta/openai/',
  OPENAI_API_KEY: geminiApiKey,
  MIDSCENE_MODEL_NAME: 'gemini-2.5-pro-preview-05-06',
  MIDSCENE_USE_GEMINI: '1',
};

overrideAIConfig(geminiConfig);

// Resolves after `ms` milliseconds. Not used by the flow below; kept for ad-hoc pauses between steps.
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));

/**
 * Runs the Play Store demo on the first connected Android device:
 * opens the Play Store, switches to the Books tab, taps the first cover,
 * then closes the Play Store.
 *
 * @returns {Promise<void>} Resolves once every AI action has completed.
 * @throws {Error} When no Android device is connected, or when connecting
 *   to the device or any AI action fails.
 */
async function main() {
  const [firstDevice] = await getConnectedDevices();
  if (!firstDevice) {
    // Fail with an actionable message instead of "Cannot read properties of undefined".
    throw new Error(
      'No connected Android device found. Connect a device or start an emulator, then verify with `adb devices`.',
    );
  }

  const page = new AndroidDevice(firstDevice.udid);

  // 👀 init Midscene agent
  const agent = new AndroidAgent(page, {
    aiActionContext:
      'If any location, permission, user agreement, etc. popup, click agree. If login page pops up, close it.',
  });

  await page.connect();
  await agent.aiAction('打开Play Store');
  await agent.aiAction('点击Books Tab');
  await agent.aiAction('点击第1个封面');
  await agent.aiAction('关闭Play Store');
}

// Never leave the promise floating: report the failure and signal it through the exit code.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

运行测试

npx tsx demo.ts

测试报告
在这里插入图片描述

Logo

技术共进,成长同行——讯飞AI开发者社区

更多推荐