nodejs langchain 文件分割
本文介绍如何在 Node.js 中使用 LangChain 加载并分割文件。
·
langchain 文件分割
安装 langchain 包:
npm i langchain
代码示例
const path = require("path");

const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { PDFLoader } = require("langchain/document_loaders/fs/pdf");
const { TextLoader } = require("langchain/document_loaders/fs/text");
const { DocxLoader } = require("langchain/document_loaders/fs/docx");
const { EPubLoader } = require("langchain/document_loaders/fs/epub");
/**
 * Pick the document loader that matches a file extension.
 *
 * Despite its name, `mimeType` is a bare file extension, not a MIME type.
 *
 * @param {string} filepath - Path of the file to load; must be non-empty.
 * @param {string} mimeType - Extension without the leading dot
 *   ("pdf", "epub", "txt" or "docx"), matched case-insensitively.
 * @returns {object} A loader instance constructed with `filepath`.
 * @throws {Error} If `filepath` is falsy or the extension is not supported.
 */
function documentClassification(filepath, mimeType) {
  if (!filepath) throw new Error(`路径出现问题:${filepath}`);

  // Normalise case so that "PDF" and "pdf" select the same loader.
  const type = typeof mimeType === 'string' ? mimeType.toLowerCase() : mimeType;

  let loader = null;
  switch (type) {
    case 'pdf':
      loader = new PDFLoader(filepath);
      break;
    case 'epub':
      loader = new EPubLoader(filepath);
      break;
    case 'txt':
      loader = new TextLoader(filepath);
      break;
    case 'docx':
      loader = new DocxLoader(filepath);
      break;
    default:
      break;
  }

  // Report the caller's original value, not the normalised one.
  if (!loader) throw new Error(`无法解析的类型:${mimeType}`);
  return loader;
}
/**
 * Read a document from disk and split it into chunks.
 *
 * The loader is chosen from the file's extension, then the loaded documents
 * are split with a RecursiveCharacterTextSplitter.
 *
 * @param {Object} params
 * @param {string} params.filepath - Path of the file to read.
 * @param {string} [params.filename] - File name (accepted but not used here).
 * @returns {Promise<{splitterDocs: Array, useTokens: number}>} The split
 *   documents and a rough usage figure (chunk count × chunk size).
 * @throws {Error} If `filepath` is missing or the extension is unsupported.
 */
async function documentReading({ filepath, filename } = {}) {
  if (!filepath) throw new Error(`路径出现问题:${filepath}`);

  // Take the extension from the final path segment only. Splitting the whole
  // path on "." picks the wrong piece for "./dir/a.pdf" or "report.v2.pdf".
  const mimeType = path.extname(filepath).slice(1).toLowerCase();
  console.log('文件类型', mimeType);

  // Chunk size handed to the splitter; also used for the usage estimate below.
  const chunkSize = 1000;
  const splitter = new RecursiveCharacterTextSplitter({ chunkSize });

  // Load
  const loader = documentClassification(filepath, mimeType);
  const docs = await loader.load();

  // Split
  const splitterDocs = await splitter.splitDocuments(docs);
  console.log('文档分割完成');

  // useTokens is an estimate (chunks × chunkSize), not an actual token count.
  return { splitterDocs, useTokens: splitterDocs.length * chunkSize };
}
// Expose documentReading as this module's export.
module.exports = documentReading
更多推荐
所有评论(0)