有些PDF文件采用XFA格式,其表单数据以XML形式存储。这类PDF需要通过XFA方式读取,下面是基于 iText 7 的Java实现:

package com.xxx.xxx.util.pdf;

import com.itextpdf.forms.PdfAcroForm;
import com.itextpdf.forms.xfa.XfaForm;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;

import java.io.File;
import java.io.FileOutputStream;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Reads an XFA-based PDF file and writes its form data out as an XML file.
 *
 * @author xxx
 */
public class ReadXFAUtil {

    /**
     * Extracts the XFA datasets of a PDF and serializes them as indented UTF-8 XML.
     *
     * <p>The {@code data} child of the XFA {@code datasets} node is written when it
     * exists; otherwise the whole {@code datasets} node is written.
     *
     * @param src  path of the XFA PDF file to read
     * @param dest path of the XML file to write (created or overwritten)
     * @throws IllegalArgumentException if the PDF has no XFA datasets node
     * @throws Exception if the PDF cannot be read or the XML cannot be written
     */
    public static void manipulatePdf(String src, String dest) throws Exception {
        // try-with-resources guarantees the document (and its underlying reader)
        // is closed even when extraction or serialization fails.
        try (PdfDocument pdfDoc = new PdfDocument(new PdfReader(src))) {
            // The document is opened read-only, so do not ask iText to create a
            // form when none exists; a missing form is reported below instead.
            PdfAcroForm form = PdfAcroForm.getAcroForm(pdfDoc, false);
            XfaForm xfa = (form == null) ? null : form.getXfaForm();
            Node datasets = (xfa == null) ? null : xfa.getDatasetsNode();
            if (datasets == null) {
                throw new IllegalArgumentException("PDF contains no XFA datasets: " + src);
            }

            Node node = findDataNode(datasets);

            // The output file is opened only after validation so that a failed
            // read does not leave an empty or truncated destination file behind.
            try (FileOutputStream os = new FileOutputStream(dest)) {
                Transformer transformer = TransformerFactory.newInstance().newTransformer();
                transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
                transformer.setOutputProperty(OutputKeys.INDENT, "yes");
                transformer.transform(new DOMSource(node), new StreamResult(os));
            }
        }
    }

    /**
     * Returns the first direct child of {@code datasets} whose local name is
     * {@code data}, or {@code datasets} itself when there is no such child.
     */
    private static Node findDataNode(Node datasets) {
        NodeList children = datasets.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if ("data".equals(child.getLocalName())) {
                return child;
            }
        }
        return datasets;
    }

    /**
     * Command-line entry point.
     *
     * @param args optional {@code <src.pdf> <dest.xml>}; when fewer than two
     *             arguments are given, the built-in sample paths are used
     * @throws Exception if the conversion fails
     */
    public static void main(String[] args) throws Exception {
        String src = "e:/4000-2021-6增值税申报表.pdf";
        String dest = "e:/1.xml";
        if (args.length >= 2) {
            src = args[0];
            dest = args[1];
        }

        ReadXFAUtil.manipulatePdf(src, dest);
    }
}

如有问题,请私信。

xObP8s/gudi/zrPMoaJKU7K5u7e+s7/Os8yhokpBVkHP4LnYv86zzMjn0OjSqtKyv8nS1MGqz7VRUaGjDQoNCtf31d8gUVEgNDA0NTQwMjI5

Logo

技术共进,成长同行——讯飞AI开发者社区

更多推荐