package org.apache.tika.parser.pdf;

import cj.m;
import com.google.firebase.analytics.FirebaseAnalytics;
import gj.f;
import gj.g;
import gj.h;
import ii.d;
import ii.i;
import ii.o;
import java.awt.image.BufferedImage;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.GregorianCalendar;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import javax.xml.stream.XMLStreamException;
import k5.x;
import li.c;
import mi.a;
import ni.b;
import org.apache.pdfbox.text.e;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.ocr.TesseractOCRParser;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.EmbeddedContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.xmlbeans.XmlErrorCodes;
import org.apache.xmlbeans.impl.jam.xml.JamXmlElements;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/* loaded from: classes3.dex */
class AbstractPDF2XHTML extends e {
    /** Fallback OCR configuration used when the parse context does not provide a {@link TesseractOCRConfig}. */
    private static final TesseractOCRConfig DEFAULT_TESSERACT_CONFIG = new TesseractOCRConfig();
    /** Maximum nesting depth followed when descending into AcroForm field hierarchies. */
    private static final int MAX_ACROFORM_RECURSIONS = 10;
    /** Parser configuration (OCR strategy, annotation/AcroForm extraction switches, exception policy). */
    final PDFParserConfig config;
    /** Parse context used to look up the embedded-document extractor and the OCR configuration. */
    private final ParseContext context;
    /** Metadata of the document being parsed; also receives exception warnings. */
    private final Metadata metadata;
    /** The document handed to the constructor; used for page rendering during OCR. */
    final c pdDocument;
    /** XHTML sink that all extracted content is written to. */
    final XHTMLContentHandler xhtml;
    /**
     * Formatter for signature dates. Its time zone is reset per signature, so the instance must
     * not be shared between threads.
     */
    private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.ROOT);
    /** IOExceptions that were caught and recorded instead of aborting the parse. */
    final List<IOException> exceptions = new ArrayList<>();
    /** Zero-based index of the page currently being processed; advanced at the end of every page. */
    private int pageIndex = 0;

    /**
     * Creates a stripper that writes its output to the given SAX handler.
     *
     * @param cVar            the document to process
     * @param contentHandler  downstream SAX handler; wrapped in an {@link XHTMLContentHandler}
     * @param parseContext    context used later to look up extractors and OCR configuration
     * @param metadata        metadata of the document; shared with the XHTML handler
     * @param pDFParserConfig parser configuration
     */
    public AbstractPDF2XHTML(c cVar, ContentHandler contentHandler, ParseContext parseContext, Metadata metadata, PDFParserConfig pDFParserConfig) {
        this.pdDocument = cVar;
        this.xhtml = new XHTMLContentHandler(contentHandler, metadata);
        this.context = parseContext;
        this.metadata = metadata;
        this.config = pDFParserConfig;
    }

    /**
     * Writes a single form field as an {@code <li>} element.
     *
     * <p>The element text is {@code "<label>: <value>"} (either part may be missing) and the
     * second looked-up name, when present, is attached as the {@code altName} attribute.
     * Signature fields are delegated to {@link #handleSignature}; for those only the attributes
     * are passed on. Nothing is written when there is neither an attribute nor any text.
     *
     * @param fVar the form field to render
     */
    private void addFieldString(f fVar) {
        // NOTE(review): G5/P5 look like the partial-name and alternate-name keys - confirm.
        String label = fVar.f5928n.y0(i.G5);
        String alternateName = fVar.f5928n.y0(i.P5);

        AttributesImpl attrs = new AttributesImpl();
        if (alternateName != null) {
            attrs.addAttribute("", "altName", "altName", "CDATA", alternateName);
        }

        // Signature fields have their own rendering; the label text is not used for them.
        if (fVar instanceof h) {
            handleSignature(attrs, (h) fVar);
            return;
        }

        StringBuilder text = new StringBuilder();
        if (label != null) {
            text.append(label).append(": ");
        }
        String value = fVar.e();
        // The literal string "null" is treated the same as a missing value.
        if (value != null && !value.equals("null")) {
            text.append(value);
        }

        if (attrs.getLength() == 0 && text.length() == 0) {
            return;
        }
        this.xhtml.startElement("li", attrs);
        this.xhtml.characters(text.toString());
        this.xhtml.endElement("li");
    }

    /**
     * Extracts the documents registered in the document-level embedded-files name tree.
     *
     * <p>If the root node carries a names map, only that map is processed. Otherwise the names
     * maps of the root's direct kids are processed; deeper levels are not visited.
     *
     * @param cVar the document whose embedded files should be extracted
     */
    private void extractEmbeddedDocuments(c cVar) {
        d treeDict = (d) new li.f(cVar.a()).f8303b.r0(i.W2);
        if (treeDict == null) {
            return;
        }
        mi.e rootNode = new mi.e(treeDict);

        Map rootNames = rootNode.e();
        if (rootNames != null) {
            processEmbeddedDocNames(rootNames);
            return;
        }

        a kids = rootNode.d();
        if (kids == null) {
            return;
        }
        for (Object kid : kids.f8677i) {
            Map kidNames = ((mi.e) kid).e();
            if (kidNames != null) {
                processEmbeddedDocNames(kidNames);
            }
        }
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r4v2, types: [cj.m] */
    /* JADX WARN: Type inference failed for: r4v3, types: [cj.m] */
    /* JADX WARN: Type inference failed for: r4v4, types: [cj.m] */
    /* JADX WARN: Type inference failed for: r9v2, types: [cj.m] */
    /**
     * Extracts every embedded-file variant stored in one file specification.
     *
     * <p>The specification is probed with four keys in a fixed order; for each key the matching
     * file name and embedded stream are looked up and handed to {@link #extractPDEmbeddedFile},
     * which ignores variants whose stream is absent.
     *
     * @param str                       display name used when a variant has no file name of its own
     * @param aVar                      the file specification; {@code null} is ignored
     * @param embeddedDocumentExtractor extractor that receives each embedded stream
     */
    private void extractMultiOSPDEmbeddedFiles(String str, ni.a aVar, EmbeddedDocumentExtractor embeddedDocumentExtractor) {
        if (aVar == null) {
            return;
        }
        d specDict = aVar.f9136b;
        i sharedNameKey = i.U5;
        // NOTE(review): presumably one key per platform-specific file entry - confirm against the PDF library.
        i[] variantKeys = {i.f6804d3, i.f6870s4, i.Q2, i.V5};
        for (i variantKey : variantKeys) {
            String sharedName = specDict.y0(sharedNameKey);
            String variantName = specDict.y0(variantKey);
            o variantStream = (o) aVar.b(variantKey);
            extractPDEmbeddedFile(str, sharedName, variantName, variantStream != null ? new m(2, variantStream) : null, embeddedDocumentExtractor);
        }
    }

    /**
     * Hands one embedded file to the embedded-document extractor and writes an empty
     * {@code <div class="embedded" id="...">} marker for it.
     *
     * <p>The stream is always closed, and failures from opening or parsing it propagate to the
     * caller. The decompiled form of this method swallowed every {@code Throwable} and referenced
     * undeclared variables.
     *
     * @param str                       fallback resource name, used when {@code str3} is {@code null}
     * @param str2                      currently unused; kept for signature compatibility
     * @param str3                      preferred resource name, may be {@code null}
     * @param bVar                      the embedded file; {@code null} means "nothing to extract"
     * @param embeddedDocumentExtractor extractor that decides whether and how to parse the file
     */
    private void extractPDEmbeddedFile(String str, String str2, String str3, b bVar, EmbeddedDocumentExtractor embeddedDocumentExtractor) {
        if (bVar == null) {
            return;
        }
        String resourceName = str3 == null ? str : str3;

        Metadata metadata = new Metadata();
        metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, resourceName);
        o fileStream = (o) bVar.f3624i;
        metadata.set("Content-Type", fileStream.x0(i.E5));
        // The size lives in the optional "Params" dictionary; -1 is reported when it is absent.
        i sizeKey = i.a("Size");
        d params = (d) fileStream.r0(i.a("Params"));
        long size = params != null ? params.v0(sizeKey, null, -1) : -1;
        metadata.set("Content-Length", Long.toString(size));
        metadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.ATTACHMENT.toString());
        metadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, resourceName);

        if (!embeddedDocumentExtractor.shouldParseEmbedded(metadata)) {
            return;
        }

        TikaInputStream embeddedStream = TikaInputStream.get((InputStream) bVar.a());
        try {
            embeddedDocumentExtractor.parseEmbedded(embeddedStream, new EmbeddedContentHandler(this.xhtml), metadata, false);

            AttributesImpl attrs = new AttributesImpl();
            attrs.addAttribute("", JamXmlElements.CLASS, JamXmlElements.CLASS, "CDATA", "embedded");
            attrs.addAttribute("", "id", "id", "CDATA", resourceName);
            this.xhtml.startElement("div", attrs);
            this.xhtml.endElement("div");
        } finally {
            // Same close helper the original used; runs on success and on failure alike.
            ef.d.a(embeddedStream);
        }
    }

    /**
     * Renders the data of a signature field as a nested list:
     * {@code <li ...><ol type="signaturedata"><li signdata="key">value</li>...</ol></li>}.
     *
     * <p>Nothing is written when the field has no signature dictionary or when every signature
     * property is missing or empty. The emptiness check previously inspected the map's keys,
     * which are constant non-empty literals, so an empty list was always written.
     *
     * @param attributesImpl attributes for the outer {@code <li>} element
     * @param hVar           the signature field
     */
    private void handleSignature(AttributesImpl attributesImpl, h hVar) {
        ii.b signatureBase = hVar.f5928n.r0(i.X5);
        if (signatureBase == null) {
            return;
        }
        dj.a signature = new dj.a((d) signatureBase);
        d signatureDict = signature.f4673b;

        // TreeMap keeps the properties in a stable, alphabetical output order.
        Map<String, String> properties = new TreeMap<>();
        properties.put("name", signatureDict.y0(i.F4));
        properties.put("contactInfo", signatureDict.y0(i.f6853o2));
        properties.put(FirebaseAnalytics.Param.LOCATION, signatureDict.y0(i.f6846m4));
        properties.put("reason", signatureDict.y0(i.f6811e5));
        GregorianCalendar signDate = signatureDict.q0(i.f6866r4);
        if (signDate != null) {
            // Format the date in the time zone it was recorded with.
            this.dateFormat.setTimeZone(signDate.getTimeZone());
            properties.put(XmlErrorCodes.DATE, this.dateFormat.format(signDate.getTime()));
        }

        // Look at the VALUES: the keys are fixed literals and therefore never empty.
        boolean hasData = false;
        for (String value : properties.values()) {
            if (value != null && !value.equals("")) {
                hasData = true;
                break;
            }
        }
        if (!hasData) {
            return;
        }

        this.xhtml.startElement("li", attributesImpl);
        AttributesImpl listAttrs = new AttributesImpl();
        listAttrs.addAttribute("", "type", "type", "CDATA", "signaturedata");
        this.xhtml.startElement("ol", listAttrs);
        for (Map.Entry<String, String> property : properties.entrySet()) {
            String value = property.getValue();
            if (value == null || value.equals("")) {
                continue;
            }
            AttributesImpl itemAttrs = new AttributesImpl();
            itemAttrs.addAttribute("", "signdata", "signdata", "CDATA", property.getKey());
            this.xhtml.startElement("li", itemAttrs);
            this.xhtml.characters(value);
            this.xhtml.endElement("li");
        }
        this.xhtml.endElement("ol");
        this.xhtml.endElement("li");
    }

    /**
     * Writes a form field and, for non-terminal fields, recursively writes its children inside
     * an {@code <ol>} element.
     *
     * <p>A non-terminal field without a kids array is treated as having no children instead of
     * failing with a {@link NullPointerException}.
     *
     * @param fVar the field to write
     * @param i4   current nesting depth; fields at depth 10 or deeper are skipped
     */
    private void processAcroField(f fVar, int i4) {
        // 10 is the value of MAX_ACROFORM_RECURSIONS; it bounds recursion on malformed or cyclic trees.
        if (i4 >= 10) {
            return;
        }
        addFieldString(fVar);
        if (!(fVar instanceof g)) {
            return;
        }

        g parent = (g) fVar;
        int childDepth = i4 + 1;
        this.xhtml.startElement("ol");

        // Resolve all children first, then recurse, so a lookup failure happens before any child output.
        List<f> children = new ArrayList<>();
        ii.a kids = (ii.a) parent.f5928n.r0(i.f6800c4);
        if (kids != null) {
            for (int k = 0; k < kids.size(); k++) {
                f child = f.a(parent.f5926b, (d) kids.h0(k), parent);
                if (child != null) {
                    children.add(child);
                }
            }
        }
        for (f child : children) {
            processAcroField(child, childDepth);
        }

        this.xhtml.endElement("ol");
    }

    /**
     * Extracts the embedded files of every file specification in a names map.
     *
     * @param map display name to file specification; {@code null} or empty maps are ignored
     */
    private void processEmbeddedDocNames(Map<String, ni.a> map) {
        boolean nothingToDo = map == null || map.isEmpty();
        if (nothingToDo) {
            return;
        }
        // Resolve the extractor once and reuse it for every entry.
        EmbeddedDocumentExtractor extractor = getEmbeddedDocumentExtractor();
        for (Map.Entry<String, ni.a> named : map.entrySet()) {
            String displayName = named.getKey();
            ni.a fileSpec = named.getValue();
            extractMultiOSPDEmbeddedFiles(displayName, fileSpec, extractor);
        }
    }

    /**
     * Renders the current page to a temporary image file and feeds it to Tesseract, writing the
     * recognized text to the XHTML output.
     *
     * <p>Does nothing when the OCR strategy is {@code NO_OCR}. Both streams are closed even when
     * writing or parsing fails, and the temporary resources are disposed exactly once on every
     * path. IOExceptions go through {@link #handleCatchableIOE}; SAX failures are rethrown
     * wrapped.
     */
    public void doOCROnCurrentPage() {
        if (this.config.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.NO_OCR)) {
            return;
        }
        TesseractOCRConfig tesseractOCRConfig = (TesseractOCRConfig) this.context.get(TesseractOCRConfig.class, DEFAULT_TESSERACT_CONFIG);
        TesseractOCRParser tesseractOCRParser = new TesseractOCRParser();
        if (!tesseractOCRParser.hasTesseract(tesseractOCRConfig)) {
            throw new TikaException("Tesseract is not available. Please set the OCR_STRATEGY to NO_OCR or configure Tesseract correctly");
        }

        ij.h renderer = new ij.h(this.pdDocument);
        TemporaryResources temporaryResources = new TemporaryResources();
        try {
            BufferedImage pageImage = renderer.a(this.pageIndex, this.config.getOcrImageType());
            Path imageFile = temporaryResources.createTempFile();

            // Write the image and close the file before the OCR parser reads it back.
            try (OutputStream imageOut = Files.newOutputStream(imageFile)) {
                jj.a.c(pageImage, this.config.getOcrImageFormatName(), imageOut, this.config.getOcrDPI());
            }
            try (TikaInputStream imageIn = TikaInputStream.get(imageFile)) {
                tesseractOCRParser.parseInline(imageIn, this.xhtml, tesseractOCRConfig);
            }
        } catch (IOException e) {
            handleCatchableIOE(e);
        } catch (SAXException e) {
            throw new x("error writing OCR content from PDF", e, 5);
        } finally {
            temporaryResources.dispose();
        }
    }

    /**
     * Finishes the document: writes bookmark text, then embedded documents, then (if enabled in
     * the configuration) AcroForm content, and finally closes the XHTML document.
     *
     * <p>IOExceptions from the embedded-document and AcroForm steps are routed through
     * {@link #handleCatchableIOE}; Tika and SAX exceptions are rethrown wrapped with the message
     * "Unable to end a document".
     *
     * @param cVar the document being finished
     */
    @Override // org.apache.pdfbox.text.e
    public void endDocument(c cVar) {
        try {
            // Bookmark extraction is not guarded: a failure here skips the remaining steps.
            extractBookmarkText();
            try {
                extractEmbeddedDocuments(cVar);
            } catch (IOException e6) {
                handleCatchableIOE(e6);
            }
            if (this.config.getExtractAcroFormContent()) {
                try {
                    extractAcroForm(cVar);
                } catch (IOException e10) {
                    handleCatchableIOE(e10);
                }
            }
            this.xhtml.endDocument();
        } catch (TikaException e11) {
            throw new x("Unable to end a document", e11, 5);
        } catch (SAXException e12) {
            throw new x("Unable to end a document", e12, 5);
        }
    }

    /**
     * Finishes a page: extracts files attached through annotations, optionally writes annotation
     * text, optionally runs OCR, and closes the page's {@code <div>}.
     *
     * <p>An IOException reaching the outer handler is recorded in {@link #exceptions} rather than
     * thrown; Tika and SAX exceptions are rethrown wrapped with "Unable to end a page". The page
     * index is advanced on every path.
     *
     * @param hVar the page that has just been processed
     */
    @Override // org.apache.pdfbox.text.e
    public void endPage(li.h hVar) {
        try {
            EmbeddedDocumentExtractor embeddedDocumentExtractor = getEmbeddedDocumentExtractor();
            for (cj.a aVar : hVar.d().f8677i) {
                // File-attachment annotations: extract the attached file(s).
                if (aVar instanceof cj.b) {
                    cj.b attachment = (cj.b) aVar;
                    try {
                        extractMultiOSPDEmbeddedFiles(attachment.m(), (ni.a) attachment.n(), embeddedDocumentExtractor);
                    } catch (IOException e) {
                        handleCatchableIOE(e);
                    } catch (TikaException e) {
                        throw new x("file embedded in annotation tika exception", e, 5);
                    } catch (SAXException e) {
                        throw new x("file embedded in annotation sax exception", e, 5);
                    }
                }

                if (!this.config.getExtractAnnotationText()) {
                    continue;
                }

                // Link annotations: emit the target as an anchor when the action carries one.
                if (aVar instanceof cj.d) {
                    d linkDict = ((cj.d) aVar).f3622b;
                    // Build the action once; instanceof also covers the "no action" (null) case.
                    bj.a action = rg.c.k((d) linkDict.r0(i.C));
                    if (action instanceof bj.d) {
                        String target = ((bj.d) action).f3252b.y0(i.W5);
                        if (target != null && target.trim().length() > 0) {
                            this.xhtml.startElement("div", JamXmlElements.CLASS, JamXmlElements.ANNOTATION);
                            this.xhtml.startElement("a", "href", target);
                            this.xhtml.characters(target);
                            this.xhtml.endElement("a");
                            this.xhtml.endElement("div");
                        }
                    }
                }

                // Markup annotations: emit title, subject and contents, each in its own div.
                if (aVar instanceof cj.e) {
                    cj.e markup = (cj.e) aVar;
                    String title = markup.l();
                    d markupDict = markup.f3622b;
                    String subject = markupDict.y0(i.C5);
                    String contents = markupDict.y0(i.f6857p2);
                    if (title != null || subject != null || contents != null) {
                        this.xhtml.startElement("div", JamXmlElements.CLASS, JamXmlElements.ANNOTATION);
                        if (title != null) {
                            this.xhtml.startElement("div", JamXmlElements.CLASS, "annotationTitle");
                            this.xhtml.characters(title);
                            this.xhtml.endElement("div");
                        }
                        if (subject != null) {
                            this.xhtml.startElement("div", JamXmlElements.CLASS, "annotationSubject");
                            this.xhtml.characters(subject);
                            this.xhtml.endElement("div");
                        }
                        if (contents != null) {
                            this.xhtml.startElement("div", JamXmlElements.CLASS, "annotationContents");
                            this.xhtml.characters(contents);
                            this.xhtml.endElement("div");
                        }
                        this.xhtml.endElement("div");
                    }
                }
            }

            if (this.config.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) {
                doOCROnCurrentPage();
            }
            this.xhtml.endElement("div");
        } catch (IOException e) {
            this.exceptions.add(e);
        } catch (TikaException e) {
            throw new x("Unable to end a page", e, 5);
        } catch (SAXException e) {
            throw new x("Unable to end a page", e, 5);
        } finally {
            this.pageIndex++;
        }
    }

    /**
     * Writes the document's interactive-form content.
     *
     * <p>If the form carries an XFA resource, that is extracted first and the method returns on
     * success. If XFA extraction fails with an I/O or XML error, the method falls back to the
     * classic field list, which is written as {@code <div class="acroform"><ol>...</ol></div>}.
     * The XFA stream is closed on both the success and the failure path.
     *
     * @param cVar the document whose form should be extracted
     */
    public void extractAcroForm(c cVar) {
        li.d catalog = cVar.a();
        if (catalog == null) {
            return;
        }
        gj.a form = catalog.a();
        if (form == null) {
            return;
        }

        m xfa = form.a();
        if (xfa != null) {
            XFAExtractor xFAExtractor = new XFAExtractor();
            try (BufferedInputStream xfaStream = new BufferedInputStream(new ByteArrayInputStream(xfa.b()))) {
                xFAExtractor.extract(xfaStream, this.xhtml, this.metadata, this.context);
                return;
            } catch (IOException | XMLStreamException ignored) {
                // Deliberate best effort: an unreadable XFA resource falls through to the field list.
            }
        }

        List fields;
        ii.a fieldArray = (ii.a) form.f5925b.r0(i.f6813f3);
        if (fieldArray == null) {
            fields = Collections.emptyList();
        } else {
            ArrayList<f> resolved = new ArrayList<>();
            for (int idx = 0; idx < fieldArray.f6776b.size(); idx++) {
                d fieldDict = (d) fieldArray.h0(idx);
                if (fieldDict == null) {
                    continue;
                }
                f field = f.a(form, fieldDict, null);
                if (field != null) {
                    resolved.add(field);
                }
            }
            fields = new a(resolved, fieldArray);
        }

        // Defensive: the list wrapper is project-defined, so its iterator is not assumed non-null.
        ListIterator fieldIterator = fields.listIterator();
        if (fieldIterator == null) {
            return;
        }

        this.xhtml.startElement("div", JamXmlElements.CLASS, "acroform");
        this.xhtml.startElement("ol");
        while (fieldIterator.hasNext()) {
            Object candidate = fieldIterator.next();
            if (candidate instanceof f) {
                processAcroField((f) candidate, 0);
            }
        }
        this.xhtml.endElement("ol");
        this.xhtml.endElement("div");
    }

    /**
     * Writes the text of all bookmarks of the current document, starting at the root bookmark
     * node. Does nothing when the document has no such node.
     */
    public void extractBookmarkText() {
        li.d catalog = this.document.a();
        i rootKey = i.N4;
        d rootDict = (d) catalog.f8298b.r0(rootKey);
        if (rootDict == null) {
            return;
        }
        fj.c bookmarkRoot = new fj.c(rootDict);
        // NOTE(review): looks like this stamps the dictionary's type entry with the root key's name - confirm.
        rootDict.D0(rootKey.f6903b, i.Q5);
        extractBookmarkText(bookmarkRoot);
    }

    /**
     * Writes the children of a bookmark node as a {@code <ul>} list, one {@code <li>} per child,
     * descending recursively into each child after its own entry has been written.
     *
     * <p>Nothing is written when the node has no first child.
     *
     * @param cVar the bookmark node whose children should be written
     */
    public void extractBookmarkText(fj.c cVar) {
        d firstChildDict = (d) cVar.f5442b.r0(i.f6828i3);
        if (firstChildDict == null) {
            return;
        }

        this.xhtml.startElement("ul");
        fj.c child = new fj.c(firstChildDict);
        while (child != null) {
            d childDict = child.f5442b;

            this.xhtml.startElement("li");
            this.xhtml.characters(childDict.y0(i.H5));
            this.xhtml.endElement("li");

            // Nested bookmarks go into their own list, after the item that owns them.
            extractBookmarkText(child);

            // Advance to the next sibling; the chain ends when the link is missing.
            d siblingDict = (d) childDict.r0(i.H4);
            child = siblingDict != null ? new fj.c(siblingDict) : null;
        }
        this.xhtml.endElement("ul");
    }

    /**
     * Returns the embedded-document extractor registered in the parse context, or a new
     * {@link ParsingEmbeddedDocumentExtractor} bound to that context when none is registered.
     *
     * @return the extractor to use for embedded documents; never {@code null}
     */
    public EmbeddedDocumentExtractor getEmbeddedDocumentExtractor() {
        EmbeddedDocumentExtractor registered = (EmbeddedDocumentExtractor) this.context.get(EmbeddedDocumentExtractor.class);
        if (registered != null) {
            return registered;
        }
        return new ParsingEmbeddedDocumentExtractor(this.context);
    }

    /**
     * Applies the configured policy to an IOException raised during parsing.
     *
     * <p>The exception is rethrown when the configuration does not allow catching intermediate
     * IOExceptions, or when it was caused by a {@link SAXException} whose message contains
     * "Your document contained more than". Otherwise its message is added to the metadata as an
     * exception warning and the exception is recorded in {@link #exceptions}.
     *
     * @param iOException the exception to rethrow or record
     */
    public void handleCatchableIOE(IOException iOException) {
        if (!this.config.isCatchIntermediateIOExceptions()) {
            throw iOException;
        }

        Throwable cause = iOException.getCause();
        if (cause instanceof SAXException) {
            String causeMessage = cause.getMessage();
            // This particular SAX failure must always abort the parse.
            if (causeMessage != null && causeMessage.contains("Your document contained more than")) {
                throw iOException;
            }
        }

        String message = iOException.getMessage();
        String warning = message != null ? message : "IOException, no message";
        this.metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING, warning);
        this.exceptions.add(iOException);
    }

    /**
     * Starts the XHTML output document.
     *
     * <p>A {@link SAXException} from the handler is rethrown wrapped with the message
     * "Unable to start a document".
     *
     * @param cVar the document being started (not used here)
     */
    @Override // org.apache.pdfbox.text.e
    public void startDocument(c cVar) {
        try {
            this.xhtml.startDocument();
        } catch (SAXException e6) {
            throw new x("Unable to start a document", e6, 5);
        }
    }

    /**
     * Starts a page: opens a {@code div} element marked with the value "page" and then calls
     * {@code writeParagraphStart()}.
     *
     * <p>A {@link SAXException} from the handler is rethrown wrapped with the message
     * "Unable to start a page".
     *
     * @param hVar the page being started (not used here)
     */
    @Override // org.apache.pdfbox.text.e
    public void startPage(li.h hVar) {
        try {
            this.xhtml.startElement("div", JamXmlElements.CLASS, "page");
            // writeParagraphStart() is not defined in this file; presumably inherited from the superclass.
            writeParagraphStart();
        } catch (SAXException e6) {
            throw new x("Unable to start a page", e6, 5);
        }
    }
}
