package org.apache.tika.parser.pdf;

import a0.f;
import ai.e;
import cj.m;
import ii.a;
import ii.b;
import ii.d;
import ii.i;
import ii.o;
import ii.p;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.GregorianCalendar;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.stream.XMLStreamException;
import li.c;
import org.apache.tika.config.Field;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.AccessPermissions;
import org.apache.tika.metadata.HttpHeaders;
import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.PDF;
import org.apache.tika.metadata.PagedText;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.image.xmp.JempboxExtractor;
import org.apache.tika.parser.ocr.TesseractOCRParser;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.XHTMLContentHandler;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import q.b0;

/* loaded from: classes3.dex */
public class PDFParser extends AbstractParser {
    /** Media type produced by {@code MediaType.application("pdf")}; assigned in the static initializer. */
    private static final MediaType MEDIA_TYPE;
    /** Metadata key read by {@code getPassword} when no {@link PasswordProvider} supplies a password. */
    public static final String PASSWORD = "org.apache.tika.parser.pdf.password";
    /** Singleton set containing only {@link #MEDIA_TYPE}; returned by {@link #getSupportedTypes(ParseContext)}. */
    private static final Set<MediaType> SUPPORTED_TYPES;
    private static final long serialVersionUID = -752276948656079347L;
    /** Configuration used by {@code parse} when the parse context carries no {@link PDFParserConfig}. */
    private PDFParserConfig defaultConfig = new PDFParserConfig();

    static {
        MEDIA_TYPE = MediaType.application("pdf");
        SUPPORTED_TYPES = Collections.singleton(MEDIA_TYPE);
    }

    /**
     * Adds the textual form of a low-level {@code b} object to {@code metadata} under {@code str}.
     *
     * <p>Instances of {@code a} are walked element by element (recursively), instances of
     * {@code p} contribute the result of {@code a()}, {@code null} and instances of {@code d}
     * are skipped, and anything else contributes its {@code toString()}.
     *
     * @param metadata target metadata
     * @param str metadata key
     * @param bVar value to convert; may be {@code null}
     */
    private void addMetadata(Metadata metadata, String str, b bVar) {
        if (bVar instanceof a) {
            // Flatten the container: every element is added under the same key.
            for (Iterator elements = ((a) bVar).q0().iterator(); elements.hasNext(); ) {
                addMetadata(metadata, str, (b) elements.next());
            }
            return;
        }
        String text;
        if (bVar instanceof p) {
            text = ((p) bVar).a();
        } else if (bVar != null && !(bVar instanceof d)) {
            text = bVar.toString();
        } else {
            // Nothing to record for null values or d instances.
            return;
        }
        addMetadata(metadata, str, text);
    }

    /**
     * Appends the decoded form of {@code str2} to {@code metadata} under the key {@code str}.
     * A {@code null} value is ignored.
     */
    private void addMetadata(Metadata metadata, String str, String str2) {
        if (str2 == null) {
            return;
        }
        metadata.add(str, decode(str2));
    }

    /**
     * Stores {@code calendar} under the key {@code str}, rendered with {@code Date.toString()}.
     * A {@code null} calendar is ignored.
     */
    private void addMetadata(Metadata metadata, String str, Calendar calendar) {
        if (calendar == null) {
            return;
        }
        String rendered = calendar.getTime().toString();
        metadata.set(str, rendered);
    }

    /**
     * Adds the decoded form of {@code str} to {@code property}, unless the property is
     * single-valued and already has a value. A {@code null} string is ignored.
     */
    private void addMetadata(Metadata metadata, Property property, String str) {
        if (str == null) {
            return;
        }
        String decoded = decode(str);
        boolean mayAdd = property.isMultiValuePermitted() || metadata.get(property) == null;
        if (mayAdd) {
            metadata.add(property, decoded);
        }
    }

    /**
     * Sets {@code property} to the instant held by {@code calendar}; a {@code null} calendar
     * is ignored.
     */
    private void addMetadata(Metadata metadata, Property property, Calendar calendar) {
        if (calendar == null) {
            return;
        }
        metadata.set(property, calendar.getTime());
    }

    /**
     * Returns {@code str} run through a fresh {@link PDFEncodedStringDecoder} when
     * {@link PDFEncodedStringDecoder#shouldDecode} says so, otherwise {@code str} unchanged.
     */
    private String decode(String str) {
        if (!PDFEncodedStringDecoder.shouldDecode(str)) {
            return str;
        }
        return new PDFEncodedStringDecoder().decode(str);
    }

    /**
     * Adds {@code str} to {@code property} when it is neither {@code null} nor empty.
     * The {@code eVar} argument is not used by this implementation.
     */
    private void extractDublinCoreListItems(Metadata metadata, Property property, String str, e eVar) {
        boolean hasValue = str != null && !str.isEmpty();
        if (hasValue) {
            addMetadata(metadata, property, str);
        }
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r5v16, types: [cj.m] */
    /**
     * Copies document-level information from {@code cVar} into {@code metadata}.
     *
     * <p>In order, this method publishes eight {@link AccessPermissions} flags, tries to load a
     * catalog entry as an XML DOM and passes it to {@link JempboxExtractor#extractXMPMM},
     * records {@link PagedText#N_PAGES}, fills the title/creator/creator-tool/keywords/
     * producer/subject/trapped/created/modified properties from one dictionary, copies every
     * other key of that dictionary under its own name and under the custom-prefix name,
     * records the PDF version, and finally reports the catalog's {@code Extensions} entries.
     *
     * <p>NOTE(review): class and member names here are obfuscated; comments that interpret
     * them are hedged and should be confirmed against the un-obfuscated library.
     *
     * @param cVar the loaded document
     * @param metadata receives all extracted values
     * @param parseContext passed to {@code loadDOM} to obtain the XML parser
     */
    private void extractMetadata(c cVar, Metadata metadata, ParseContext parseContext) {
        ParseContext parseContext2;
        mi.d dVar;
        // Lazily build and cache the object queried for permission flags below.
        // Presumably this is the "everything allowed" default — TODO confirm what c(int) does.
        if (cVar.C == null) {
            d1.d dVar2 = new d1.d(2);
            if (!dVar2.f4415b) {
                dVar2.c(11);
            }
            if (!dVar2.f4415b) {
                dVar2.c(5);
            }
            if (!dVar2.f4415b) {
                dVar2.c(10);
            }
            if (!dVar2.f4415b) {
                dVar2.c(9);
            }
            if (!dVar2.f4415b) {
                dVar2.c(4);
            }
            if (!dVar2.f4415b) {
                dVar2.c(6);
            }
            if (!dVar2.f4415b) {
                dVar2.c(3);
            }
            if (!dVar2.f4415b) {
                dVar2.c(12);
            }
            cVar.C = dVar2;
        }
        d1.d dVar3 = cVar.C;
        // Each flag is stored as the string "true" or "false".
        metadata.set(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY, Boolean.toString(dVar3.b(10)));
        metadata.set(AccessPermissions.EXTRACT_CONTENT, Boolean.toString(dVar3.b(5)));
        metadata.set(AccessPermissions.ASSEMBLE_DOCUMENT, Boolean.toString(dVar3.b(11)));
        metadata.set(AccessPermissions.FILL_IN_FORM, Boolean.toString(dVar3.b(9)));
        metadata.set(AccessPermissions.CAN_MODIFY, Boolean.toString(dVar3.b(4)));
        metadata.set(AccessPermissions.CAN_MODIFY_ANNOTATIONS, Boolean.toString(dVar3.b(6)));
        metadata.set(AccessPermissions.CAN_PRINT, Boolean.toString(dVar3.b(3)));
        metadata.set(AccessPermissions.CAN_PRINT_DEGRADED, Boolean.toString(dVar3.b(12)));
        // Look up a catalog entry and wrap it only when it is an `o` instance
        // (presumably the XMP metadata stream — TODO confirm which key i.f6897y4 is).
        b r02 = cVar.a().f8298b.r0(i.f6897y4);
        if (r02 instanceof o) {
            // NOTE(review): `??` is a decompiler artefact (type inference failed); not valid Java.
            ?? mVar = new m(2, (o) r02);
            parseContext2 = parseContext;
            dVar = mVar;
        } else {
            parseContext2 = parseContext;
            dVar = null;
        }
        // loadDOM returns null for a null argument or when reading/parsing fails.
        Document loadDOM = loadDOM(dVar, parseContext2);
        ai.a aVar = loadDOM != null ? new ai.a(loadDOM) : null;
        if (aVar != null) {
            try {
                // The result is discarded and an IOException here is silently ignored.
                f.u(aVar.a(e.class));
            } catch (IOException unused) {
            }
            JempboxExtractor.extractXMPMM(aVar, metadata);
        }
        li.e eVar = cVar.f8296i;
        ii.e eVar2 = cVar.f8295b;
        // Lazily create the wrapper read below, inserting an empty dictionary under i.T3
        // when none exists (presumably the document-information dictionary — TODO confirm).
        if (eVar == null) {
            d dVar4 = eVar2.C;
            i iVar = i.T3;
            d dVar5 = (d) dVar4.r0(iVar);
            if (dVar5 == null) {
                dVar5 = new d();
                dVar4.C0(dVar5, iVar);
            }
            cVar.f8296i = new li.e(dVar5, 0);
        }
        li.e eVar3 = cVar.f8296i;
        metadata.set(PagedText.N_PAGES, cVar.a().b().f8308b.v0(i.f6861q2, null, 0));
        Property property = TikaCoreProperties.TITLE;
        d dVar6 = eVar3.f8302i;
        i iVar2 = i.H5;
        extractMultilingualItems(metadata, property, dVar6.y0(iVar2), null);
        Property property2 = PDF.DOC_INFO_TITLE;
        // dVar7 reads the same field as dVar6; all remaining lookups go through dVar7.
        d dVar7 = eVar3.f8302i;
        addMetadata(metadata, property2, dVar7.y0(iVar2));
        Property property3 = TikaCoreProperties.CREATOR;
        i iVar3 = i.A1;
        extractDublinCoreListItems(metadata, property3, dVar7.y0(iVar3), null);
        addMetadata(metadata, PDF.DOC_INFO_CREATOR, dVar7.y0(iVar3));
        // A null value makes extractDublinCoreListItems return immediately: this call adds nothing.
        extractDublinCoreListItems(metadata, TikaCoreProperties.CONTRIBUTOR, null, null);
        Property property4 = TikaCoreProperties.CREATOR_TOOL;
        i iVar4 = i.f6868s2;
        addMetadata(metadata, property4, dVar7.y0(iVar4));
        addMetadata(metadata, PDF.DOC_INFO_CREATOR_TOOL, dVar7.y0(iVar4));
        Property property5 = TikaCoreProperties.KEYWORDS;
        i iVar5 = i.f6795b4;
        addMetadata(metadata, property5, dVar7.y0(iVar5));
        addMetadata(metadata, PDF.DOC_INFO_KEY_WORDS, dVar7.y0(iVar5));
        i iVar6 = i.f6792a5;
        addMetadata(metadata, "producer", dVar7.y0(iVar6));
        addMetadata(metadata, PDF.DOC_INFO_PRODUCER, dVar7.y0(iVar6));
        // With both the value and the schema null, extractMultilingualItems adds nothing.
        extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, null);
        Property property6 = PDF.DOC_INFO_SUBJECT;
        i iVar7 = i.D5;
        addMetadata(metadata, property6, dVar7.y0(iVar7));
        addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, dVar7.y0(iVar7));
        i iVar8 = i.M5;
        addMetadata(metadata, "trapped", dVar7.x0(iVar8));
        addMetadata(metadata, PDF.DOC_INFO_TRAPPED, dVar7.x0(iVar8));
        i iVar9 = i.f6864r2;
        addMetadata(metadata, "created", dVar7.q0(iVar9));
        addMetadata(metadata, PDF.DOC_INFO_CREATED, dVar7.q0(iVar9));
        addMetadata(metadata, TikaCoreProperties.CREATED, dVar7.q0(iVar9));
        i iVar10 = i.C4;
        GregorianCalendar q02 = dVar7.q0(iVar10);
        addMetadata(metadata, HttpHeaders.LAST_MODIFIED, q02);
        addMetadata(metadata, TikaCoreProperties.MODIFIED, q02);
        addMetadata(metadata, PDF.DOC_INFO_MODIFICATION_DATE, dVar7.q0(iVar10));
        // Keys in this list are skipped by the loop below; every other key of the dictionary
        // is copied twice: under its raw name and under the custom-prefix name.
        List asList = Arrays.asList(MSOffice.AUTHOR, "Creator", "CreationDate", "ModDate", MSOffice.KEYWORDS, "Producer", "Subject", "Title", "Trapped");
        for (i iVar11 : dVar7.f6780b.keySet()) {
            String str = iVar11.f6903b;
            if (!asList.contains(str)) {
                addMetadata(metadata, str, dVar7.r0(iVar11));
                addMetadata(metadata, b0.e(PDF.PDF_DOC_INFO_CUSTOM_PREFIX, str), dVar7.r0(iVar11));
            }
        }
        metadata.set(PDF.PDF_VERSION, Float.toString(eVar2.f6781b));
        metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=" + Float.toString(eVar2.f6781b));
        if (aVar != null) {
            try {
                if (!ai.b.class.isAssignableFrom(bi.a.class)) {
                    throw new IllegalArgumentException("Only XMPSchemas can be mapped to.");
                }
                // Map the PDF/A identification namespace to bi.a, then request that schema;
                // the result is discarded, and an IOException is recorded in the metadata.
                aVar.f612b.put("http://www.aiim.org/pdfa/ns/id/", bi.a.class);
                f.u(aVar.a(bi.a.class));
            } catch (IOException e6) {
                metadata.set("X-TIKA:pdf:metadata-xmp-parse-failed", "" + e6);
            }
        }
        // NOTE(review): this cast and the one on the ADBE entry below are unchecked; a catalog
        // whose entry is not a `d` would raise ClassCastException — consider an instanceof guard.
        d dVar8 = (d) cVar.a().f8298b.r0(i.a("Extensions"));
        if (dVar8 != null) {
            for (i iVar12 : dVar8.f6780b.keySet()) {
                if (iVar12.equals(i.a("ADBE"))) {
                    d dVar9 = (d) dVar8.r0(iVar12);
                    if (dVar9 != null) {
                        String x02 = dVar9.x0(i.a("BaseVersion"));
                        // -1 is the default passed to v0, so it signals a missing ExtensionLevel.
                        int v02 = dVar9.v0(i.a("ExtensionLevel"), null, -1);
                        if (v02 != -1) {
                            metadata.set(PDF.PDF_EXTENSION_VERSION, x02 + " Adobe Extension Level " + v02);
                            metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString() + "; version=\"" + x02 + " Adobe Extension Level " + v02 + "\"");
                        }
                    }
                } else {
                    // set (not add): only the last non-ADBE key seen is kept.
                    metadata.set("pdf:foundNonAdobeExtensionName", iVar12.f6903b);
                }
            }
        }
    }

    /**
     * Adds the values of a (possibly multilingual) property to {@code metadata}.
     *
     * <p>Without a schema ({@code bVar == null}) only the fallback {@code str} is added, and
     * only when it is non-empty. With a schema, the languages listed in the property's
     * {@code rdf:Alt} container are collected from {@code xml:lang}; for each language the
     * matching {@code rdf:li} text is added unless it is empty or equal to {@code str}.
     * For a single-valued property the method returns after the first value added. Finally
     * the fallback {@code str} is added when non-empty and the property still accepts a value.
     *
     * @param metadata target metadata
     * @param property property to fill; its name is used as the XML tag name
     * @param str fallback value; may be {@code null}
     * @param bVar schema whose {@code f613a} element is searched; may be {@code null}
     */
    private void extractMultilingualItems(Metadata metadata, Property property, String str, ai.b bVar) {
        boolean hasFallback = str != null && !str.isEmpty();
        if (bVar == null) {
            if (hasFallback) {
                addMetadata(metadata, property, str);
            }
            return;
        }

        Element root = bVar.f613a;
        String tagName = property.getName();

        // Pass 1: collect the language tags declared in the first rdf:Alt container.
        List<String> languages = new ArrayList<>();
        NodeList declared = root.getElementsByTagName(tagName);
        if (declared.getLength() > 0) {
            NodeList alts = ((Element) declared.item(0)).getElementsByTagName("rdf:Alt");
            if (alts.getLength() > 0) {
                NodeList items = ((Element) alts.item(0)).getElementsByTagName("rdf:li");
                for (int idx = 0; idx < items.getLength(); idx++) {
                    String lang = ((Element) items.item(idx)).getAttribute("xml:lang");
                    languages.add(lang == null ? "x-default" : lang);
                }
            }
        }

        // Pass 2: resolve the value for each language and record it.
        for (String language : languages) {
            String value = null;
            NodeList matches = root.getElementsByTagName(tagName);
            if (matches.getLength() > 0) {
                Element first = (Element) matches.item(0);
                NodeList alternatives = first.getElementsByTagName("rdf:Alt");
                if (alternatives.getLength() > 0) {
                    NodeList items = ((Element) alternatives.item(0)).getElementsByTagName("rdf:li");
                    for (int idx = 0; idx < items.getLength() && value == null; idx++) {
                        Element item = (Element) items.item(idx);
                        if (language.equals(item.getAttribute("xml:lang"))) {
                            value = b9.c.z(item);
                        }
                    }
                } else if (first.getChildNodes().getLength() == 1
                        && first.getFirstChild().getNodeType() == org.w3c.dom.Node.TEXT_NODE) {
                    // No rdf:Alt: accept a lone text child as the value.
                    value = first.getFirstChild().getNodeValue();
                }
            }
            if (value == null || value.isEmpty()) {
                continue;
            }
            if (str != null && value.equals(str)) {
                // Same as the fallback; it is handled after the loop.
                continue;
            }
            addMetadata(metadata, property, value);
            if (!property.isMultiValuePermitted()) {
                return;
            }
        }

        if (hasFallback && (property.isMultiValuePermitted() || metadata.get(property) == null)) {
            addMetadata(metadata, property, str);
        }
    }

    /**
     * Resolves the document password: first from the {@link PasswordProvider} in the parse
     * context, then from the {@link #PASSWORD} metadata entry.
     *
     * @return the password, or the empty string when neither source yields one
     */
    private String getPassword(Metadata metadata, ParseContext parseContext) {
        PasswordProvider provider = (PasswordProvider) parseContext.get(PasswordProvider.class);
        String candidate = null;
        if (provider != null) {
            candidate = provider.getPassword(metadata);
        }
        if (candidate == null) {
            candidate = metadata.get(PASSWORD);
        }
        return candidate != null ? candidate : "";
    }

    /**
     * Returns the text of every {@code rdf:li} item of the property named {@code tagName}
     * in the schema's DOM, reading an {@code rdf:Bag} container when present and an
     * {@code rdf:Seq} container otherwise.
     *
     * <p>The decompiler could not emit this method and left a stub that always threw
     * {@link UnsupportedOperationException}; this body is reconstructed from the instruction
     * dump that accompanied the stub.
     *
     * @param schema schema whose {@code f613a} element is searched
     * @param tagName tag name of the property element
     * @return an unmodifiable list of item texts, or {@code null} when the property has
     *         neither an {@code rdf:Bag} nor an {@code rdf:Seq} container
     */
    private List<String> getXMPBagOrSeqList(ai.b schema, String tagName) {
        List<String> bagItems = readRdfContainerItems(schema.f613a, tagName, "rdf:Bag");
        if (bagItems != null) {
            return bagItems;
        }
        return readRdfContainerItems(schema.f613a, tagName, "rdf:Seq");
    }

    /**
     * Reads the {@code rdf:li} texts of the first {@code containerTag} element found under
     * the first {@code tagName} element of {@code root}; returns {@code null} when either
     * element is missing.
     */
    private static List<String> readRdfContainerItems(Element root, String tagName, String containerTag) {
        NodeList properties = root.getElementsByTagName(tagName);
        if (properties.getLength() <= 0) {
            return null;
        }
        NodeList containers = ((Element) properties.item(0)).getElementsByTagName(containerTag);
        if (containers.getLength() <= 0) {
            return null;
        }
        NodeList items = ((Element) containers.item(0)).getElementsByTagName("rdf:li");
        List<String> values = new ArrayList<>();
        for (int idx = 0; idx < items.getLength(); idx++) {
            values.add(b9.c.z((Element) items.item(idx)));
        }
        return Collections.unmodifiableList(values);
    }

    /**
     * Extracts only the XFA content of {@code cVar} into an XHTML handler wrapped around
     * {@code contentHandler}, then ends the XHTML document.
     *
     * <p>The XFA bytes are read through a try-with-resources block so the stream is closed
     * even when extraction fails; the decompiled form closed it only on success and carried
     * an empty {@code finally}.
     *
     * @param cVar document whose {@code a().a().a().b()} bytes are fed to the extractor
     * @param contentHandler downstream SAX handler
     * @param metadata metadata passed to the handler factory and to the extractor
     * @param parseContext context passed to the extractor
     * @throws TikaException wrapping any {@link XMLStreamException} raised by the extractor
     */
    private void handleXFAOnly(c cVar, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) {
        XFAExtractor xFAExtractor = new XFAExtractor();
        XHTMLContentHandler xhtml = lh.b.h(contentHandler, metadata);
        try (ByteArrayInputStream xfaStream = new ByteArrayInputStream(cVar.a().a().a().b())) {
            xFAExtractor.extract(xfaStream, xhtml, metadata, parseContext);
        } catch (XMLStreamException e6) {
            throw new TikaException("XML error in XFA", e6);
        }
        // Reached only when extraction succeeded, as before.
        xhtml.endDocument();
    }

    /**
     * Parses the stream obtained from {@code dVar.a()} into a DOM document using the
     * {@link DocumentBuilder} supplied by {@code parseContext}, with the builder's error
     * handler cleared.
     *
     * @param dVar source of the XML stream; may be {@code null}
     * @param parseContext supplies the document builder
     * @return the parsed document, or {@code null} when {@code dVar} is {@code null} or when
     *         opening, parsing or closing the stream raises an {@link IOException},
     *         {@link TikaException} or {@link SAXException}
     */
    private Document loadDOM(mi.d dVar, ParseContext parseContext) {
        if (dVar == null) {
            return null;
        }
        // try-with-resources reproduces the close / addSuppressed sequence of the expanded form.
        try (InputStream xmlStream = dVar.a()) {
            DocumentBuilder builder = parseContext.getDocumentBuilder();
            builder.setErrorHandler(null);
            return builder.parse(xmlStream);
        } catch (IOException | TikaException | SAXException ignored) {
            // Best effort: an unreadable packet simply yields no DOM.
            return null;
        }
    }

    /**
     * Tells whether only the XFA content should be extracted: the configuration must request
     * it and every link of the {@code cVar.a().a().a()} chain must be non-null.
     */
    private boolean shouldHandleXFAOnly(c cVar, PDFParserConfig pDFParserConfig) {
        return pDFParserConfig.getIfXFAExtractOnlyXFA()
                && cVar.a() != null
                && cVar.a().a() != null
                && cVar.a().a().a() != null;
    }

    /** Returns the {@code enableAutoSpace} setting of the default configuration. */
    public boolean getEnableAutoSpace() {
        return defaultConfig.getEnableAutoSpace();
    }

    /** Returns the {@code extractAnnotationText} setting of the default configuration. */
    public boolean getExtractAnnotationText() {
        return defaultConfig.getExtractAnnotationText();
    }

    /** Returns the default configuration object itself (not a copy). */
    public PDFParserConfig getPDFParserConfig() {
        return defaultConfig;
    }

    /** Returns the {@code sortByPosition} setting of the default configuration. */
    public boolean getSortByPosition() {
        return defaultConfig.getSortByPosition();
    }

    /**
     * Returns the media types handled by this parser: the singleton set built in the static
     * initializer. The {@code parseContext} argument is not consulted.
     */
    @Override // org.apache.tika.parser.Parser
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return PDFParser.SUPPORTED_TYPES;
    }

    /** Returns the {@code suppressDuplicateOverlappingText} setting of the default configuration. */
    public boolean getSuppressDuplicateOverlappingText() {
        return defaultConfig.getSuppressDuplicateOverlappingText();
    }

    /**
     * Parses a PDF: loads the document, records encryption status, content type and
     * document metadata, runs the configured access check, and — when a content handler is
     * given — emits content through the XFA, OCR or text-extraction path selected by the
     * configuration.
     *
     * <p>The document is closed in a {@code finally} block. The decompiled form closed it
     * only on the success path (its catch block tested the constant {@code 0 != 0}), so any
     * exception — for example from the access checker — leaked the open document.
     *
     * @param inputStream PDF input; when it is a file-backed {@link TikaInputStream} the
     *        document is loaded from that file instead of the stream
     * @param contentHandler receives the extracted content; when {@code null} only metadata
     *        is extracted
     * @param metadata receives the extracted metadata and may carry the password
     * @param parseContext may supply a {@link PDFParserConfig} and a {@link PasswordProvider}
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) {
        PDFParserConfig pDFParserConfig = (PDFParserConfig) parseContext.get(PDFParserConfig.class, this.defaultConfig);
        c document = null;
        try {
            TikaInputStream cast = TikaInputStream.cast(inputStream);
            String password = getPassword(metadata, parseContext);
            if (cast == null || !cast.hasFile()) {
                document = c.f(new ff.a(inputStream), password);
            } else {
                document = c.h(cast.getPath().toFile(), password);
            }
            // Encrypted when the dictionary exists and holds an entry for i.f6790a3
            // (presumably the Encrypt key — TODO confirm).
            d dVar = document.f8295b.C;
            boolean encrypted = dVar != null && dVar.r0(i.f6790a3) != null;
            metadata.set(PDF.IS_ENCRYPTED, Boolean.toString(encrypted));
            metadata.set("Content-Type", MEDIA_TYPE.toString());
            extractMetadata(document, metadata, parseContext);
            // Must run after extractMetadata, which publishes the permission flags.
            pDFParserConfig.getAccessChecker().check(metadata);
            if (contentHandler != null) {
                if (shouldHandleXFAOnly(document, pDFParserConfig)) {
                    handleXFAOnly(document, contentHandler, metadata, parseContext);
                } else if (pDFParserConfig.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_ONLY)) {
                    metadata.add("X-Parsed-By", TesseractOCRParser.class.toString());
                    OCR2XHTML.process(document, contentHandler, parseContext, metadata, pDFParserConfig);
                } else {
                    if (pDFParserConfig.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) {
                        metadata.add("X-Parsed-By", TesseractOCRParser.class.toString());
                    }
                    PDF2XHTML.process(document, contentHandler, parseContext, metadata, pDFParserConfig);
                }
            }
        } finally {
            // Release the document on success and on failure alike.
            if (document != null) {
                document.close();
            }
        }
    }

    /** Forwards {@code z5} to {@code setEnableAutoSpace} on the default configuration. */
    public void setEnableAutoSpace(boolean z5) {
        defaultConfig.setEnableAutoSpace(z5);
    }

    /** Forwards {@code z5} to {@code setExtractAnnotationText} on the default configuration. */
    public void setExtractAnnotationText(boolean z5) {
        defaultConfig.setExtractAnnotationText(z5);
    }

    /**
     * Forwards {@code str} unchanged to {@code setOcrImageType} on the default configuration.
     */
    @Field
    public void setOcrImageType(String str) {
        defaultConfig.setOcrImageType(str);
    }

    /**
     * Forwards {@code str} unchanged to {@code setOcrStrategy} on the default configuration.
     */
    @Field
    public void setOcrStrategy(String str) {
        defaultConfig.setOcrStrategy(str);
    }

    /**
     * Replaces the default configuration with {@code pDFParserConfig}; the reference is
     * stored as given, without copying or a null check.
     */
    public void setPDFParserConfig(PDFParserConfig pDFParserConfig) {
        defaultConfig = pDFParserConfig;
    }

    /** Forwards {@code z5} to {@code setSortByPosition} on the default configuration. */
    @Field
    public void setSortByPosition(boolean z5) {
        defaultConfig.setSortByPosition(z5);
    }

    /** Forwards {@code z5} to {@code setSuppressDuplicateOverlappingText} on the default configuration. */
    public void setSuppressDuplicateOverlappingText(boolean z5) {
        defaultConfig.setSuppressDuplicateOverlappingText(z5);
    }
}
