package org.apache.pdfbox.tools;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Map;
import opennlp.tools.parser.Parse;
import org.apache.pdfbox.cos.COSInputStream;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.apache.pdfbox.text.PDFTextStripper;

/* loaded from: classes9.dex */
/**
 * Command-line tool that loads a PDF document and writes its text, followed by the text of any
 * embedded files whose subtype is {@code application/pdf}, to a file or to the console.
 *
 * <p>The supported options are listed by {@link #usage()}.
 */
public final class ExtractText {
    private static final String CONSOLE = "-console";
    private static final String DEBUG = "-debug";
    private static final String ENCODING = "-encoding";
    private static final String END_PAGE = "-endPage";
    private static final String HTML = "-html";
    private static final String IGNORE_BEADS = "-ignoreBeads";
    private static final String PASSWORD = "-password";
    private static final String SORT = "-sort";
    private static final String START_PAGE = "-startPage";
    private static final String STD_ENCODING = "UTF-8";

    /** When set (via {@code -debug}), timing and progress messages are printed to stderr. */
    private boolean debug = false;

    private ExtractText() {
    }

    /**
     * Entry point: runs one extraction with the given command-line arguments.
     *
     * @param strArr the command-line arguments
     * @throws IOException if loading the document or writing the text fails
     */
    public static void main(String[] strArr) throws IOException {
        // NOTE(review): presumably keeps the tool from showing a Dock icon on macOS - confirm.
        System.setProperty("apple.awt.UIElement", "true");
        new ExtractText().startExtraction(strArr);
    }

    /**
     * Prints {@code message} to stderr when debugging is enabled and returns the current time.
     *
     * @param message the text announcing the stage that is about to start
     * @return the current time in milliseconds, to be handed to {@link #stopProcessing}
     */
    private long startProcessing(String message) {
        if (this.debug) {
            System.err.println(message);
        }
        return System.currentTimeMillis();
    }

    /**
     * Prints the elapsed time of a stage, in seconds, to stderr when debugging is enabled.
     *
     * @param message the prefix of the printed line
     * @param startTime the value returned by the matching {@link #startProcessing} call
     */
    private void stopProcessing(String message, long startTime) {
        if (this.debug) {
            float seconds = ((float) (System.currentTimeMillis() - startTime)) / 1000.0f;
            System.err.println(message + seconds + " seconds");
        }
    }

    /** Prints the usage message to stderr and terminates the JVM with exit status 1. */
    private static void usage() {
        String message = "Usage: java -jar pdfbox-app-x.y.z.jar ExtractText [options] <inputfile> [output-text-file]\n"
                + "\nOptions:\n"
                + "  -password  <password>        : Password to decrypt document\n"
                + "  -encoding  <output encoding> : UTF-8 (default) or ISO-8859-1, UTF-16BE, UTF-16LE, etc.\n"
                + "  -console                     : Send text to console instead of file\n"
                + "  -html                        : Output in HTML format instead of raw text\n"
                + "  -sort                        : Sort the text before writing\n"
                + "  -ignoreBeads                 : Disables the separation by beads\n"
                + "  -debug                       : Enables debug output about the time consumption of every stage\n"
                + "  -startPage <number>          : The first page to start extraction(1 based)\n"
                + "  -endPage <number>            : The last page to extract(inclusive)\n"
                + "  <inputfile>                  : The PDF document to use\n"
                + "  [output-text-file]           : The file to write the text to";
        System.err.println(message);
        System.exit(1);
    }

    /**
     * Returns the value that follows an option, or prints the usage message (which exits) when
     * the option is the last argument.
     *
     * @param args the command-line arguments
     * @param index the position at which the option's value is expected
     * @return {@code args[index]}
     */
    private static String optionValue(String[] args, int index) {
        if (index >= args.length) {
            usage();
        }
        return args[index];
    }

    /**
     * Parses the command-line arguments, loads the PDF and writes its text to the chosen output.
     *
     * <p>The first non-option argument is the input file and a later one is the output file.
     * Without an explicit output file, and when the input name is longer than four characters,
     * the output name is the input name with its last four characters replaced by {@code .txt}
     * (or {@code .html} when {@code -html} is given).
     *
     * @param strArr the command-line arguments
     * @throws IOException if the document cannot be loaded, text extraction is not permitted,
     *         or writing the output fails
     */
    public void startExtraction(String[] strArr) throws IOException {
        boolean toConsole = false;
        boolean toHtml = false;
        boolean sort = false;
        boolean separateBeads = true;
        String password = "";
        String encoding = STD_ENCODING;
        String pdfFile = null;
        String outputFile = null;
        String ext = ".txt";
        int startPage = 1;
        int endPage = Integer.MAX_VALUE;

        // Each iteration consumes exactly one argument; options that take a value consume one more.
        for (int i = 0; i < strArr.length; i++) {
            String arg = strArr[i];
            if (arg.equals(PASSWORD)) {
                password = optionValue(strArr, ++i);
            } else if (arg.equals(ENCODING)) {
                encoding = optionValue(strArr, ++i);
            } else if (arg.equals(START_PAGE)) {
                startPage = Integer.parseInt(optionValue(strArr, ++i));
            } else if (arg.equals(HTML)) {
                toHtml = true;
                ext = ".html";
            } else if (arg.equals(SORT)) {
                sort = true;
            } else if (arg.equals(IGNORE_BEADS)) {
                separateBeads = false;
            } else if (arg.equals(DEBUG)) {
                this.debug = true;
            } else if (arg.equals(END_PAGE)) {
                endPage = Integer.parseInt(optionValue(strArr, ++i));
            } else if (arg.equals(CONSOLE)) {
                toConsole = true;
            } else if (pdfFile == null) {
                pdfFile = arg;
            } else {
                outputFile = arg;
            }
        }

        if (pdfFile == null) {
            usage();
            return;
        }

        OutputStreamWriter output = null;
        PDDocument document = null;
        try {
            long startTime = startProcessing("Loading PDF " + pdfFile);
            if (outputFile == null && pdfFile.length() > 4) {
                String baseName = pdfFile.substring(0, pdfFile.length() - 4);
                outputFile = new File(baseName + ext).getAbsolutePath();
            }
            document = PDDocument.load(new File(pdfFile), password);
            if (!document.getCurrentAccessPermission().canExtractContent()) {
                throw new IOException("You do not have permission to extract text");
            }
            stopProcessing("Time for loading: ", startTime);

            if (toConsole) {
                output = new OutputStreamWriter(System.out, encoding);
            } else {
                if (toHtml && !STD_ENCODING.equals(encoding)) {
                    // HTML written to a file always uses the standard encoding.
                    encoding = STD_ENCODING;
                    System.out.println("The encoding parameter is ignored when writing html output.");
                }
                // NOTE(review): outputFile is still null here when no output file was given and
                // the input name has four characters or fewer.
                output = new OutputStreamWriter(new FileOutputStream(outputFile), encoding);
            }

            PDFTextStripper stripper = toHtml ? new PDFText2HTML() : new PDFTextStripper();
            stripper.setSortByPosition(sort);
            stripper.setShouldSeparateByBeads(separateBeads);
            stripper.setStartPage(startPage);
            stripper.setEndPage(endPage);

            startTime = startProcessing("Starting text extraction");
            if (this.debug) {
                System.err.println("Writing to " + outputFile);
            }

            // Main document first, then any embedded PDF files.
            stripper.writeText(document, output);
            writeEmbeddedPdfs(document, stripper, output);

            stopProcessing("Time for extraction: ", startTime);
        } finally {
            // Runs on success and on failure; exceptions from the try block still propagate.
            IOUtils.closeQuietly(output);
            IOUtils.closeQuietly(document);
        }
    }

    /**
     * Writes the text of every embedded file of subtype {@code application/pdf} found in the
     * document's embedded-files name tree.
     *
     * @param document the already loaded main document
     * @param stripper the configured stripper that was used for the main document
     * @param output the writer receiving the extracted text
     * @throws IOException if an embedded document cannot be read or written
     */
    private void writeEmbeddedPdfs(PDDocument document, PDFTextStripper stripper, OutputStreamWriter output)
            throws IOException {
        PDDocumentNameDictionary catalogNames = document.getDocumentCatalog().getNames();
        if (catalogNames == null) {
            return;
        }
        PDEmbeddedFilesNameTreeNode embeddedFiles = catalogNames.getEmbeddedFiles();
        if (embeddedFiles == null) {
            return;
        }
        Map<String, PDComplexFileSpecification> fileSpecs = embeddedFiles.getNames();
        if (fileSpecs == null) {
            return;
        }
        for (Map.Entry<String, PDComplexFileSpecification> entry : fileSpecs.entrySet()) {
            if (this.debug) {
                System.err.println("Processing embedded file " + entry.getKey() + ":");
            }
            PDEmbeddedFile embeddedFile = entry.getValue().getEmbeddedFile();
            if (embeddedFile == null || !"application/pdf".equals(embeddedFile.getSubtype())) {
                continue;
            }
            if (this.debug) {
                System.err.println("  is PDF (size=" + embeddedFile.getSize() + ")");
            }
            COSInputStream embeddedStream = embeddedFile.createInputStream();
            try {
                PDDocument embeddedDocument = PDDocument.load(embeddedStream);
                try {
                    stripper.writeText(embeddedDocument, output);
                } finally {
                    IOUtils.closeQuietly(embeddedDocument);
                }
            } finally {
                embeddedStream.close();
            }
        }
    }
}
