package de.l3s.boilerpipe.sax;

import com.fasterxml.jackson.core.util.MinimalPrettyPrinter;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

/* loaded from: classes.dex */
public abstract class b implements ContentHandler {
    // Marker token appended to tokenBuffer by endElement when the outermost anchor closes;
    // flushBlock clears inAnchorText when it meets this token.
    static final String ANCHOR_TEXT_END = ">\ue00a$";
    // Marker token appended to tokenBuffer by startElement when an anchor opens;
    // flushBlock sets inAnchorText when it meets this token.
    static final String ANCHOR_TEXT_START = "$\ue00a<";
    // Matches one character in Unicode category L, Nd, Nl or No. flushBlock counts a token
    // as a word only if this pattern finds a match in it.
    private static final Pattern PAT_VALID_WORD_CHARACTER = Pattern.compile("[\\p{L}\\p{Nd}\\p{Nl}\\p{No}]");
    // Bit i is set by characters() when the i-th characters() call contributed non-blank text.
    // Handed to the text block built in flushBlock and then replaced by a fresh BitSet.
    private BitSet currentContainedTextElements;
    // Pending-flush request: raised by startElement/endElement, honoured (and cleared) at the
    // start of the next characters() call.
    private boolean flush;
    // Read by addTextBlock, which adds a "font-<n>" entry to the block for every non-null element.
    // Nothing in this class pushes to it; presumably maintained by code outside this file.
    LinkedList<Integer> fontSizeStack;
    // Anchor nesting counter: incremented in startElement, decremented in endElement.
    int inAnchor;
    // Toggled by flushBlock while it walks the tokens between the anchor start/end markers.
    boolean inAnchorText;
    // Body nesting counter; while it is zero flushBlock discards buffered text
    // (after optionally capturing the title).
    int inBody;
    // Nesting counter for ignorable elements; characters() drops text while it is non-zero.
    int inIgnorableElement;
    // Iterated by addTextBlock, which passes each element to an external helper.
    // Nothing in this class pushes to it.
    LinkedList<Object> labelStack;
    // Local name of the most recent endElement call. Written but never read in this class.
    private String lastEndTag;
    // Kind of the most recent SAX event handled. Written but never read in this class.
    private a lastEvent;
    // Local name of the most recent startElement call; flushBlock compares it with "TITLE".
    private String lastStartTag;
    // Running block index passed to each new text block; incremented once per block created.
    private int offsetBlocks;
    // Tracks whether the last thing appended to the buffers was a space, so that
    // consecutive whitespace collapses to a single space.
    boolean sbLastWasWhitespace;
    // Tag action lookup, keyed by the second argument (local name) of startElement/endElement.
    private final Map<String, f> tagActions;
    // Blocks produced so far, in creation order. Returned as-is by getTextBlocks().
    private final List<lb.a> textBlocks;
    // Whitespace-normalised text of the block being built (no anchor markers).
    StringBuilder textBuffer;
    // Incremented on every characters() call; used as the bit index for currentContainedTextElements.
    private int textElementIdx;
    // Document title; only ever set through setTitle(), which ignores null/empty values.
    private String title;
    // Same text as textBuffer plus the anchor start/end marker tokens; flushBlock tokenises
    // this buffer to compute the word statistics of a block.
    StringBuilder tokenBuffer;

    /**
     * Creates a handler in its initial (empty) state, using the tag-action table
     * {@code e.f4666b}.
     */
    public b() {
        // Read the shared tag-action table first, as the original code did.
        this.tagActions = e.f4666b;

        // Buffers for the block currently being assembled.
        this.tokenBuffer = new StringBuilder();
        this.textBuffer = new StringBuilder();
        this.sbLastWasWhitespace = false;

        // Element nesting counters.
        this.inBody = 0;
        this.inAnchor = 0;
        this.inIgnorableElement = 0;
        this.inAnchorText = false;

        // Bookkeeping about the event stream.
        this.lastStartTag = null;
        this.lastEndTag = null;
        this.lastEvent = null;
        this.flush = false;
        this.textElementIdx = 0;

        // Output state. The diamond replaces the raw ArrayList of the decompiled code.
        this.title = null;
        this.textBlocks = new ArrayList<>();
        this.offsetBlocks = 0;
        this.currentContainedTextElements = new BitSet();

        this.labelStack = new LinkedList<>();
        this.fontSizeStack = new LinkedList<>();
    }

    /**
     * Decorates a finished block and appends it to the list of text blocks.
     *
     * <p>For every non-null entry of {@code fontSizeStack} the string {@code "font-" + entry}
     * is added to the block's {@code f8280n} set (created on demand). Every entry of
     * {@code labelStack} is then passed to an external helper whose effect is not visible
     * from this file.
     *
     * @param aVar the block to register
     */
    public void addTextBlock(lb.a aVar) {
        for (Integer fontSize : this.fontSizeStack) {
            if (fontSize == null) {
                continue;
            }
            if (aVar.f8280n == null) {
                aVar.f8280n = new HashSet<>(2);
            }
            aVar.f8280n.add("font-" + fontSize);
        }

        for (Object label : this.labelStack) {
            // NOTE(review): opaque helper emitted by the decompiler; its purpose cannot be
            // determined from this file.
            com.google.android.gms.internal.mlkit_vision_text_common.a.H(label);
        }

        this.textBlocks.add(aVar);
    }

    /**
     * Appends a single space to both buffers unless the last thing appended was
     * already whitespace, and records that the buffers now end in whitespace.
     */
    public void addWhitespaceIfNecessary() {
        if (!this.sbLastWasWhitespace) {
            this.tokenBuffer.append(' ');
            this.textBuffer.append(' ');
            this.sbLastWasWhitespace = true;
        }
    }

    /**
     * Buffers a run of character data, collapsing leading/trailing whitespace to at most one
     * space and turning every inner whitespace character into a plain space.
     *
     * <p>The supplied array is only read. (The previous implementation rewrote whitespace
     * characters inside the caller's array, which is owned by the SAX parser.)
     *
     * @param cArr buffer holding the characters
     * @param i4   index of the first character of the run
     * @param i10  number of characters in the run
     */
    @Override // org.xml.sax.ContentHandler
    public void characters(char[] cArr, int i4, int i10) {
        this.textElementIdx++;

        // A flush requested by a preceding start/end tag is carried out before new text is added.
        if (this.flush) {
            flushBlock();
            this.flush = false;
        }

        if (this.inIgnorableElement != 0 || i10 <= 0) {
            return;
        }

        final int end = i4 + i10;

        // Trim whitespace from both ends, remembering whether anything was trimmed.
        int first = i4;
        while (first < end && Character.isWhitespace(cArr[first])) {
            first++;
        }
        int last = end;
        while (last > first && Character.isWhitespace(cArr[last - 1])) {
            last--;
        }
        final boolean leadingWhitespace = first > i4;
        final boolean trailingWhitespace = last < end;

        if (first == last) {
            // The run was non-empty, so it consisted of whitespace only: collapse it to one space.
            if (!this.sbLastWasWhitespace) {
                this.textBuffer.append(' ');
                this.tokenBuffer.append(' ');
            }
            this.sbLastWasWhitespace = true;
            this.lastEvent = a.A;
            return;
        }

        if (leadingWhitespace && !this.sbLastWasWhitespace) {
            this.textBuffer.append(' ');
            this.tokenBuffer.append(' ');
        }

        // Copy the trimmed run, normalising inner whitespace on the fly instead of
        // writing into the caller's array.
        for (int i = first; i < last; i++) {
            final char ch = Character.isWhitespace(cArr[i]) ? ' ' : cArr[i];
            this.textBuffer.append(ch);
            this.tokenBuffer.append(ch);
        }

        if (trailingWhitespace) {
            this.textBuffer.append(' ');
            this.tokenBuffer.append(' ');
        }
        this.sbLastWasWhitespace = trailingWhitespace;
        this.lastEvent = a.f4659n;

        // Remember that this text element contributed to the current block.
        this.currentContainedTextElements.set(this.textElementIdx);
    }

    /**
     * Handles a closing tag by applying the tag action registered for its local name.
     *
     * <p>Tags without a registered action request a flush. For registered actions the
     * behaviour depends on the action code: 0 leaves an ignorable element and requests a
     * flush; 1 leaves an anchor and, when the outermost anchor closes outside ignorable
     * content, appends the anchor-end marker to the token buffer; 2 flushes the current
     * block and leaves the body; 4 inserts whitespace if needed; any other code does nothing.
     *
     * @param str  namespace URI (unused)
     * @param str2 local name, used to look up the tag action
     * @param str3 qualified name (unused)
     */
    @Override // org.xml.sax.ContentHandler
    public void endElement(String str, String str2, String str3) {
        final f action = this.tagActions.get(str2);
        if (action == null) {
            this.flush = true;
        } else {
            switch (((c) action).f4660a) {
                case 0:
                    this.inIgnorableElement--;
                    this.flush = true;
                    break;
                case 1:
                    this.inAnchor--;
                    if (this.inAnchor == 0 && this.inIgnorableElement == 0) {
                        addWhitespaceIfNecessary();
                        this.tokenBuffer.append(ANCHOR_TEXT_END).append(' ');
                        this.sbLastWasWhitespace = true;
                    }
                    break;
                case 2:
                    flushBlock();
                    this.inBody--;
                    break;
                case 4:
                    addWhitespaceIfNecessary();
                    break;
                default:
                    // Code 3 and unknown codes: nothing to do and no flush requested.
                    break;
            }
        }
        this.lastEvent = a.f4658i;
        this.lastEndTag = str2;
    }

    /** Prefix mappings are not used by this handler; the event is ignored. */
    @Override // org.xml.sax.ContentHandler
    public void endPrefixMapping(String str) {
        // Intentionally empty.
    }

    /** Runs of spaces and U+2063 marks; collapsed to a single space before splitting. */
    private static final Pattern PAT_SEPARATOR_RUN = Pattern.compile("[ \u2063]+");

    /** Runs of plain spaces; the delimiter used to split the normalised token string. */
    private static final Pattern PAT_SPACE_RUN = Pattern.compile("[ ]+");

    /** Line width, in characters, used when estimating how many lines a block wraps to. */
    private static final int MAX_LINE_LENGTH = 80;

    /**
     * Turns the buffered text into a text block and appends it via {@link #addTextBlock}.
     *
     * <p>Outside the body ({@code inBody == 0}) no block is produced: the buffers are
     * discarded, after first storing their content as the title if the last opened tag was
     * {@code TITLE}. Inside the body, an empty buffer or a buffer holding just one
     * whitespace character produces nothing. Otherwise the token buffer is tokenised and
     * word, linked-word and wrapped-line counts are computed for the new block.
     *
     * <p>If tokenising yields no countable token the method returns without clearing the
     * buffers (behaviour kept from the original code).
     */
    public void flushBlock() {
        if (this.inBody == 0) {
            if ("TITLE".equalsIgnoreCase(this.lastStartTag)) {
                setTitle(this.tokenBuffer.toString().trim());
            }
            this.textBuffer.setLength(0);
            this.tokenBuffer.setLength(0);
            return;
        }

        final int bufferedLength = this.tokenBuffer.length();
        if (bufferedLength == 0) {
            return;
        }
        if (bufferedLength == 1 && this.sbLastWasWhitespace) {
            this.textBuffer.setLength(0);
            this.tokenBuffer.setLength(0);
            return;
        }

        int numTokens = 0;
        int numWords = 0;
        int numLinkedWords = 0;
        int numWrappedLines = 0;
        int numWordsCurrentLine = 0;
        int currentLineLength = -1; // starts at -1 so the first word's separator is not counted

        for (String token : tokenizeBuffer(this.tokenBuffer)) {
            if (ANCHOR_TEXT_START.equals(token)) {
                this.inAnchorText = true;
                continue;
            }
            if (ANCHOR_TEXT_END.equals(token)) {
                this.inAnchorText = false;
                continue;
            }

            numTokens++;
            if (!PAT_VALID_WORD_CHARACTER.matcher(token).find()) {
                continue; // counted as a token, but not as a word
            }

            numWords++;
            numWordsCurrentLine++;
            if (this.inAnchorText) {
                numLinkedWords++;
            }

            final int tokenLength = token.length();
            currentLineLength += tokenLength + 1;
            if (currentLineLength > MAX_LINE_LENGTH) {
                // The word does not fit: it becomes the first word of a new line.
                numWrappedLines++;
                currentLineLength = tokenLength;
                numWordsCurrentLine = 1;
            }
        }

        if (numTokens == 0) {
            return;
        }

        final int numWordsInWrappedLines;
        if (numWrappedLines == 0) {
            // Everything fits on one line: count it as a single line holding all words.
            numWordsInWrappedLines = numWords;
            numWrappedLines = 1;
        } else {
            // Words on the last (unfinished) line are excluded.
            numWordsInWrappedLines = numWords - numWordsCurrentLine;
        }

        lb.a block = new lb.a(this.textBuffer.toString().trim(), this.currentContainedTextElements,
                numWords, numLinkedWords, numWordsInWrappedLines, numWrappedLines, this.offsetBlocks);

        // The BitSet now belongs to the block; start a fresh one for the next block.
        this.currentContainedTextElements = new BitSet();
        this.offsetBlocks++;
        this.textBuffer.setLength(0);
        this.tokenBuffer.setLength(0);
        addTextBlock(block);
    }

    /**
     * Splits buffered text into tokens: applies the two external patterns of {@code ob.a}
     * (presumably word-boundary rules -- not visible from this file), collapses the resulting
     * separator runs to single spaces, trims, and splits on spaces.
     */
    private static String[] tokenizeBuffer(CharSequence buffer) {
        String marked = ob.a.f9430a.matcher(buffer).replaceAll("\u2063");
        String cleaned = ob.a.f9431b.matcher(marked).replaceAll("$1");
        // A literal space replaces the unrelated Jackson constant the decompiler substituted here.
        String normalised = PAT_SEPARATOR_RUN.matcher(cleaned).replaceAll(" ").trim();
        return PAT_SPACE_RUN.split(normalised);
    }

    /**
     * Returns the blocks collected so far.
     *
     * @return the handler's internal list itself (not a copy)
     */
    public List<lb.a> getTextBlocks() {
        return textBlocks;
    }

    /**
     * Returns the captured document title.
     *
     * @return the title, or {@code null} if none has been set
     */
    public String getTitle() {
        return title;
    }

    /**
     * Treats ignorable whitespace as a single space: one space is appended to both buffers
     * unless they already end in whitespace. The characters themselves are not inspected.
     *
     * @param cArr buffer holding the whitespace (unused)
     * @param i4   start index (unused)
     * @param i10  length (unused)
     */
    @Override // org.xml.sax.ContentHandler
    public void ignorableWhitespace(char[] cArr, int i4, int i10) {
        if (this.sbLastWasWhitespace) {
            return;
        }
        this.textBuffer.append(' ');
        this.tokenBuffer.append(' ');
        this.sbLastWasWhitespace = true;
    }

    /** Processing instructions carry no text content for this handler; the event is ignored. */
    @Override // org.xml.sax.ContentHandler
    public void processingInstruction(String str, String str2) {
        // Intentionally empty.
    }

    /**
     * Resets the handler to the state established by the constructor so it can process
     * another document.
     *
     * <p>This also clears the title and the label/font-size stacks. Previously the title
     * survived a recycle, and because {@link #setTitle} ignores empty values, a document
     * without a title would have reported the previous document's title.
     *
     * <p>Note that the list returned by {@link #getTextBlocks()} is cleared in place.
     */
    public void recycle() {
        // Buffers of the block in progress.
        this.tokenBuffer.setLength(0);
        this.textBuffer.setLength(0);
        this.sbLastWasWhitespace = false;

        // Element nesting counters.
        this.inBody = 0;
        this.inAnchor = 0;
        this.inIgnorableElement = 0;
        this.inAnchorText = false;
        this.labelStack.clear();
        this.fontSizeStack.clear();

        // Event-stream bookkeeping.
        this.lastStartTag = null;
        this.lastEndTag = null;
        this.lastEvent = null;
        this.flush = false;
        this.textElementIdx = 0;

        // Collected output.
        this.title = null;
        this.textBlocks.clear();
        this.offsetBlocks = 0;
        this.currentContainedTextElements.clear();
    }

    /** Location information is not used by this handler; the locator is ignored. */
    @Override // org.xml.sax.ContentHandler
    public void setDocumentLocator(Locator locator) {
        // Intentionally empty.
    }

    /**
     * Stores the document title. A {@code null} or empty argument is ignored, leaving any
     * previously stored title in place.
     *
     * @param str the new title
     */
    public void setTitle(String str) {
        if (str != null && str.length() != 0) {
            this.title = str;
        }
    }

    /** Skipped entities contribute nothing to the extracted text; the event is ignored. */
    @Override // org.xml.sax.ContentHandler
    public void skippedEntity(String str) {
        // Intentionally empty.
    }

    /**
     * Handles an opening tag by applying the tag action registered for its local name.
     *
     * <p>Tags without a registered action request a flush. For registered actions the
     * behaviour depends on the action code: 0 enters an ignorable element and requests a
     * flush; 1 enters an anchor and, outside ignorable content, appends the anchor-start
     * marker to the token buffer; 2 flushes the current block and enters the body;
     * 4 inserts whitespace if needed; any other code does nothing.
     *
     * @param str        namespace URI (unused)
     * @param str2       local name, used to look up the tag action
     * @param str3       qualified name (unused)
     * @param attributes element attributes (unused)
     * @throws SAXException if an anchor is opened while another anchor is still open
     */
    @Override // org.xml.sax.ContentHandler
    public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
        final f action = this.tagActions.get(str2);
        if (action == null) {
            this.flush = true;
        } else {
            switch (((c) action).f4660a) {
                case 0:
                    this.inIgnorableElement++;
                    this.flush = true;
                    break;
                case 1:
                    // The counter is bumped even when the nesting check fails, as before.
                    if (this.inAnchor++ != 0) {
                        throw new SAXException("SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again");
                    }
                    if (this.inIgnorableElement == 0) {
                        addWhitespaceIfNecessary();
                        this.tokenBuffer.append(ANCHOR_TEXT_START).append(' ');
                        this.sbLastWasWhitespace = true;
                    }
                    break;
                case 2:
                    flushBlock();
                    this.inBody++;
                    break;
                case 4:
                    addWhitespaceIfNecessary();
                    break;
                default:
                    // Code 3 and unknown codes: nothing to do and no flush requested.
                    break;
            }
        }
        this.lastEvent = a.f4657b;
        this.lastStartTag = str2;
    }

    /**
     * Flushes any pending text and wraps the collected blocks in a document object.
     *
     * <p>NOTE(review): the handler's own list instance is handed to the document, and
     * {@code recycle()} clears that same list -- confirm whether {@code lb.b} copies it.
     *
     * @return a new document built from the current text blocks
     */
    public lb.b toTextDocument() {
        flushBlock();
        // The result is unused; the call is kept because getTitle() can be overridden.
        getTitle();
        final List<lb.a> blocks = getTextBlocks();
        return new lb.b(blocks);
    }
}
