package org.apache.tika.parser.html;

import de.l3s.boilerpipe.sax.b;
import java.io.Writer;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import kb.a;
import org.apache.tika.sax.WriteOutContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.helpers.AttributesImpl;

/* loaded from: classes3.dex */
public class BoilerpipeContentHandler extends b {
    /** Single line feed, sent to the delegate as ignorable whitespace after block-level output. */
    private static final char[] NL = {'\n'};
    /** Handler that receives header events as they arrive and the filtered body in endDocument(). */
    private ContentHandler delegate;
    /** Body events recorded for replay; allocated in startDocument() only when includeMarkup is set. */
    private List<RecordedElement> elements;
    /** Extractor supplied at construction; endDocument() only casts it to mb.a and never calls it. */
    private a extractor;
    /** Number of characters() calls (not characters) seen while inHeader; seeds the run index used during replay. */
    private int headerCharOffset;
    /** Set when the closing body tag is seen; from then on this class neither records nor forwards events. */
    private boolean inFooter;
    /** True from startDocument() until the closing head tag; events are forwarded straight to the delegate. */
    private boolean inHeader;
    /** When true, body markup is recorded and replayed; when false, flagged blocks are emitted as {@code <p>} paragraphs. */
    private boolean includeMarkup;

    /* renamed from: td, reason: collision with root package name */
    /** Text document built in endDocument(); null before that. */
    private lb.b f9927td;

    /* renamed from: org.apache.tika.parser.html.BoilerpipeContentHandler$1, reason: invalid class name */
    /* loaded from: classes3.dex */
    /**
     * Switch lookup table for {@link RecordedElement.ElementType}.
     *
     * <p>Indexed by {@code ordinal()}; the stored case labels are START = 1, CONTINUE = 2 and
     * END = 3. A slot that is never filled keeps the array default of 0.
     */
    public static /* synthetic */ class AnonymousClass1 {
        static final /* synthetic */ int[] $SwitchMap$org$apache$tika$parser$html$BoilerpipeContentHandler$RecordedElement$ElementType;

        static {
            final int[] caseByOrdinal = new int[RecordedElement.ElementType.values().length];
            try {
                caseByOrdinal[RecordedElement.ElementType.START.ordinal()] = 1;
            } catch (NoSuchFieldError ignored) {
                // Constant not present in the loaded enum: leave its slot at 0.
            }
            try {
                caseByOrdinal[RecordedElement.ElementType.CONTINUE.ordinal()] = 2;
            } catch (NoSuchFieldError ignored) {
                // Constant not present in the loaded enum: leave its slot at 0.
            }
            try {
                caseByOrdinal[RecordedElement.ElementType.END.ordinal()] = 3;
            } catch (NoSuchFieldError ignored) {
                // Constant not present in the loaded enum: leave its slot at 0.
            }
            $SwitchMap$org$apache$tika$parser$html$BoilerpipeContentHandler$RecordedElement$ElementType = caseByOrdinal;
        }
    }

    /* loaded from: classes3.dex */
    /**
     * One recorded SAX event plus the character runs that were received after it.
     *
     * <p>The identifying data (namespace URI, local name, qualified name, attributes and event
     * type) is fixed at construction. The list of character runs starts empty and is the only
     * mutable part: callers append to the list returned by {@link #getCharacters()}.
     */
    public static class RecordedElement {
        private final Attributes attrs;
        private final List<char[]> characters;
        private final ElementType elementType;
        private final String localName;
        private final String qName;
        private final String uri;

        /** Kind of event a {@link RecordedElement} stands for. */
        public enum ElementType {
            START,
            END,
            CONTINUE
        }

        /** Creates a {@link ElementType#CONTINUE} record: no names, no attributes. */
        public RecordedElement() {
            this(null, null, null, null, ElementType.CONTINUE);
        }

        /**
         * Creates an {@link ElementType#END} record.
         *
         * @param str  namespace URI
         * @param str2 local name
         * @param str3 qualified name
         */
        public RecordedElement(String str, String str2, String str3) {
            this(str, str2, str3, null, ElementType.END);
        }

        /**
         * Creates a {@link ElementType#START} record.
         *
         * @param str        namespace URI
         * @param str2       local name
         * @param str3       qualified name
         * @param attributes attributes of the element; stored by reference, not copied
         */
        public RecordedElement(String str, String str2, String str3, Attributes attributes) {
            this(str, str2, str3, attributes, ElementType.START);
        }

        /**
         * Creates a record of an explicit type.
         *
         * @param str         namespace URI
         * @param str2        local name
         * @param str3        qualified name
         * @param attributes  attributes of the element; stored by reference, not copied
         * @param elementType kind of event being recorded
         */
        public RecordedElement(String str, String str2, String str3, Attributes attributes, ElementType elementType) {
            this.uri = str;
            this.localName = str2;
            this.qName = str3;
            this.attrs = attributes;
            this.elementType = elementType;
            this.characters = new ArrayList<>();
        }

        /** @return the attributes given at construction, or {@code null} if none were given */
        public Attributes getAttrs() {
            return this.attrs;
        }

        /** @return the live, mutable list of character runs attached to this record */
        public List<char[]> getCharacters() {
            return this.characters;
        }

        /** @return the kind of event this record stands for */
        public ElementType getElementType() {
            return this.elementType;
        }

        /** @return the local name given at construction (may be {@code null}) */
        public String getLocalName() {
            return this.localName;
        }

        /** @return the qualified name given at construction (may be {@code null}) */
        public String getQName() {
            return this.qName;
        }

        /** @return the namespace URI given at construction (may be {@code null}) */
        public String getUri() {
            return this.uri;
        }

        /** @return a debugging label of the form {@code <localName> of type TYPE} */
        @Override
        public String toString() {
            return String.format(Locale.ROOT, "<%s> of type %s", this.localName, this.elementType);
        }
    }

    /**
     * Creates a handler whose output goes to {@code writer}, wrapped in a
     * {@link WriteOutContentHandler}, using the shared {@code mb.a.f8620a} extractor.
     *
     * @param writer destination for the emitted content
     */
    public BoilerpipeContentHandler(Writer writer) {
        this(new WriteOutContentHandler(writer));
    }

    /**
     * Creates a handler that forwards to {@code contentHandler} and uses the shared
     * {@code mb.a.f8620a} extractor.
     *
     * @param contentHandler handler that receives the emitted events
     */
    public BoilerpipeContentHandler(ContentHandler contentHandler) {
        this(contentHandler, mb.a.f8620a);
    }

    /**
     * Creates a handler that forwards to {@code contentHandler} with an explicit extractor.
     *
     * @param contentHandler handler that receives the emitted events
     * @param aVar           extractor to associate with this handler
     */
    public BoilerpipeContentHandler(ContentHandler contentHandler, a aVar) {
        this.extractor = aVar;
        this.delegate = contentHandler;
        // No text document exists until endDocument() has run.
        this.f9927td = null;
    }

    /**
     * Receives a run of character data.
     *
     * <p>The run is always passed to the superclass first. After that:
     * <ul>
     *   <li>while in the header it is forwarded to the delegate and counted;</li>
     *   <li>in the footer, or when markup is not being recorded, it is dropped here;</li>
     *   <li>otherwise a private copy is attached to the most recently recorded element so it
     *       can be replayed (or skipped) in {@code endDocument()}.</li>
     * </ul>
     *
     * @param cArr buffer holding the characters
     * @param i4   offset of the first character of the run in {@code cArr}
     * @param i10  number of characters in the run
     */
    @Override // de.l3s.boilerpipe.sax.b, org.xml.sax.ContentHandler
    public void characters(char[] cArr, int i4, int i10) {
        super.characters(cArr, i4, i10);
        if (this.inHeader) {
            this.delegate.characters(cArr, i4, i10);
            // Despite the field name this counts calls, not characters: it is the number of
            // header runs that precede the first recorded body run.
            this.headerCharOffset++;
            return;
        }
        if (this.inFooter || !this.includeMarkup) {
            return;
        }
        // The run belongs to whichever event was recorded last.
        RecordedElement owner = this.elements.get(this.elements.size() - 1);
        // Copy the slice: the caller's buffer is not retained.
        char[] run = new char[i10];
        System.arraycopy(cArr, i4, run, 0, i10);
        owner.getCharacters().add(run);
    }

    /**
     * Finishes the document: builds the text document, runs the block processing passes,
     * emits the surviving body content to the delegate and closes the document.
     *
     * <p>Order of work:
     * <ol>
     *   <li>flush the pending block and build the text document (stored for
     *       {@link #getTextDocument()});</li>
     *   <li>check the configured extractor is an {@code mb.a};</li>
     *   <li>run two block-merging passes and one classification pass over the block list;</li>
     *   <li>emit output, either by replaying the recorded markup or as plain paragraphs;</li>
     *   <li>close {@code body} and {@code html}, end the default prefix mapping, end the
     *       delegate's document.</li>
     * </ol>
     *
     * NOTE(review): the three passes look like an extractor pipeline that the optimizer wrote
     * out in place; the extractor object itself is never invoked. Confirm against the
     * boilerpipe sources before relying on a custom extractor.
     */
    @Override // org.xml.sax.ContentHandler
    public void endDocument() {
        flushBlock();
        lb.b textDocument = toTextDocument();
        this.f9927td = textDocument;
        // Fails with NullPointerException / ClassCastException unless the extractor is an mb.a.
        ((mb.a) this.extractor).getClass();

        List<lb.a> blocks = textDocument.f8281a;
        fuseBlocksWithEqualMetric(blocks);
        fuseNearbyFlaggedBlocks(blocks);
        classifyBlocks(blocks);

        if (this.includeMarkup) {
            replayRecordedMarkup();
        } else {
            emitFlaggedBlocksAsParagraphs();
        }

        this.delegate.endElement(XHTMLContentHandler.XHTML, "body", "body");
        this.delegate.endElement(XHTMLContentHandler.XHTML, "html", "html");
        this.delegate.endPrefixMapping("");
        this.delegate.endDocument();
    }

    /**
     * Merges each block into its predecessor when both carry the same {@code O} value.
     * NOTE(review): {@code O} is presumably a per-block density metric — confirm.
     */
    private static void fuseBlocksWithEqualMetric(List<lb.a> blocks) {
        if (blocks.size() < 2) {
            return;
        }
        lb.a previous = blocks.get(0);
        ListIterator<lb.a> it = blocks.listIterator(1);
        while (it.hasNext()) {
            lb.a current = it.next();
            if (previous.O == current.O) {
                previous.b(current);
                it.remove();
            } else {
                previous = current;
            }
        }
    }

    /**
     * Merges a flagged block into its predecessor when {@code A - predecessor.C - 1 <= 1}.
     * NOTE(review): {@code f8278b} presumably marks content and {@code A}/{@code C} look like
     * start/end block offsets — confirm.
     */
    private static void fuseNearbyFlaggedBlocks(List<lb.a> blocks) {
        if (blocks.size() < 2) {
            return;
        }
        lb.a previous = blocks.get(0);
        ListIterator<lb.a> it = blocks.listIterator(1);
        while (it.hasNext()) {
            lb.a current = it.next();
            if (current.f8278b && (current.A - previous.C) - 1 <= 1) {
                previous.b(current);
                it.remove();
            } else {
                previous = current;
            }
        }
    }

    /**
     * Calls {@code nb.a.a(previous, current, next)} once per block, sliding a three-block
     * window over the list and using {@code lb.a.Z} as the placeholder at both ends.
     */
    private static void classifyBlocks(List<lb.a> blocks) {
        ListIterator<lb.a> it = blocks.listIterator();
        if (!it.hasNext()) {
            return;
        }
        lb.a sentinel = lb.a.Z;
        // For the first call "previous" holds the first block and "current" its successor.
        lb.a previous = it.next();
        lb.a current = it.hasNext() ? it.next() : sentinel;
        nb.a.a(sentinel, previous, current);
        if (current == sentinel) {
            // Single-block document: nothing further to classify.
            return;
        }
        while (it.hasNext()) {
            lb.a upcoming = it.next();
            nb.a.a(previous, current, upcoming);
            previous = current;
            current = upcoming;
        }
        nb.a.a(previous, current, lb.a.Z);
    }

    /**
     * Replays the recorded body events to the delegate, emitting only those character runs
     * whose index is set in the {@code Y} bit set of some flagged block.
     */
    private void replayRecordedMarkup() {
        BitSet keptRuns = new BitSet();
        for (lb.a block : this.f9927td.f8281a) {
            if (block.f8278b) {
                BitSet runsOfBlock = block.Y;
                if (runsOfBlock != null) {
                    keptRuns.or(runsOfBlock);
                }
            }
        }

        // Header runs were counted but never recorded, so body numbering starts after them.
        int runIndex = this.headerCharOffset;
        for (RecordedElement element : this.elements) {
            switch (element.getElementType()) {
                case START:
                    this.delegate.startElement(element.getUri(), element.getLocalName(), element.getQName(), element.getAttrs());
                    break;
                case CONTINUE:
                    break;
                case END:
                    this.delegate.endElement(element.getUri(), element.getLocalName(), element.getQName());
                    break;
                default:
                    throw new RuntimeException("Unhandled element type: " + element.getElementType());
            }
            for (char[] run : element.getCharacters()) {
                // The index is advanced before the lookup, so the first body run is offset + 1.
                runIndex++;
                if (!keptRuns.get(runIndex)) {
                    continue;
                }
                this.delegate.characters(run, 0, run.length);
                // An empty run has no last character to inspect; skip the newline check for it.
                if (run.length > 0
                        && !Character.isWhitespace(run[run.length - 1])
                        && XHTMLContentHandler.ENDLINE.contains(element.getLocalName())) {
                    this.delegate.ignorableWhitespace(NL, 0, NL.length);
                }
            }
        }
    }

    /** Emits every flagged block as {@code <p>text</p>} followed by a newline. */
    private void emitFlaggedBlocksAsParagraphs() {
        AttributesImpl noAttributes = new AttributesImpl();
        for (lb.a block : this.f9927td.f8281a) {
            if (!block.f8278b) {
                continue;
            }
            this.delegate.startElement(XHTMLContentHandler.XHTML, "p", "p", noAttributes);
            char[] text = block.f8279i.toString().toCharArray();
            this.delegate.characters(text, 0, text.length);
            this.delegate.endElement(XHTMLContentHandler.XHTML, "p", "p");
            this.delegate.ignorableWhitespace(NL, 0, NL.length);
        }
    }

    /**
     * Handles the end of an element.
     *
     * <p>The event always goes to the superclass first. In the header it is forwarded to the
     * delegate, and a closing {@code head} ends the header phase. In the footer it is ignored.
     * A closing {@code body} starts the footer phase and is not recorded. Any other element is
     * recorded, when markup is included, as an END record followed by an empty CONTINUE record
     * that collects the character runs arriving next.
     *
     * @param str  namespace URI
     * @param str2 local name
     * @param str3 qualified name
     */
    @Override // de.l3s.boilerpipe.sax.b, org.xml.sax.ContentHandler
    public void endElement(String str, String str2, String str3) {
        super.endElement(str, str2, str3);

        if (this.inHeader) {
            this.delegate.endElement(str, str2, str3);
            if (str2.equals("head")) {
                this.inHeader = false;
            }
            return;
        }
        if (this.inFooter) {
            return;
        }
        if (str2.equals("body")) {
            this.inFooter = true;
            return;
        }
        if (this.includeMarkup) {
            this.elements.add(new RecordedElement(str, str2, str3));
            this.elements.add(new RecordedElement());
        }
    }

    /**
     * Returns the text document built by {@code endDocument()}.
     *
     * @return the stored text document, or {@code null} if {@code endDocument()} has not run
     */
    public lb.b getTextDocument() {
        final lb.b document = this.f9927td;
        return document;
    }

    /**
     * Tells whether body markup is recorded and replayed.
     *
     * @return the current value of the include-markup flag
     */
    public boolean isIncludeMarkup() {
        final boolean markupIncluded = this.includeMarkup;
        return markupIncluded;
    }

    /**
     * Sets whether body markup is recorded and replayed.
     *
     * <p>The list that holds recorded events is only allocated in {@code startDocument()}, and
     * only if this flag is already {@code true} at that point.
     *
     * @param z5 {@code true} to record and replay markup, {@code false} to emit paragraphs only
     */
    public void setIncludeMarkup(boolean z5) {
        final boolean requested = z5;
        this.includeMarkup = requested;
    }

    /**
     * Starts a new document: notifies the delegate, then resets the phase flags and the
     * header run counter. A fresh recording list is created only when markup is included.
     */
    @Override // org.xml.sax.ContentHandler
    public void startDocument() {
        this.delegate.startDocument();

        // Every document begins in the header phase with nothing counted yet.
        this.headerCharOffset = 0;
        this.inFooter = false;
        this.inHeader = true;

        if (!this.includeMarkup) {
            return;
        }
        this.elements = new ArrayList<>();
    }

    /**
     * Handles the start of an element.
     *
     * <p>The event always goes to the superclass first. After that:
     * <ul>
     *   <li>in the header it is forwarded to the delegate;</li>
     *   <li>in the footer it is ignored;</li>
     *   <li>in the body with markup included it is recorded for replay in
     *       {@code endDocument()};</li>
     *   <li>in the body without markup it is forwarded to the delegate.</li>
     * </ul>
     *
     * @param str        namespace URI
     * @param str2       local name
     * @param str3       qualified name
     * @param attributes attributes of the element
     */
    @Override // de.l3s.boilerpipe.sax.b, org.xml.sax.ContentHandler
    public void startElement(String str, String str2, String str3, Attributes attributes) {
        super.startElement(str, str2, str3, attributes);
        if (this.inHeader) {
            this.delegate.startElement(str, str2, str3, attributes);
        } else if (this.inFooter) {
            // Nothing after the closing body tag is recorded or forwarded.
        } else if (this.includeMarkup) {
            // The record is replayed long after this callback returns, and a SAX source may
            // reuse its Attributes object between events, so keep a private snapshot.
            Attributes snapshot = attributes == null ? null : new AttributesImpl(attributes);
            this.elements.add(new RecordedElement(str, str2, str3, snapshot));
        } else {
            this.delegate.startElement(str, str2, str3, attributes);
        }
    }

    /**
     * Forwards the start of a namespace prefix mapping to the delegate.
     *
     * @param str  prefix being mapped
     * @param str2 namespace URI the prefix maps to
     */
    @Override // org.xml.sax.ContentHandler
    public void startPrefixMapping(String str, String str2) {
        final ContentHandler target = this.delegate;
        target.startPrefixMapping(str, str2);
    }
}
