package opennlp.tools.tokenize;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import opennlp.tools.ml.model.Event;
import opennlp.tools.tokenize.lang.Factory;
import opennlp.tools.util.AbstractEventStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: classes2.dex */
/**
 * Turns {@link TokenSample}s into tokenizer training {@link Event}s.
 *
 * <p>For each sample, the text covered by the annotated token spans is cut into
 * candidate spans by {@link WhitespaceTokenizer}. Every annotated token that lies
 * inside a candidate contributes one event per interior character position
 * (outcome {@link TokenizerME#NO_SPLIT}) and, when the token ends before the
 * candidate does, one event at the token's end (outcome {@code "T"}).
 */
public class TokSpanEventStream extends AbstractEventStream<TokenSample> {
    private static final Logger logger = LoggerFactory.getLogger(TokSpanEventStream.class);

    /**
     * Outcome emitted at a position where an annotated token ends inside a candidate.
     * NOTE(review): presumably the counterpart of {@code TokenizerME.NO_SPLIT}
     * (i.e. {@code TokenizerME.SPLIT}) - confirm before replacing the literal.
     */
    private static final String SPLIT_OUTCOME = "T";

    private final Pattern alphaNumeric;
    private final TokenContextGenerator cg;
    private final boolean skipAlphaNumerics;

    /**
     * Creates a stream that uses a {@link DefaultTokenContextGenerator}.
     *
     * @param tokenSamples the samples to convert into events
     * @param skipAlphaNumerics whether candidates fully matching the alphanumeric
     *     pattern produce no events
     */
    public TokSpanEventStream(ObjectStream<TokenSample> tokenSamples, boolean skipAlphaNumerics) {
        this(tokenSamples, skipAlphaNumerics, new DefaultTokenContextGenerator());
    }

    /**
     * Creates a stream with an explicit alphanumeric pattern and context generator.
     *
     * @param tokenSamples the samples to convert into events
     * @param skipAlphaNumerics whether candidates fully matching {@code alphaNumeric}
     *     produce no events
     * @param alphaNumeric the pattern consulted when {@code skipAlphaNumerics} is set
     * @param cg the generator that builds the context for each event
     */
    public TokSpanEventStream(ObjectStream<TokenSample> tokenSamples, boolean skipAlphaNumerics,
            Pattern alphaNumeric, TokenContextGenerator cg) {
        super(tokenSamples);
        this.alphaNumeric = alphaNumeric;
        this.skipAlphaNumerics = skipAlphaNumerics;
        this.cg = cg;
    }

    /**
     * Creates a stream whose alphanumeric pattern is taken from
     * {@code new Factory().getAlphanumeric(null)}.
     *
     * @param tokenSamples the samples to convert into events
     * @param skipAlphaNumerics whether candidates fully matching the alphanumeric
     *     pattern produce no events
     * @param cg the generator that builds the context for each event
     */
    public TokSpanEventStream(ObjectStream<TokenSample> tokenSamples, boolean skipAlphaNumerics,
            TokenContextGenerator cg) {
        this(tokenSamples, skipAlphaNumerics, new Factory().getAlphanumeric(null), cg);
    }

    /**
     * Builds the training events for a single sample.
     *
     * @param tokenSample the sample providing the text and its annotated token spans
     * @return an iterator over the generated events; empty when the sample has no
     *     token spans
     */
    @Override // opennlp.tools.util.AbstractEventStream
    public Iterator<Event> createEvents(TokenSample tokenSample) {
        final List<Event> events = new ArrayList<>(50);
        final Span[] tokens = tokenSample.getTokenSpans();
        final String text = tokenSample.getText();
        if (tokens.length == 0) {
            return events.iterator();
        }

        // Only the stretch of text from the first token's start to the last token's end is used.
        final int start = tokens[0].getStart();
        final int end = tokens[tokens.length - 1].getEnd();
        final String sent = text.substring(start, end);

        // Both indices deliberately survive across candidates: the search for the next
        // candidate resumes right after the last token that was matched so far.
        int firstTrainingToken = -1;
        int lastTrainingToken = -1;

        for (Span candidate : WhitespaceTokenizer.INSTANCE.tokenizePos(sent)) {
            final String candidateText = sent.substring(candidate.getStart(), candidate.getEnd());
            if (isSkipped(candidateText)) {
                continue;
            }

            // Candidate offsets are relative to 'sent'; shift them to offsets in 'text'.
            final Span candidateSpan =
                    new Span(candidate.getStart() + start, candidate.getEnd() + start);

            // Find the annotated tokens that lie inside this candidate.
            boolean foundTrainingTokens = false;
            for (int ti = lastTrainingToken + 1; ti < tokens.length; ti++) {
                final Span token = tokens[ti];
                if (candidateSpan.contains(token)) {
                    if (!foundTrainingTokens) {
                        firstTrainingToken = ti;
                        foundTrainingTokens = true;
                    }
                    lastTrainingToken = ti;
                } else if (candidateSpan.getEnd() < token.getEnd()) {
                    // The token ends beyond this candidate; leave it for a later candidate.
                    break;
                } else if (token.getEnd() >= candidateSpan.getStart()) {
                    // The token overlaps the candidate without being contained in it.
                    // The index must advance exactly once here so that the following
                    // token is still examined.
                    logger.warn("Bad training token: {} cand: {} token={}", token, candidateSpan,
                            text.substring(token.getStart(), token.getEnd()));
                }
                // Otherwise the token ends before the candidate starts: keep looking.
            }

            if (foundTrainingTokens) {
                addEvents(events, candidateText, candidateSpan, tokens,
                        firstTrainingToken, lastTrainingToken);
            }
        }
        return events.iterator();
    }

    /**
     * Tells whether a candidate yields no events: it is at most one character long, or
     * alphanumeric skipping is enabled and the whole candidate matches the pattern.
     */
    private boolean isSkipped(String candidateText) {
        return candidateText.length() <= 1
                || (this.skipAlphaNumerics && this.alphaNumeric.matcher(candidateText).matches());
    }

    /**
     * Appends the events for the tokens {@code first..last} (inclusive) found inside one
     * candidate. Context indices are offsets relative to the candidate's start.
     */
    private void addEvents(List<Event> events, String candidateText, Span candidateSpan,
            Span[] tokens, int first, int last) {
        final int candidateStart = candidateSpan.getStart();
        for (int ti = first; ti <= last; ti++) {
            final Span token = tokens[ti];
            // Every position strictly inside the token is a no-split decision.
            for (int pos = token.getStart() + 1; pos < token.getEnd(); pos++) {
                events.add(new Event(TokenizerME.NO_SPLIT,
                        this.cg.getContext(candidateText, pos - candidateStart)));
            }
            // A token ending before the candidate ends marks a boundary inside the candidate.
            if (token.getEnd() != candidateSpan.getEnd()) {
                events.add(new Event(SPLIT_OUTCOME,
                        this.cg.getContext(candidateText, token.getEnd() - candidateStart)));
            }
        }
    }
}
