package opennlp.tools.formats.ad;

import androidx.constraintlayout.core.motion.utils.TypedValues;
import androidx.core.app.NotificationCompat;
import androidx.exifinterface.media.ExifInterface;
import com.applovin.sdk.AppLovinEventTypes;
import com.caverock.androidsvg.AbstractC0404c;
import io.sentry.SentryLockReason;
import io.sentry.protocol.Device;
import io.sentry.rrweb.RRWebVideoEvent;
import j$.util.DesugarCollections;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.commons.Internal;
import opennlp.tools.formats.ad.ADSentenceStream;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;

@Internal
/* loaded from: classes2.dex */
public class ADNameSampleStream implements ObjectStream<NameSample> {
    private static final Map<String, String> HAREM;
    private final ObjectStream<ADSentenceStream.Sentence> adSentenceStream;
    private String leftContractionPart;
    private final boolean splitHyphenatedTokens;
    private int textID;
    private static final Pattern TAG_PATTERN = Pattern.compile("<(NER:)?(.*?)>");
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
    private static final Pattern UNDERLINE_PATTERN = Pattern.compile("[_]+");
    private static final Pattern HYPHEN_PATTERN = Pattern.compile("((\\p{L}+)-$)|(^-(\\p{L}+)(.*))|((\\p{L}+)-(\\p{L}+)(.*))");
    private static final Pattern ALPHANUMERIC_PATTERN = Pattern.compile("^[\\p{L}\\p{Nd}]+$");

    static {
        HashMap hashMap = new HashMap();
        hashMap.put("hum", "person");
        hashMap.put("official", "person");
        hashMap.put("member", "person");
        hashMap.put("admin", "organization");
        hashMap.put("org", "organization");
        hashMap.put("inst", "organization");
        hashMap.put("media", "organization");
        hashMap.put("party", "organization");
        hashMap.put("suborg", "organization");
        hashMap.put("groupind", "group");
        hashMap.put("groupofficial", "group");
        hashMap.put(RRWebVideoEvent.JsonKeys.TOP, "place");
        hashMap.put("civ", "place");
        hashMap.put(SentryLockReason.JsonKeys.ADDRESS, "place");
        hashMap.put("site", "place");
        hashMap.put("virtual", "place");
        hashMap.put("astro", "place");
        hashMap.put("occ", NotificationCompat.CATEGORY_EVENT);
        hashMap.put(NotificationCompat.CATEGORY_EVENT, NotificationCompat.CATEGORY_EVENT);
        hashMap.put("history", NotificationCompat.CATEGORY_EVENT);
        hashMap.put("tit", "artprod");
        hashMap.put("pub", "artprod");
        hashMap.put(AppLovinEventTypes.USER_VIEWED_PRODUCT, "artprod");
        hashMap.put(ExifInterface.GPS_MEASUREMENT_INTERRUPTED, "artprod");
        hashMap.put("artwork", "artprod");
        hashMap.put(Device.JsonKeys.BRAND, "abstract");
        hashMap.put("genre", "abstract");
        hashMap.put("school", "abstract");
        hashMap.put("idea", "abstract");
        hashMap.put("plan", "abstract");
        hashMap.put("author", "abstract");
        hashMap.put("absname", "abstract");
        hashMap.put("disease", "abstract");
        hashMap.put("object", "thing");
        hashMap.put("common", "thing");
        hashMap.put("mat", "thing");
        hashMap.put("class", "thing");
        hashMap.put("plant", "thing");
        hashMap.put("currency", "thing");
        hashMap.put("date", "time");
        hashMap.put("hour", "time");
        hashMap.put(TypedValues.CycleType.S_WAVE_PERIOD, "time");
        hashMap.put("cyclic", "time");
        hashMap.put("quantity", "numeric");
        hashMap.put("prednum", "numeric");
        hashMap.put("currency", "numeric");
        HAREM = DesugarCollections.unmodifiableMap(hashMap);
    }

    @Deprecated
    public ADNameSampleStream(InputStreamFactory inputStreamFactory, String str, boolean z2) throws IOException {
        this(new PlainTextByLineStream(inputStreamFactory, str), z2);
    }

    public ADNameSampleStream(ObjectStream<String> objectStream, boolean z2) {
        this.leftContractionPart = null;
        this.textID = -1;
        this.adSentenceStream = new ADSentenceStream(objectStream);
        this.splitHyphenatedTokens = z2;
    }

    private void addIfNotEmpty(String str, List<String> list) {
        if (str == null || str.length() <= 0) {
            return;
        }
        list.addAll(processTok(str));
    }

    private static String getNER(String str) {
        if (str.contains("<NER2>")) {
            return null;
        }
        for (String str2 : str.split("\\s+")) {
            Matcher matcher = TAG_PATTERN.matcher(str2);
            if (matcher.matches()) {
                String group = matcher.group(2);
                Map<String, String> map = HAREM;
                if (map.containsKey(group)) {
                    return map.get(group);
                }
            }
        }
        return null;
    }

    private int getTextID(ADSentenceStream.Sentence sentence) {
        Pattern compile;
        int i2;
        String metadata = sentence.getMetadata();
        if (metadata.startsWith("LIT")) {
            compile = Pattern.compile("^([a-zA-Z\\-]+)(\\d+).*?p=(\\d+).*");
            i2 = 3;
        } else if (metadata.startsWith("CIE")) {
            compile = Pattern.compile("^.*?source=\"(.*?)\".*");
            i2 = 2;
        } else {
            compile = Pattern.compile("^(?:[a-zA-Z\\-]*(\\d+)).*?p=(\\d+).*");
            i2 = 1;
        }
        if (AbstractC0404c.a(i2, 3)) {
            Matcher matcher = compile.matcher(metadata);
            if (matcher.matches()) {
                return !matcher.group(1).equals("") ? 0 : -1;
            }
            throw new RuntimeException("Invalid metadata: ".concat(metadata));
        }
        if (AbstractC0404c.a(i2, 2)) {
            Matcher matcher2 = compile.matcher(metadata);
            if (matcher2.matches()) {
                return !matcher2.group(1).equals("") ? 0 : -1;
            }
            throw new RuntimeException("Invalid metadata: ".concat(metadata));
        }
        if (!AbstractC0404c.a(i2, 1)) {
            return 0;
        }
        Matcher matcher3 = compile.matcher(metadata);
        if (matcher3.matches()) {
            return Integer.parseInt(matcher3.group(1));
        }
        throw new RuntimeException("Invalid metadata: ".concat(metadata));
    }

    private void process(ADSentenceStream.SentenceParser.Node node, List<String> list, List<Span> list2) {
        if (node != null) {
            for (ADSentenceStream.SentenceParser.TreeElement treeElement : node.getElements()) {
                if (treeElement.isLeaf()) {
                    processLeaf((ADSentenceStream.SentenceParser.Leaf) treeElement, list, list2);
                } else {
                    process((ADSentenceStream.SentenceParser.Node) treeElement, list, list2);
                }
            }
        }
    }

    private void processLeaf(ADSentenceStream.SentenceParser.Leaf leaf, List<String> list, List<Span> list2) {
        boolean z2;
        String str = null;
        boolean z3 = false;
        if (this.leftContractionPart != null) {
            String contraction = PortugueseContractionUtility.toContraction(this.leftContractionPart, leaf.getLexeme());
            if (contraction != null) {
                list.addAll(Arrays.asList(WHITESPACE_PATTERN.split(contraction)));
                z2 = true;
            } else {
                list.add(this.leftContractionPart);
                z2 = false;
            }
            this.leftContractionPart = null;
        } else {
            z2 = false;
        }
        String secondaryTag = leaf.getSecondaryTag();
        if (secondaryTag != null) {
            if (secondaryTag.contains("<sam->") && !z2) {
                String[] split = UNDERLINE_PATTERN.split(leaf.getLexeme());
                if (split.length > 1) {
                    list.addAll(Arrays.asList(split).subList(0, split.length - 1));
                }
                this.leftContractionPart = split[split.length - 1];
                return;
            }
            z3 = secondaryTag.contains("<NER2>");
            str = getNER(secondaryTag);
        }
        int size = str != null ? list.size() : -1;
        if (!z2) {
            list.addAll(processLexeme(leaf.getLexeme()));
        }
        if (str != null) {
            list2.add(new Span(size, list.size(), str));
        }
        if (z3) {
            int size2 = list2.size() - 1;
            if (list2.size() > 0) {
                Span span = list2.get(size2);
                if (span.getEnd() == list.size() - 1) {
                    list2.set(size2, new Span(span.getStart(), list.size(), span.getType()));
                }
            }
        }
    }

    private List<String> processLexeme(String str) {
        ArrayList arrayList = new ArrayList();
        for (String str2 : UNDERLINE_PATTERN.split(str)) {
            if (str2.length() <= 1 || ALPHANUMERIC_PATTERN.matcher(str2).matches()) {
                arrayList.add(str2);
            } else {
                arrayList.addAll(processTok(str2));
            }
        }
        return arrayList;
    }

    private List<String> processTok(String str) {
        String str2;
        String str3;
        String str4;
        ArrayList arrayList = new ArrayList();
        LinkedList linkedList = new LinkedList();
        char charAt = str.charAt(0);
        if (charAt == 171) {
            arrayList.add(Character.toString(charAt));
            str2 = str.substring(1);
        } else {
            str2 = str;
        }
        char charAt2 = str2.charAt(str2.length() - 1);
        if (charAt2 == 187 || charAt2 == ':' || charAt2 == ',' || charAt2 == '!') {
            linkedList.add(Character.toString(charAt2));
            str2 = str2.substring(0, str2.length() - 1);
        }
        if (this.splitHyphenatedTokens && str2.contains("-") && str2.length() > 1) {
            Matcher matcher = HYPHEN_PATTERN.matcher(str2);
            if (matcher.matches()) {
                String str5 = null;
                if (matcher.group(1) != null) {
                    String group = matcher.group(2);
                    str4 = null;
                    str5 = group;
                    str3 = null;
                } else if (matcher.group(3) != null) {
                    str3 = matcher.group(4);
                    str4 = matcher.group(5);
                } else if (matcher.group(6) != null) {
                    str5 = matcher.group(7);
                    str3 = matcher.group(8);
                    str4 = matcher.group(9);
                } else {
                    str3 = null;
                    str4 = null;
                }
                addIfNotEmpty(str5, arrayList);
                addIfNotEmpty("-", arrayList);
                addIfNotEmpty(str3, arrayList);
                addIfNotEmpty(str4, arrayList);
                arrayList.addAll(linkedList);
                return arrayList;
            }
        }
        if (str.equals(str2) || str2.length() <= 1 || ALPHANUMERIC_PATTERN.matcher(str2).matches()) {
            arrayList.add(str2);
        } else {
            arrayList.addAll(processTok(str2));
        }
        arrayList.addAll(linkedList);
        return arrayList;
    }

    @Override // opennlp.tools.util.ObjectStream, java.lang.AutoCloseable
    public void close() throws IOException {
        this.adSentenceStream.close();
    }

    @Override // opennlp.tools.util.ObjectStream
    public NameSample read() throws IOException {
        boolean z2;
        ADSentenceStream.Sentence read = this.adSentenceStream.read();
        if (read == null) {
            return null;
        }
        int textID = getTextID(read);
        if (textID != this.textID) {
            this.textID = textID;
            z2 = true;
        } else {
            z2 = false;
        }
        ADSentenceStream.SentenceParser.Node root = read.getRoot();
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        process(root, arrayList, arrayList2);
        return new NameSample((String[]) arrayList.toArray(new String[0]), (Span[]) arrayList2.toArray(new Span[0]), z2);
    }

    @Override // opennlp.tools.util.ObjectStream
    public void reset() throws IOException, UnsupportedOperationException {
        this.adSentenceStream.reset();
    }
}
