package com.samsung.android.ocr.stride.postocr.entity;

import android.graphics.Point;
import com.microsoft.identity.common.java.AuthenticationConstants;
import com.samsung.android.ocr.MOCRLog;
import com.samsung.android.ocr.MOCRResult;
import com.samsung.android.ocr.stride.postocr.entity.CorrectionBase;
import com.samsung.android.support.senl.cm.base.common.constants.Extension;
import com.samsung.android.support.senl.nt.coedit.common.a;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import y.i;

/* loaded from: classes3.dex */
public class URLCorrection extends CorrectionBase {
    /** Alternation (one capture group) of the domain suffixes accepted at the end of a host name. */
    private static final String DOMAIN_EXTNS = "(com|in|kr|se|org|net|io|gle|mil|tk|cn|de|uk|nl|icu|ru|ir|au|ua|us|fr|es|ca|it|sg|gov|edu|co|ms|at|blog)";
    /**
     * Separator between scheme and host: one to three of {@code . : ;} or whitespace, an optional
     * {@code /}, {@code 1} or {@code l}, then a literal {@code /}. Contributes four capture groups.
     */
    private static final String HTTP_SYMBOL_VARIATIONS = "(([.:;\\s]{1,3})([/1l])?(/))";
    /** "http" and the listed misspellings of it, with an optional trailing "s". Three capture groups. */
    private static final String HTTP_VARIATIONS = "((http|nttp|ntip|htip|htto|httr|htp)(s)?)";
    /** Log tag used by every message emitted from this class. */
    private static final String TAG = "URLCorrection";
    /** "www" or the variant "vww". */
    private static final String WWW_VARIATIONS = "(www|vww)";

    // Every pattern below is compiled once and matched case-insensitively. The regex strings are
    // assembled from the fragments above; the resulting text is identical to the fully spelled-out
    // expressions.

    /** Scheme + separator + "www" followed by host labels that are separated by commas. */
    private static final String REGEX_HTTP_SPACE_COMMA = HTTP_VARIATIONS + HTTP_SYMBOL_VARIATIONS
            + "(www)(\\.|\\s)+((([a-zA-Z0-9-]+)(\\s)*(,)(\\s)*)+)(\\s)*" + DOMAIN_EXTNS;
    private static final Pattern PAT_HTTP_SPACE_COMMA = Pattern.compile(REGEX_HTTP_SPACE_COMMA, Pattern.CASE_INSENSITIVE);

    /** "www" host where exactly one of the two separators is a comma and the other a comma or dot. */
    private static final String REGEX_URL_COMMA_WO_SPACE = "((www)\\s*(,)\\s*([a-zA-Z0-9-.]+)\\s*([,.])\\s*" + DOMAIN_EXTNS
            + ")|((www)\\s*([,.])\\s*([a-zA-Z0-9-._]+)\\s*(,)\\s*" + DOMAIN_EXTNS + ")";
    private static final Pattern PAT_URL_COMMA_WO_SPACE = Pattern.compile(REGEX_URL_COMMA_WO_SPACE, Pattern.CASE_INSENSITIVE);

    /** "www" host where both separators are commas. */
    private static final String REGEX_URL_COMMA = "(www)(\\s)*(,)(\\s)*([a-zA-Z0-9-.]+)(\\s)*(,)(\\s)*" + DOMAIN_EXTNS;
    private static final Pattern PAT_URL_COMMA = Pattern.compile(REGEX_URL_COMMA, Pattern.CASE_INSENSITIVE);

    /** Scheme + separator + dotted host; capture group 8 spans the whole host including its suffix. */
    private static final String REGEX_URL_HTTP_BASIC = HTTP_VARIATIONS + HTTP_SYMBOL_VARIATIONS
            + "(((([a-zA-Z0-9-]+)\\s*(\\.)\\s*(\\.)?\\s*)+)(\\s)*(\\.)?" + DOMAIN_EXTNS + ")";
    private static final Pattern PAT_URL_HTTP_BASIC = Pattern.compile(REGEX_URL_HTTP_BASIC, Pattern.CASE_INSENSITIVE);

    /** Host introduced by "www"/"vww" without a scheme. */
    private static final String REGEX_URL_WWW_BASIC = WWW_VARIATIONS
            + "(\\s)*(.)+(\\s)*((([a-zA-Z0-9-]+)\\s*(\\.)\\s*(\\.)?\\s*)+)(\\s)*(\\.)?" + DOMAIN_EXTNS;
    private static final Pattern PAT_URL_WWW_BASIC = Pattern.compile(REGEX_URL_WWW_BASIC, Pattern.CASE_INSENSITIVE);

    /** At least two dotted labels followed by a known suffix. */
    private static final String REGEX_URL_BASIC = "((([a-zA-Z0-9-]+)\\s*(\\.)\\s*(\\.)?\\s*){2,})(\\s)*(\\.)?" + DOMAIN_EXTNS;
    private static final Pattern PAT_URL_BASIC = Pattern.compile(REGEX_URL_BASIC, Pattern.CASE_INSENSITIVE);

    /** One or more dotted labels + suffix, delimited by start/whitespace before and end/whitespace/slash after. */
    private static final String REGEX_STRICT_DOMAIN = "(?<=^|\\s)((([a-zA-Z0-9-]+)\\s*(\\.)\\s*(\\.)?\\s*)+)(\\s)*" + DOMAIN_EXTNS
            + "(?=($|\\s|/))";
    private static final Pattern PAT_STRICT_DOMAIN = Pattern.compile(REGEX_STRICT_DOMAIN, Pattern.CASE_INSENSITIVE);

    /** Slash-separated path segments, optionally followed by a short extension or a query string. */
    private static final String REGEX_WEB_PAGE_URL = "(?<!\\/)((\\s*\\/\\s*([a-zA-Z0-9-_]+))+)((\\s*(\\.)\\s*[a-zA-Z]{2,4})|(\\s*(\\?)\\s*)[A-Za-z0-9-_#=+&$%();\\p{InHangul_Syllables}\\p{InCJK_Unified_Ideographs}]+)?";
    private static final Pattern PAT_WEB_PAGE_URL = Pattern.compile(REGEX_WEB_PAGE_URL, Pattern.CASE_INSENSITIVE);

    /** Misread domains (leading "I"/"i" in place of a lowercase "l"); index-aligned with {@link #REPLACE_DOMAIN}. */
    private static final String[] FIND_DOMAIN = {"Inkd.in", "inkd.in"};
    /** Corrected spelling for the entry at the same index of {@link #FIND_DOMAIN}. */
    private static final String[] REPLACE_DOMAIN = {"lnkd.in", "lnkd.in"};
    /** Capture group holding the scheme in patterns that begin with {@link #HTTP_VARIATIONS}. */
    private static final int GROUP_URL_HTTP = 1;
    /** Capture group holding the scheme separator in patterns that begin with scheme + separator. */
    private static final int GROUP_URL_HTTP_SYMBOLS = 4;
    /** Capture group holding the full host in {@link #REGEX_URL_HTTP_BASIC}. */
    private static final int GROUP_URL_HTTP_POPULAR_DOMAIN = 8;

    /**
     * Replaces a known misread domain inside one word of a line.
     *
     * <p>The text captured by group {@code i4} of {@code matcher} is compared (exact, case-sensitive)
     * with the entries of {@code FIND_DOMAIN}. On a hit, the first occurrence of that text in the
     * word is replaced by the {@code REPLACE_DOMAIN} entry at the same index, and the code points of
     * the word's characters are refreshed from the new text. The method does nothing when the
     * matcher is {@code null}, the word index is outside {@code line.words}, the group did not
     * participate in the match, the captured text is not part of the word, or the captured text is
     * not a listed misreading.
     *
     * @param line    line that owns the word
     * @param i       index of the word inside {@code line.words}
     * @param matcher matcher holding a successful match; may be {@code null}
     * @param i4      index of the capture group that holds the domain text
     */
    private static void correctDomainName(MOCRResult.Line line, int i, Matcher matcher, int i4) {
        if (matcher == null) {
            return;
        }
        MOCRResult.Word[] words = line.words;
        if (i < 0 || i >= words.length) {
            return;
        }
        MOCRResult.Word word = words[i];
        String domain = matcher.group(i4);
        if (domain == null) {
            return;
        }
        int start = word.wordText.indexOf(domain);
        if (start == -1) {
            return;
        }

        String replacement = null;
        for (int k = 0; k < FIND_DOMAIN.length; k++) {
            if (domain.equals(FIND_DOMAIN[k])) {
                replacement = REPLACE_DOMAIN[k];
                break;
            }
        }
        if (replacement == null) {
            return;
        }

        String before = word.wordText;
        String corrected = before.substring(0, start) + replacement + before.substring(start + domain.length());
        word.wordText = corrected;

        MOCRResult.Char[] chars = word.chars;
        if (chars != null) {
            // Never index past the character array: it is not guaranteed here to be as long as the text.
            int limit = Math.min(chars.length, corrected.length());
            for (int k = 0; k < limit; k++) {
                chars[k].unicode = corrected.charAt(k);
            }
        }
        MOCRLog.d(TAG, "Domain corrected :: " + word.getText());
    }

    /**
     * Repairs the scheme and the scheme separator at the start of a URL inside one word.
     *
     * <p>Group {@code GROUP_URL_HTTP} of {@code matcher} is taken as the scheme as read and group
     * {@code GROUP_URL_HTTP_SYMBOLS} as the separator as read. A misspelled scheme is rewritten to
     * "http" or its secure form, and a separator other than "://" is rewritten to "://". The method
     * returns without changes when the matcher is {@code null}, the word index is outside
     * {@code line.words}, or the scheme group did not participate in the match.
     *
     * @param line    line that owns the word
     * @param i       index of the word inside {@code line.words}
     * @param matcher matcher holding a successful match of a pattern that starts with scheme and
     *                separator; may be {@code null}
     */
    private static void correctHttpPrefix(MOCRResult.Line line, int i, Matcher matcher) {
        if (matcher == null) {
            return;
        }
        MOCRResult.Word[] words = line.words;
        if (i < 0 || i >= words.length) {
            return;
        }
        MOCRResult.Word word = words[i];

        String scheme = matcher.group(GROUP_URL_HTTP);
        if (scheme == null) {
            return;
        }
        repairScheme(line, word, scheme);

        String separator = matcher.group(GROUP_URL_HTTP_SYMBOLS);
        if (separator == null) {
            return;
        }
        // The separator is looked up in the word with all whitespace removed. Because of that it
        // can never contain a space, so no space count has to be subtracted from its end offset.
        repairSchemeSeparator(line, word, separator.replaceAll("\\s", ""));
    }

    /**
     * Rewrites a misspelled scheme inside the word text; correctly spelled schemes are left alone.
     *
     * <p>NOTE(review): only {@code wordText} and {@code charCount} are updated here, {@code word.chars}
     * is not touched, so after a length-changing rewrite (e.g. "htp") the character array no longer
     * lines up with the text — confirm whether the array should be rebuilt as well.
     */
    private static void repairScheme(MOCRResult.Line line, MOCRResult.Word word, String scheme) {
        int start = word.wordText.indexOf(scheme);
        if (start == -1
                || scheme.equalsIgnoreCase("http")
                || scheme.equalsIgnoreCase(AuthenticationConstants.HTTPS_PROTOCOL_STRING)) {
            return;
        }
        int end = start + scheme.length();
        MOCRLog.d(TAG, scheme + " " + start + " " + end);

        // The patterns match case-insensitively, so the trailing "s" may be upper case.
        boolean secure = scheme.regionMatches(true, scheme.length() - 1, "s", 0, 1);
        String before = word.wordText;
        String corrected = before.substring(0, start)
                + (secure ? AuthenticationConstants.HTTPS_PROTOCOL_STRING : "http")
                + before.substring(end);
        word.wordText = corrected;
        word.charCount = corrected.length();
        MOCRLog.d(TAG, "Http Word Correction :: " + line.getText());
    }

    /**
     * Rewrites the scheme separator to "://" in the word text and, when present, in the word's
     * character array, then updates {@code charCount}.
     *
     * @param separator separator as read, with whitespace already removed
     */
    private static void repairSchemeSeparator(MOCRResult.Line line, MOCRResult.Word word, String separator) {
        int start = word.wordText.indexOf(separator);
        if (separator.isEmpty() || separator.equalsIgnoreCase("://") || start == -1) {
            return;
        }
        int readLength = separator.length();
        int end = start + readLength;
        MOCRLog.d(TAG, separator + " " + start + " " + end);

        final String fixed = "://";
        String before = word.wordText;
        String corrected = before.substring(0, start) + fixed + before.substring(end);
        word.wordText = corrected;

        if (word.chars == null) {
            word.charCount = corrected.length();
        } else {
            ArrayList<MOCRResult.Char> chars = new ArrayList<>(Arrays.asList(word.chars));
            for (int k = 0; k < fixed.length(); k++) {
                if (k < readLength) {
                    // Reuse the character that was read at this position.
                    chars.get(start + k).unicode = fixed.charAt(k);
                } else {
                    // The separator was read too short: add a character that borrows the
                    // rectangle of its left neighbour.
                    MOCRResult.Char added = new MOCRResult.Char();
                    added.unicode = fixed.charAt(k);
                    added.cRect = (Point[]) chars.get(start + k - 1).cRect.clone();
                    chars.add(start + k, added);
                }
            }
            if (readLength > fixed.length()) {
                // Drop every surplus separator character in one step; removing them one by one at
                // increasing indices would skip entries because the list shifts after each removal.
                chars.subList(start + fixed.length(), end).clear();
            }
            MOCRResult.Char[] rebuilt = chars.toArray(new MOCRResult.Char[0]);
            word.chars = rebuilt;
            word.charCount = rebuilt.length;
        }
        MOCRLog.d(TAG, "Http Symbol Correction :: " + line.getText());
    }

    /**
     * Inserts the missing dot after "www" in one word of a line.
     *
     * <p>Nothing is changed when the word index is outside {@code line.words}, when the word
     * already contains "www.", or when it does not contain "www" at all (the search is
     * case-sensitive). Otherwise {@code Extension.DOT} is inserted right after the first "www" in
     * the text; if the word carries a character array, a '.' character that copies the rectangle of
     * the last "w" is inserted at the same position. {@code charCount} is updated to the new length.
     *
     * @param line line that owns the word
     * @param i    index of the word inside {@code line.words}
     */
    private static void correctWwwDot(MOCRResult.Line line, int i) {
        MOCRResult.Word[] words = line.words;
        // Same index guard as the sibling correction methods of this class.
        if (i < 0 || i >= words.length) {
            return;
        }
        MOCRResult.Word word = words[i];
        String text = word.wordText;
        if (text.indexOf("www.") != -1) {
            return;
        }
        int start = text.indexOf("www");
        if (start == -1) {
            return;
        }

        int dotAt = start + 3;
        String corrected = text.substring(0, dotAt) + Extension.DOT + text.substring(dotAt);
        word.wordText = corrected;

        if (word.chars == null) {
            word.charCount = corrected.length();
            return;
        }
        ArrayList<MOCRResult.Char> chars = new ArrayList<>(Arrays.asList(word.chars));
        MOCRResult.Char lastW = chars.get(start + 2);
        MOCRResult.Char dot = new MOCRResult.Char();
        dot.unicode = '.';
        dot.cRect = (Point[]) lastW.cRect.clone();
        chars.add(dotAt, dot);
        MOCRResult.Char[] rebuilt = chars.toArray(new MOCRResult.Char[0]);
        word.chars = rebuilt;
        word.charCount = rebuilt.length;
    }

    /**
     * Tells whether a code point is the ASCII space character (U+0020).
     *
     * @param i code point to test
     * @return {@code true} only for the value 32
     */
    public static /* synthetic */ boolean lambda$correctHttpPrefix$0(int i) {
        final int space = ' ';
        return space == i;
    }

    /**
     * Normalises the "www" casing of one word and, when the line contains a path-like part,
     * joins the words from index {@code i} up to the end of that part.
     *
     * <p>The path pattern is matched against the line text as it was before the "www" fix. The
     * words are joined only if the match starts in word {@code i} or in the word right after it and
     * the reported start index is not past the reported end index. The method returns without
     * changes when {@code i} is outside {@code line.words}.
     *
     * @param line line to inspect and possibly modify
     * @param i    index of the word that holds the host part of the URL
     */
    private static void validatePageURL(MOCRResult.Line line, int i) {
        // Same index guard as correctHttpPrefix / correctDomainName; without it an out-of-range
        // index fails on the array access below.
        if (i < 0 || i >= line.words.length) {
            return;
        }
        String text = line.getText();
        validateWwwCase(line.words[i]);

        Matcher matcher = PAT_WEB_PAGE_URL.matcher(text);
        if (!matcher.find()) {
            return;
        }

        // a.k(...) and a.q(...) are obfuscated helpers that hand back a StringBuilder; presumably
        // the first one also logs the original line under the given tag — kept exactly as found.
        StringBuilder matchMessage = a.k("Original line :: ", text, TAG, "PAT_WEB_PAGE_URL match: ");
        matchMessage.append(matcher.group());
        MOCRLog.d(TAG, matchMessage.toString());

        int[] ids = CorrectionBase.getMatchingIds(matcher, line);
        int first = ids[2];
        int last = ids[3];
        StringBuilder idMessage = androidx.constraintlayout.core.parser.a.q("Page ids: ", first, " ", last, " ");
        idMessage.append(i);
        MOCRLog.d(TAG, idMessage.toString());

        boolean startsAtHost = first == i || first == i + 1;
        if (startsAtHost && first <= last) {
            CorrectionBase.joinWords(line, i, last);
        }
        MOCRLog.d(TAG, "Updated line :: " + line.getText());
    }

    /**
     * Runs the URL corrections on every line of every block of a page.
     *
     * @param page page whose lines are corrected in place
     */
    public static void validateURLLine(MOCRResult.Page page) {
        for (MOCRResult.Block block : page.blocks) {
            for (MOCRResult.Line line : block.lines) {
                validateLine(line);
            }
        }
    }

    /**
     * Applies the first URL pattern that matches the line, trying them from most to least specific:
     * comma-separated host with scheme, comma-separated "www" hosts, scheme + dotted host,
     * "www" host, plain dotted host, strict domain. All patterns are matched against the line text
     * taken on entry; at most one of them is applied.
     */
    private static void validateLine(MOCRResult.Line line) {
        String text = line.getText();

        Matcher httpComma = PAT_HTTP_SPACE_COMMA.matcher(text);
        if (httpComma.find()) {
            int[] ids = logMatchAndLocate(line, text, httpComma, "PAT_HTTP_SPACE_COMMA match: ");
            int first = ids[2];
            int last = ids[3];
            if (first <= last) {
                CorrectionBase.joinWords(line, first, last, CorrectionBase.RegexActions.REPLACE_COMMA_IN_EMAIL_OR_URL);
            }
            correctHttpPrefix(line, first, httpComma);
            correctWwwDot(line, first);
            validatePageURL(line, first);
            MOCRLog.i(TAG, "Post OCR Entity Validation");
            return;
        }

        Matcher mixedComma = PAT_URL_COMMA_WO_SPACE.matcher(text);
        if (mixedComma.find()) {
            int[] ids = logMatchAndLocate(line, text, mixedComma, "PAT_URL_COMMA_WO_SPACE match: ");
            int first = ids[2];
            int last = ids[3];
            if (first <= last) {
                CorrectionBase.joinWords(line, first, last, CorrectionBase.RegexActions.REPLACE_COMMA_MULTIPLE_IN_URL);
            }
            validatePageURL(line, first);
            MOCRLog.i(TAG, "Post OCR Entity Validation");
            return;
        }

        Matcher doubleComma = PAT_URL_COMMA.matcher(text);
        if (doubleComma.find()) {
            int[] ids = logMatchAndLocate(line, text, doubleComma, "PAT_URL_COMMA match: ");
            int first = ids[2];
            int last = ids[3];
            if (first <= last) {
                CorrectionBase.joinWords(line, first, last, CorrectionBase.RegexActions.REPLACE_COMMA_IN_EMAIL_OR_URL);
                MOCRLog.i(TAG, "Post OCR Entity Validation");
            }
            validatePageURL(line, first);
            return;
        }

        Matcher httpBasic = PAT_URL_HTTP_BASIC.matcher(text);
        if (httpBasic.find()) {
            int[] ids = logMatchAndLocate(line, text, httpBasic, "PAT_URL_HTTP_BASIC match: ");
            int first = ids[2];
            int last = ids[3];
            if (first <= last) {
                CorrectionBase.joinWords(line, first, last, CorrectionBase.RegexActions.REPLACE_DOUBLE_IN_URL);
            }
            correctHttpPrefix(line, first, httpBasic);
            correctWwwDot(line, first);
            correctDomainName(line, first, httpBasic, GROUP_URL_HTTP_POPULAR_DOMAIN);
            MOCRLog.i(TAG, "Post OCR Entity Validation");
            validatePageURL(line, first);
            return;
        }

        Matcher wwwBasic = PAT_URL_WWW_BASIC.matcher(text);
        if (wwwBasic.find()) {
            joinDoubledAndValidatePage(line, logMatchAndLocate(line, text, wwwBasic, "PAT_URL_WWW_BASIC match: "));
            return;
        }

        // The two loosest patterns are additionally filtered through validateUrl.
        Matcher basic = PAT_URL_BASIC.matcher(text);
        if (basic.find() && validateUrl(basic.group())) {
            joinDoubledAndValidatePage(line, logMatchAndLocate(line, text, basic, "PAT_URL_BASIC match: "));
            return;
        }

        Matcher strict = PAT_STRICT_DOMAIN.matcher(text);
        if (strict.find() && validateUrl(strict.group())) {
            joinDoubledAndValidatePage(line, logMatchAndLocate(line, text, strict, "PAT_STRICT_DOMAIN match: "));
        }
    }

    /**
     * Emits the debug output for a successful match and returns the ids reported by
     * {@code CorrectionBase.getMatchingIds}; callers read positions 2 and 3 of the result as the
     * first and last word index. {@code a.k(...)} is an obfuscated helper that returns a
     * StringBuilder (presumably after logging the original line) and is called exactly as before.
     */
    private static int[] logMatchAndLocate(MOCRResult.Line line, String text, Matcher matcher, String label) {
        StringBuilder message = a.k("Original line :: ", text, TAG, label);
        message.append(matcher.group());
        MOCRLog.d(TAG, message.toString());
        return CorrectionBase.getMatchingIds(matcher, line);
    }

    /**
     * Joins the matched word range with the REPLACE_DOUBLE_IN_URL action (logging only when a join
     * happened) and then runs the page-path check on the first word of the range.
     */
    private static void joinDoubledAndValidatePage(MOCRResult.Line line, int[] ids) {
        int first = ids[2];
        int last = ids[3];
        if (first <= last) {
            CorrectionBase.joinWords(line, first, last, CorrectionBase.RegexActions.REPLACE_DOUBLE_IN_URL);
            MOCRLog.i(TAG, "Post OCR Entity Validation");
        }
        validatePageURL(line, first);
    }

    /**
     * Rejects URL candidates whose text, lower-cased and trimmed, ends in ". in", ". it" or ". at"
     * (a dot, a space, then one of these two-letter suffixes).
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so the result does not depend on the device locale;
     * with a Turkish or Azeri default locale an upper-case "I" would otherwise become a dotless
     * "ı" and ". IN" / ". IT" would slip through the check.
     *
     * @param str matched URL candidate
     * @return {@code false} when the candidate ends in one of the rejected forms, {@code true} otherwise
     */
    private static boolean validateUrl(String str) {
        String normalized = str.toLowerCase(Locale.ROOT).trim();
        boolean rejected = normalized.endsWith(". in")
                || normalized.endsWith(". it")
                || normalized.endsWith(". at");
        return !rejected;
    }

    /**
     * Rewrites a mixed-case "www" (or any casing of the variant "vww") inside a word to
     * lower-case "www".
     *
     * <p>The first case-insensitive occurrence of "www" is used, falling back to "vww". The word is
     * left untouched when neither occurs, when the occurrence already reads "www", or when the
     * whole word is upper case. The three matching entries of the word's character array are
     * updated only if the array is present and long enough.
     *
     * @param word word to normalise in place
     */
    private static void validateWwwCase(MOCRResult.Word word) {
        String text = word.wordText;
        int start = indexOfIgnoreCase(text, "www");
        if (start == -1) {
            start = indexOfIgnoreCase(text, "vww");
        }
        if (start == -1) {
            return;
        }
        int end = start + 3;
        if (text.substring(start, end).equals("www") || text.toUpperCase(Locale.ROOT).equals(text)) {
            return;
        }
        MOCRLog.d(TAG, "www correction");
        word.wordText = text.substring(0, start) + "www" + text.substring(end);
        MOCRResult.Char[] chars = word.chars;
        if (chars != null && chars.length >= end) {
            for (int k = start; k < end; k++) {
                chars[k].unicode = 'w';
            }
        }
    }

    /**
     * Returns the index of the first case-insensitive occurrence of {@code needle} in {@code text},
     * or -1 if there is none. The search runs on the original string, so the returned offset is
     * always valid for {@code text} itself (a lower-cased copy can differ in length).
     */
    private static int indexOfIgnoreCase(String text, String needle) {
        int lastStart = text.length() - needle.length();
        for (int k = 0; k <= lastStart; k++) {
            if (text.regionMatches(true, k, needle, 0, needle.length())) {
                return k;
            }
        }
        return -1;
    }
}
