package smile.nlp.tokenizer;

import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Locale;

/* loaded from: classes5.dex */
/**
 * Word tokenizer backed by a {@link BreakIterator} word instance.
 *
 * <p>Each tokenizer owns a single {@code BreakIterator} that
 * {@link #split(String)} re-targets and advances on every call. Because that
 * iterator is shared mutable state, concurrent calls to {@code split} on the
 * same instance can interfere with each other; use one tokenizer per thread
 * or synchronize externally.
 */
public class BreakIteratorTokenizer implements Tokenizer {
    /** Word-boundary iterator, reused (and re-targeted) by every call to {@code split}. */
    private final BreakIterator boundary;

    /**
     * Creates a tokenizer using {@link BreakIterator#getWordInstance()},
     * i.e. the word-break rules of the default locale.
     */
    public BreakIteratorTokenizer() {
        this.boundary = BreakIterator.getWordInstance();
    }

    /**
     * Creates a tokenizer using {@link BreakIterator#getWordInstance(Locale)}.
     *
     * @param locale the locale whose word-break rules are applied
     */
    public BreakIteratorTokenizer(Locale locale) {
        this.boundary = BreakIterator.getWordInstance(locale);
    }

    /**
     * Splits the text at the boundaries reported by the word iterator.
     *
     * <p>Every segment between two consecutive boundaries is trimmed with
     * {@link String#trim()}. Segments that are empty after trimming are
     * dropped; all other segments are returned in order of appearance.
     *
     * @param str the text to tokenize; handed directly to
     *            {@link BreakIterator#setText(String)}
     * @return the non-empty trimmed segments, or an empty array if there are none
     */
    @Override // smile.nlp.tokenizer.Tokenizer
    public String[] split(String str) {
        boundary.setText(str);
        ArrayList<String> tokens = new ArrayList<>();

        // Walk consecutive boundary pairs [start, end) until the iterator is exhausted.
        for (int start = boundary.first(), end = boundary.next();
                end != BreakIterator.DONE;
                start = end, end = boundary.next()) {
            String token = str.substring(start, end).trim();
            if (!token.isEmpty()) {
                tokens.add(token);
            }
        }

        return tokens.toArray(new String[0]);
    }
}
