package weka.core.tokenizers;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.Utils;

/* loaded from: classes2.dex */
/**
 * Tokenizer that splits a string into the character n-grams it contains.
 *
 * <p>For every start position in the input, n-grams of increasing length are
 * emitted, starting at the configured minimum size and stopping at the
 * configured maximum size or at the end of the string, whichever comes first.
 * The cursor then moves to the next start position.
 *
 * <p>Supported options (in addition to those of the superclass):
 * <ul>
 *   <li>{@code -max <int>} - the maximum number of characters (default = 3)</li>
 *   <li>{@code -min <int>} - the minimum number of characters (default = 1)</li>
 * </ul>
 *
 * <p>Note: no check is made that the minimum does not exceed the maximum. If
 * it does, only n-grams of the minimum size are produced for each position.
 */
public class CharacterNGramTokenizer extends Tokenizer {
    private static final long serialVersionUID = -1181896253171647218L;

    /** Maximum n-gram size used when the {@code -max} option is not supplied. */
    private static final int DEFAULT_NGRAM_MAX_SIZE = 3;

    /** Minimum n-gram size used when the {@code -min} option is not supplied. */
    private static final int DEFAULT_NGRAM_MIN_SIZE = 1;

    /** Smallest n-gram size accepted by the setters; smaller values are raised to this. */
    private static final int SMALLEST_NGRAM_SIZE = 1;

    /** Start index (within {@link #m_String}) of the next n-gram to emit. */
    protected int m_CurrentPosition;

    /** Length of the next n-gram to emit. */
    protected int m_N;

    /** Largest n-gram length to emit. */
    protected int m_NMax = DEFAULT_NGRAM_MAX_SIZE;

    /** Smallest n-gram length to emit. */
    protected int m_NMin = DEFAULT_NGRAM_MIN_SIZE;

    /** The string currently being tokenized; {@code null} until {@link #tokenize(String)} is called. */
    protected String m_String;

    /**
     * Runs the tokenizer with the given command-line arguments.
     *
     * @param strArr the command-line options and input
     */
    public static void main(String[] strArr) {
        runTokenizer(new CharacterNGramTokenizer(), strArr);
    }

    /**
     * Returns the tip text for the maximum n-gram size property.
     *
     * @return the tip text
     */
    public String NGramMaxSizeTipText() {
        return "The maximum size of an n-gram.";
    }

    /**
     * Returns the tip text for the minimum n-gram size property.
     *
     * @return the tip text
     */
    public String NGramMinSizeTipText() {
        return "The minimum size of an n-gram.";
    }

    /**
     * Returns the maximum n-gram size.
     *
     * @return the maximum number of characters per n-gram
     */
    public int getNGramMaxSize() {
        return this.m_NMax;
    }

    /**
     * Returns the minimum n-gram size.
     *
     * @return the minimum number of characters per n-gram
     */
    public int getNGramMinSize() {
        return this.m_NMin;
    }

    /**
     * Returns the current option settings: {@code -max} and {@code -min} with
     * their values, followed by the options reported by the superclass.
     *
     * @return the options as an array of strings
     */
    @Override
    public String[] getOptions() {
        Vector<String> options = new Vector<String>();
        options.add("-max");
        options.add(Integer.toString(getNGramMaxSize()));
        options.add("-min");
        options.add(Integer.toString(getNGramMinSize()));
        Collections.addAll(options, super.getOptions());
        return options.toArray(new String[options.size()]);
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 10971 $");
    }

    /**
     * Returns a short description of this tokenizer.
     *
     * @return the description
     */
    @Override
    public String globalInfo() {
        return "Splits a string into all character n-grams it contains based on the given maximum and minimum for n.";
    }

    /**
     * Tests whether another n-gram can be produced from the current string.
     *
     * @return {@code true} if the n-gram at the current position and size fits
     *         inside the string; {@code false} otherwise, including when no
     *         string has been supplied yet
     */
    @Override
    public boolean hasMoreElements() {
        // Guard against use before tokenize(...) or after tokenize(null).
        return this.m_String != null
            && this.m_CurrentPosition + this.m_N <= this.m_String.length();
    }

    /**
     * Describes the available options: {@code -max}, {@code -min}, followed by
     * the options listed by the superclass.
     *
     * @return an enumeration of all available options
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> options = new Vector<Option>();
        options.addElement(new Option("\tThe maximum number of characters (default = 3).", "max", 1, "-max <int>"));
        options.addElement(new Option("\tThe minimum number of characters (default = 1).", "min", 1, "-min <int>"));
        options.addAll(Collections.list(super.listOptions()));
        return options.elements();
    }

    /**
     * Returns the next n-gram and advances the internal cursor.
     *
     * <p>The n-gram size grows by one on each call; once it would exceed the
     * maximum size or run past the end of the string, the size is reset to the
     * minimum and the start position moves one character to the right.
     *
     * @return the next n-gram, or {@code null} if the current window does not
     *         lie inside the string or no string has been supplied
     */
    @Override
    public String nextElement() {
        if (this.m_String == null) {
            return null;
        }

        final int length = this.m_String.length();
        final int start = this.m_CurrentPosition;
        final int end = start + this.m_N;

        // Explicit bounds check (same conditions under which substring would
        // throw) instead of catching StringIndexOutOfBoundsException.
        String token = null;
        if (start >= 0 && end >= start && end <= length) {
            token = this.m_String.substring(start, end);
        }

        // The cursor is advanced even when no token was produced, so repeated
        // calls always make progress.
        this.m_N++;
        if (this.m_N > this.m_NMax || this.m_CurrentPosition + this.m_N > length) {
            this.m_N = this.m_NMin;
            this.m_CurrentPosition++;
        }
        return token;
    }

    /**
     * Sets the maximum n-gram size.
     *
     * @param i the new maximum; values below 1 are replaced by 1
     */
    public void setNGramMaxSize(int i) {
        this.m_NMax = Math.max(i, SMALLEST_NGRAM_SIZE);
    }

    /**
     * Sets the minimum n-gram size.
     *
     * @param i the new minimum; values below 1 are replaced by 1
     */
    public void setNGramMinSize(int i) {
        this.m_NMin = Math.max(i, SMALLEST_NGRAM_SIZE);
    }

    /**
     * Parses the given options. {@code -max} and {@code -min} are read first
     * (falling back to 3 and 1 respectively when absent), then the remaining
     * options are handed to the superclass.
     *
     * @param strArr the options to parse
     * @throws Exception if an option value cannot be parsed (for example a
     *         non-integer value for {@code -max} or {@code -min}) or the
     *         superclass rejects the options
     */
    @Override
    public void setOptions(String[] strArr) throws Exception {
        String maxValue = Utils.getOption("max", strArr);
        setNGramMaxSize(maxValue.length() != 0 ? Integer.parseInt(maxValue) : DEFAULT_NGRAM_MAX_SIZE);

        String minValue = Utils.getOption("min", strArr);
        setNGramMinSize(minValue.length() != 0 ? Integer.parseInt(minValue) : DEFAULT_NGRAM_MIN_SIZE);

        super.setOptions(strArr);
    }

    /**
     * Sets the string to tokenize and resets the cursor to the first position
     * and the minimum n-gram size.
     *
     * @param str the string to tokenize; if {@code null}, no n-grams are produced
     */
    @Override
    public void tokenize(String str) {
        this.m_CurrentPosition = 0;
        this.m_String = str;
        this.m_N = this.m_NMin;
    }
}
