/*
 * Decompiled with CFR 0.152.
 */
package weka.core.tokenizers;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.tokenizers.Tokenizer;

/**
 * Tokenizer that splits a string into the character n-grams it contains.
 *
 * <p>For each start position in the string, n-grams are produced in order of
 * increasing length, starting at the configured minimum size and stopping at
 * the configured maximum size or at the end of the string, whichever comes
 * first. The cursor then moves one character to the right.
 *
 * <p>If the minimum size is larger than the maximum size, only n-grams of the
 * minimum size are produced.
 *
 * <p>Options handled by this class: {@code -max <int>} (default 3) and
 * {@code -min <int>} (default 1); remaining options are passed to the
 * superclass.
 */
public class CharacterNGramTokenizer extends Tokenizer {
    private static final long serialVersionUID = -1181896253171647218L;

    /** Maximum n-gram size used when the {@code -max} option is absent. */
    private static final int DEFAULT_N_MAX = 3;

    /** Minimum n-gram size used when the {@code -min} option is absent. */
    private static final int DEFAULT_N_MIN = 1;

    /** Largest n-gram size to produce. */
    protected int m_NMax = DEFAULT_N_MAX;

    /** Smallest n-gram size to produce. */
    protected int m_NMin = DEFAULT_N_MIN;

    /** Size of the n-gram that the next call to {@link #nextElement()} returns. */
    protected int m_N;

    /** Start index (within {@link #m_String}) of the next n-gram. */
    protected int m_CurrentPosition;

    /** The string currently being tokenized; {@code null} until {@link #tokenize(String)} is called. */
    protected String m_String;

    /**
     * Returns a short description of this tokenizer.
     *
     * @return the description text
     */
    @Override
    public String globalInfo() {
        return "Splits a string into all character n-grams it contains based on the given maximum and minimum for n.";
    }

    /**
     * Lists the options of this tokenizer ({@code -max}, {@code -min})
     * followed by the options reported by the superclass.
     *
     * @return an enumeration of all available options
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> result = new Vector<Option>();
        result.addElement(new Option("\tThe maximum number of characters (default = 3).", "max", 1, "-max <int>"));
        result.addElement(new Option("\tThe minimum number of characters (default = 1).", "min", 1, "-min <int>"));
        result.addAll(Collections.list(super.listOptions()));
        return result.elements();
    }

    /**
     * Returns the current option settings: {@code -max} and {@code -min} with
     * their values, followed by the options reported by the superclass.
     *
     * @return the current options as an array of strings
     */
    @Override
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();
        result.add("-max");
        result.add(String.valueOf(getNGramMaxSize()));
        result.add("-min");
        result.add(String.valueOf(getNGramMinSize()));
        Collections.addAll(result, super.getOptions());
        return result.toArray(new String[result.size()]);
    }

    /**
     * Parses the {@code -max} and {@code -min} options and then hands the
     * option array to the superclass. When an option yields an empty value,
     * the corresponding default (3 for max, 1 for min) is applied.
     *
     * @param options the options to parse
     * @throws Exception if an option value cannot be parsed as an integer or
     *         the option handling otherwise fails
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String value = Utils.getOption("max", options);
        setNGramMaxSize(value.isEmpty() ? DEFAULT_N_MAX : Integer.parseInt(value));

        value = Utils.getOption("min", options);
        setNGramMinSize(value.isEmpty() ? DEFAULT_N_MIN : Integer.parseInt(value));

        super.setOptions(options);
    }

    /**
     * Returns the maximum n-gram size.
     *
     * @return the maximum size
     */
    public int getNGramMaxSize() {
        return m_NMax;
    }

    /**
     * Sets the maximum n-gram size. Values below 1 are replaced by 1.
     *
     * @param value the requested maximum size
     */
    public void setNGramMaxSize(int value) {
        m_NMax = Math.max(1, value);
    }

    /**
     * Returns the tip text for the maximum-size property.
     *
     * @return the tip text
     */
    public String NGramMaxSizeTipText() {
        return "The maximum size of an n-gram.";
    }

    /**
     * Sets the minimum n-gram size. Values below 1 are replaced by 1.
     *
     * @param value the requested minimum size
     */
    public void setNGramMinSize(int value) {
        m_NMin = Math.max(1, value);
    }

    /**
     * Returns the minimum n-gram size.
     *
     * @return the minimum size
     */
    public int getNGramMinSize() {
        return m_NMin;
    }

    /**
     * Returns the tip text for the minimum-size property.
     *
     * @return the tip text
     */
    public String NGramMinSizeTipText() {
        return "The minimum size of an n-gram.";
    }

    /**
     * Tells whether another n-gram can be produced from the current string.
     *
     * @return {@code true} if an n-gram of the current size fits between the
     *         current position and the end of the string; {@code false}
     *         otherwise, including when no string has been set
     */
    @Override
    public boolean hasMoreElements() {
        // No string yet (tokenize not called, or called with null): nothing to emit.
        if (m_String == null) {
            return false;
        }
        // Compare against the remaining length instead of adding position and
        // size, so very large n-gram sizes cannot overflow int.
        return m_N <= m_String.length() - m_CurrentPosition;
    }

    /**
     * Returns the next n-gram and advances the cursor: the n-gram size grows
     * by one until it exceeds the maximum or no longer fits in the string, at
     * which point the size is reset to the minimum and the start position
     * moves one character forward.
     *
     * @return the next n-gram, or {@code null} if there are no more n-grams
     */
    @Override
    public String nextElement() {
        // Exhausted: keep returning null, and leave the cursor untouched so
        // repeated calls cannot corrupt the state.
        if (!hasMoreElements()) {
            return null;
        }
        String result = m_String.substring(m_CurrentPosition, m_CurrentPosition + m_N);
        ++m_N;
        if (m_N > m_NMax || m_N > m_String.length() - m_CurrentPosition) {
            m_N = m_NMin;
            ++m_CurrentPosition;
        }
        return result;
    }

    /**
     * Sets the string to tokenize and resets the cursor to the first
     * character and the minimum n-gram size.
     *
     * @param s the string to tokenize
     */
    @Override
    public void tokenize(String s) {
        m_CurrentPosition = 0;
        m_String = s;
        m_N = m_NMin;
    }

    /**
     * Returns the revision information extracted from the revision keyword
     * string via {@code RevisionUtils.extract}.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 10971 $");
    }

    /**
     * Command-line entry point; passes a new tokenizer instance and the
     * arguments to {@code runTokenizer}.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        CharacterNGramTokenizer.runTokenizer(new CharacterNGramTokenizer(), args);
    }
}

