package uk.ac.shef.wit.simmetrics.tokenisers;

import com.intuit.logging.ILConstants;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import uk.ac.shef.wit.simmetrics.wordhandlers.DummyStopTermHandler;
import uk.ac.shef.wit.simmetrics.wordhandlers.InterfaceTermHandler;

/* loaded from: classes6.dex */
/**
 * Tokeniser that splits an input string into the fields found between
 * delimiter characters.
 *
 * <p>The delimiter set is taken from {@code ILConstants.COMMA} (presumably a
 * single comma; confirm against that constant). Tokens can be filtered through
 * a pluggable {@link InterfaceTermHandler}; the default is a
 * {@link DummyStopTermHandler}.
 */
public final class TokeniserCSVBasic implements Serializable, InterfaceTokeniser {
    /** Handler consulted for every candidate token; tokens for which {@code isWord} is true are dropped. */
    private InterfaceTermHandler stopWordHandler = new DummyStopTermHandler();

    /** Characters that terminate a token. */
    private final String delimiters = ILConstants.COMMA;

    /**
     * Returns the delimiter characters used to split the input.
     *
     * @return the delimiter characters
     */
    @Override // uk.ac.shef.wit.simmetrics.tokenisers.InterfaceTokeniser
    public final String getDelimiters() {
        return this.delimiters;
    }

    /**
     * Returns the short name of this tokeniser.
     *
     * @return the literal {@code "TokeniserCSVBasic"}
     */
    @Override // uk.ac.shef.wit.simmetrics.tokenisers.InterfaceTokeniser
    public final String getShortDescriptionString() {
        return "TokeniserCSVBasic";
    }

    /**
     * Returns the handler currently used to filter tokens.
     *
     * @return the current stop word handler
     */
    @Override // uk.ac.shef.wit.simmetrics.tokenisers.InterfaceTokeniser
    public InterfaceTermHandler getStopWordHandler() {
        return this.stopWordHandler;
    }

    /**
     * Replaces the handler used to filter tokens.
     *
     * @param interfaceTermHandler the handler to use from now on; it is
     *     dereferenced on every tokenise call, so it must not be {@code null}
     */
    @Override // uk.ac.shef.wit.simmetrics.tokenisers.InterfaceTokeniser
    public void setStopWordHandler(InterfaceTermHandler interfaceTermHandler) {
        this.stopWordHandler = interfaceTermHandler;
    }

    /**
     * Splits {@code str} on the delimiter characters and returns the tokens in
     * order of appearance.
     *
     * <p>At the start of each token at most one leading whitespace character is
     * skipped. A token is omitted when the stop word handler's {@code isWord}
     * returns true for it or when it is exactly a single space. Empty tokens
     * (for example between two adjacent delimiters) are not filtered here and
     * are returned unless the handler rejects them.
     *
     * @param str the text to tokenise; must not be {@code null}
     * @return a new list holding the tokens, possibly empty
     * @throws NullPointerException if {@code str} is {@code null}
     */
    @Override // uk.ac.shef.wit.simmetrics.tokenisers.InterfaceTokeniser
    public final ArrayList<String> tokenizeToArrayList(String str) {
        ArrayList<String> tokens = new ArrayList<>();
        final int inputLength = str.length();
        int pos = 0;
        while (pos < inputLength) {
            // Skip a single whitespace character in front of the token.
            if (Character.isWhitespace(str.charAt(pos))) {
                pos++;
            }

            // The token ends at the nearest delimiter, or at end of input.
            int tokenEnd = inputLength;
            for (int d = 0; d < this.delimiters.length(); d++) {
                int candidate = str.indexOf(this.delimiters.charAt(d), pos);
                if (candidate != -1 && candidate < tokenEnd) {
                    tokenEnd = candidate;
                }
            }

            String token = str.substring(pos, tokenEnd);
            if (!this.stopWordHandler.isWord(token) && !token.equals(StringUtils.SPACE)) {
                tokens.add(token);
            }

            // Resume after the delimiter. Resuming on the delimiter itself
            // would find the same delimiter again at the cursor, yield an
            // empty token, and never advance.
            pos = tokenEnd + 1;
        }
        return tokens;
    }

    /**
     * Tokenises {@code str} and returns the distinct tokens.
     *
     * @param str the text to tokenise; must not be {@code null}
     * @return a new set holding each distinct token once
     * @throws NullPointerException if {@code str} is {@code null}
     */
    @Override // uk.ac.shef.wit.simmetrics.tokenisers.InterfaceTokeniser
    public Set<String> tokenizeToSet(String str) {
        return new HashSet<>(tokenizeToArrayList(str));
    }
}
