package rita.support;

import java.util.List;
import java.util.regex.Pattern;
import processing.core.PApplet;
import rita.RiObject;
import rita.support.ifs.RiTokenizerIF;

/* loaded from: classes.dex */
/**
 * Regex-driven word tokenizer. Judging by its name it is meant to follow
 * Penn Treebank tokenization conventions: punctuation and brackets are
 * separated from words, and (optionally) English contractions such as
 * {@code n't} and {@code 'll} are split off into their own tokens.
 *
 * <p>All substitution patterns are compiled once when the class is loaded and
 * then applied, in a fixed order, on every call to {@link #tokenize(String)}.
 * The order matters: later rules rely on the spacing produced by earlier ones.
 */
public final class PennWordTokenizer extends RiObject implements RiTokenizerIF {
    /** Delimiter used to cut the normalized text into tokens. */
    private static final String SPC = " ";

    /**
     * Rules that are always applied first: quote/punctuation/bracket padding,
     * whitespace collapsing, and padding of the whole string with one leading
     * and one trailing space so that later rules can anchor on spaces.
     */
    private static final Rule[] BASE_RULES = {
        new Rule("``", "`` "),
        new Rule("''", "  ''"),
        new Rule("([\\?!\"\\.,;:@#$%&])", " $1 "),
        // NOTE(review): the rule above already surrounds every '.' with
        // spaces, so three adjacent dots cannot occur here and this rule
        // cannot match. It is kept so the rule sequence stays unchanged.
        new Rule("\\.\\.\\.", " ... "),
        new Rule("\\s+", " "),
        new Rule(",([^0-9])", " , $1"),
        new Rule("([^.])([.])([\\])}>\"']*)\\s*$", "$1 $2$3 "),
        new Rule("([\\[\\](){}<>])", " $1 "),
        new Rule("--", " -- "),
        new Rule("$", " "),
        new Rule("^", " "),
        new Rule("([^'])' ", "$1 ' "),
        new Rule("'([SMD]) ", " '$1 "),
    };

    /** Rules applied only when contraction splitting is enabled. */
    private static final Rule[] CONTRACTION_RULES = {
        new Rule("'ll ", " 'll "),
        new Rule("'re ", " 're "),
        new Rule("'ve ", " 've "),
        new Rule("n't ", " n't "),
        new Rule("'LL ", " 'LL "),
        new Rule("'RE ", " 'RE "),
        new Rule("'VE ", " 'VE "),
        new Rule("N'T ", " N'T "),
    };

    /**
     * Rules that are always applied last: splitting of fused words
     * ("cannot", "gonna", ...), re-attaching the period to a single capital
     * letter, and final whitespace normalization.
     */
    private static final Rule[] FINAL_RULES = {
        new Rule(" ([Cc])annot ", " $1an not "),
        new Rule(" ([Dd])'ye ", " $1' ye "),
        new Rule(" ([Gg])imme ", " $1im me "),
        new Rule(" ([Gg])onna ", " $1on na "),
        new Rule(" ([Gg])otta ", " $1ot ta "),
        new Rule(" ([Ll])emme ", " $1em me "),
        new Rule(" ([Mm])ore'n ", " $1ore 'n "),
        // NOTE(review): the next two replacements do not carry the leading
        // apostrophe over ("'tis" becomes "t is"); left as-is to preserve
        // the existing output.
        new Rule(" '([Tt])is ", " $1 is "),
        new Rule(" '([Tt])was ", " $1 was "),
        new Rule(" ([Ww])anna ", " $1an na "),
        new Rule(" ([A-Z]) \\.", " $1. "),
        new Rule("\\s+", " "),
        new Rule("^\\s+", ""),
    };

    /** Whether the contraction rules are applied by {@link #tokenize(String)}. */
    protected boolean splitContractions;

    /** Creates a tokenizer with no parent applet that splits contractions. */
    public PennWordTokenizer() {
        this((PApplet) null);
    }

    /**
     * Creates a tokenizer that splits contractions.
     *
     * @param pApplet value handed to the {@code RiObject} constructor; may be {@code null}
     */
    public PennWordTokenizer(PApplet pApplet) {
        this(pApplet, true);
    }

    /**
     * Creates a tokenizer.
     *
     * @param pApplet value handed to the {@code RiObject} constructor; may be {@code null}
     * @param z       {@code true} to split contractions into separate tokens
     */
    public PennWordTokenizer(PApplet pApplet, boolean z) {
        super(pApplet);
        this.splitContractions = z;
    }

    /**
     * Creates a tokenizer with no parent applet.
     *
     * @param z {@code true} to split contractions into separate tokens
     */
    public PennWordTokenizer(boolean z) {
        this(null, z);
    }

    /**
     * Small manual smoke test: tokenizes the string {@code "2"} with
     * contraction splitting disabled and prints the tokens to standard output.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
        PennWordTokenizer tokenizer = new PennWordTokenizer(false);
        String[] tokens = tokenizer.tokenize("2");
        System.out.print("[");
        for (String token : tokens) {
            System.out.print("'" + token + "' ");
        }
        System.out.println("]");
    }

    /**
     * @return {@code true} if contractions are split into separate tokens
     */
    public boolean isSplittingContractions() {
        return this.splitContractions;
    }

    /**
     * Enables or disables contraction splitting for subsequent calls.
     *
     * @param z {@code true} to split contractions into separate tokens
     */
    public void setSplitContractions(boolean z) {
        this.splitContractions = z;
    }

    /**
     * Tokenizes {@code str} and appends every token, in order, to {@code list}.
     *
     * @param str  text to tokenize; must not be {@code null}
     * @param list destination the tokens are added to; must not be {@code null}
     */
    // The raw List parameter is kept so the overriding signature is unchanged.
    @SuppressWarnings({"rawtypes", "unchecked"})
    @Override // rita.support.ifs.RiTokenizerIF
    public void tokenize(String str, List list) {
        for (String token : tokenize(str)) {
            list.add(token);
        }
    }

    /**
     * Splits {@code str} into tokens by running the base rules, then (if
     * enabled) the contraction rules, then the final rules, and cutting the
     * result on single spaces.
     *
     * <p>An empty input yields a one-element array holding the empty string,
     * because that is what {@link String#split(String)} returns for {@code ""}.
     *
     * @param str text to tokenize; must not be {@code null}
     * @return the tokens in their original order
     * @throws NullPointerException if {@code str} is {@code null}
     */
    @Override // rita.support.ifs.RiTokenizerIF
    public String[] tokenize(String str) {
        String text = applyAll(BASE_RULES, str);
        if (this.splitContractions) {
            text = applyAll(CONTRACTION_RULES, text);
        }
        return applyAll(FINAL_RULES, text).split(SPC);
    }

    /**
     * Not implemented.
     *
     * @param str ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public String tokenizeInline(String str) {
        throw new UnsupportedOperationException("unimplemented!");
    }

    /** Runs {@code text} through every rule of {@code rules}, in array order. */
    private static String applyAll(Rule[] rules, String text) {
        String result = text;
        for (Rule rule : rules) {
            result = rule.apply(result);
        }
        return result;
    }

    /** One regex substitution: a precompiled pattern plus its replacement template. */
    private static final class Rule {
        private final Pattern pattern;
        private final String replacement;

        Rule(String regex, String replacement) {
            this.pattern = Pattern.compile(regex);
            this.replacement = replacement;
        }

        /** Replaces every match of the pattern in {@code input}; same result as {@code input.replaceAll(regex, replacement)}. */
        String apply(String input) {
            return this.pattern.matcher(input).replaceAll(this.replacement);
        }
    }
}
