package com.streamscape.recasepunc;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.StringTokenizer;

/* loaded from: input_file:com/streamscape/recasepunc/RecasepuncTokenizer.class */
/**
 * Splits raw text into lower-cased word and punctuation tokens and resolves each
 * token to an id through a {@link RecasepuncVocab}.
 */
public class RecasepuncTokenizer {
    /**
     * Characters that separate tokens. The tokenizer is configured to return
     * delimiters as tokens too, so punctuation characters end up in the output
     * while the whitespace ones are trimmed to empty strings and discarded.
     */
    private static final String DELIMITERS = " \t\n\r\f,.:;?![]'\"(){}<>";

    private final RecasepuncVocab recasepuncVocab;

    /**
     * A single token together with its vocabulary id and a start/end pair.
     *
     * <p>NOTE(review): {@code start} and {@code end} look like positions or time
     * offsets of the token; their meaning and units are not visible in this file.
     */
    public static class Token {
        /** Token text as produced by the tokenizer. */
        public String token;
        /** Id looked up for {@link #token} in the vocabulary. */
        public int tokenId;
        /** Start value; {@code 0.0} when built with the two-argument constructor. */
        public double start;
        /** End value; {@code 0.0} when built with the two-argument constructor. */
        public double end;

        /**
         * Creates a token with {@code start} and {@code end} both set to {@code 0.0}.
         *
         * @param str token text
         * @param i   vocabulary id of the token
         */
        public Token(String str, int i) {
            this(str, i, 0.0d, 0.0d);
        }

        /**
         * Creates a token with explicit start and end values.
         *
         * @param str token text
         * @param i   vocabulary id of the token
         * @param d   start value
         * @param d2  end value
         */
        public Token(String str, int i, double d, double d2) {
            this.token = str;
            this.tokenId = i;
            this.start = d;
            this.end = d2;
        }
    }

    /**
     * Creates a tokenizer that resolves token ids through the given vocabulary.
     *
     * @param recasepuncVocab vocabulary used by {@link #tokenizeToTokens(String)} and
     *                        {@link #getTokenId(String)}
     */
    public RecasepuncTokenizer(RecasepuncVocab recasepuncVocab) {
        this.recasepuncVocab = recasepuncVocab;
    }

    /**
     * Tokenizes the text with {@link #tokenizeToWords(String)} and wraps every word in a
     * {@link Token} carrying the id returned by the vocabulary's {@code getIdOrUnknown}.
     *
     * @param str text to tokenize
     * @return a new mutable list of tokens in input order, with start/end set to {@code 0.0}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public List<Token> tokenizeToTokens(String str) {
        List<String> words = tokenizeToWords(str);
        List<Token> tokens = new ArrayList<>(words.size());
        for (String word : words) {
            tokens.add(new Token(word, this.recasepuncVocab.getIdOrUnknown(word)));
        }
        return tokens;
    }

    /**
     * Looks up the id of a single token by delegating to the vocabulary's
     * {@code getIdOrUnknown}.
     *
     * @param str token text; it is passed to the vocabulary as-is (no lower-casing here)
     * @return the id reported by the vocabulary
     */
    public int getTokenId(String str) {
        return this.recasepuncVocab.getIdOrUnknown(str);
    }

    /**
     * Splits the text on whitespace and punctuation, lower-cases every piece and drops
     * the pieces that are empty after trimming. Punctuation characters are kept as
     * single-character tokens.
     *
     * @param str text to tokenize
     * @return a new mutable list of lower-cased, non-empty tokens in input order
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public List<String> tokenizeToWords(String str) {
        List<String> words = new ArrayList<>();
        StringTokenizer stringTokenizer = new StringTokenizer(str, DELIMITERS, true);
        while (stringTokenizer.hasMoreTokens()) {
            // Locale.ROOT keeps the result independent of the JVM default locale; with the
            // no-arg toLowerCase() a Turkish default locale maps "I" to dotless "ı", which
            // would make the same text tokenize differently from machine to machine.
            String word = stringTokenizer.nextToken().toLowerCase(Locale.ROOT).trim();
            if (!word.isEmpty()) {
                words.add(word);
            }
        }
        return words;
    }
}
