com.github.steveash.jg2p.util.TokenSeqUtil.java Source code

Java tutorial

Introduction

Here is the source code for com.github.steveash.jg2p.util.TokenSeqUtil.java

Source

/*
 * Copyright 2014 Steve Ash
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.github.steveash.jg2p.util;

import com.google.common.base.CharMatcher;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

import java.util.List;

import javax.annotation.Nullable;

import cc.mallet.types.Token;

import static com.google.common.base.CharMatcher.WHITESPACE;

/**
 * Static helpers for cutting fixed-width character windows out of a sequence of tokens and for
 * reducing such a window to a coarse consonant/vowel "shape".
 *
 * <p>Windows are addressed relative to a token index {@code current}. The token at
 * {@code current} is never part of a window: negative offsets read the characters of the tokens
 * before it, positive offsets read the characters of the tokens after it. Empty tokens are
 * skipped and the characters of neighbouring tokens are treated as one continuous stream.
 *
 * <p>In both directions the window begins at the character that is {@code |windowOffset|}
 * characters away from {@code current} and spans {@code windowWidth} characters to the right,
 * returned in reading order. For the tokens {@code ["A", "BC", "", "D", "E"]} and
 * {@code current = 3}:
 * <ul>
 *   <li>offset {@code -1}, width {@code 1} yields {@code "C"}</li>
 *   <li>offset {@code -3}, width {@code 2} yields {@code "AB"}</li>
 *   <li>offset {@code 1}, width {@code 1} yields {@code "E"}</li>
 *   <li>offset {@code 1}, width {@code 2} yields {@code null} (only one character follows)</li>
 * </ul>
 *
 * @author Steve Ash
 */
public class TokenSeqUtil {

    /** Upper-case vowels; {@code Y} is counted as a vowel. */
    private static final CharMatcher vowels = CharMatcher.anyOf("AEIOUY").precomputed();

    /** Upper-case ASCII letters that are not in {@link #vowels}. */
    private static final CharMatcher consonants = CharMatcher.inRange('A', 'Z').and(vowels.negate()).precomputed();

    /** Extracts the text of a token so a token list can be viewed as a list of strings. */
    private static final Function<Token, String> tokenToString = new Function<Token, String>() {
        @Override
        public String apply(Token input) {
            return input.getText();
        }
    };

    /**
     * Returns the character window around {@code current} using the text of each token.
     *
     * @param ts           the token sequence
     * @param current      index of the reference token; the window never includes this token
     * @param windowOffset non-zero offset of the window start; negative looks backward,
     *                     positive looks forward
     * @param windowWidth  number of characters in the window; must not be negative
     * @return the window in reading order, or {@code null} if the sequence does not hold enough
     *         characters in the requested direction
     * @throws IllegalArgumentException if {@code windowOffset} is zero, if {@code windowWidth}
     *                                  is negative, or if a backward window would reach past
     *                                  {@code current}
     */
    @Nullable
    public static String getWindow(List<Token> ts, int current, int windowOffset, int windowWidth) {
        List<String> texts = Lists.transform(ts, tokenToString);
        return getWindowFromStrings(texts, current, windowOffset, windowWidth);
    }

    /**
     * Returns the character window around {@code current} for a sequence of token strings.
     *
     * <p>A {@code windowWidth} of zero can never be filled and therefore always produces
     * {@code null}.
     *
     * @param ts           the token strings
     * @param current      index of the reference token; the window never includes this token
     * @param windowOffset non-zero offset of the window start; negative looks backward,
     *                     positive looks forward
     * @param windowWidth  number of characters in the window; must not be negative
     * @return the window in reading order, or {@code null} if the sequence does not hold enough
     *         characters in the requested direction
     * @throws IllegalArgumentException if {@code windowOffset} is zero, if {@code windowWidth}
     *                                  is negative, or if a backward window would reach past
     *                                  {@code current}
     */
    @Nullable
    public static String getWindowFromStrings(List<String> ts, int current, int windowOffset, int windowWidth) {
        if (windowOffset < 0) {
            return getBackwardWindowFromString(ts, current, windowOffset, windowWidth);
        }
        return getForwardWindowFromString(ts, current, windowOffset, windowWidth);
    }

    /**
     * Collects a window from the characters that precede the token at {@code current}.
     *
     * <p>Characters are consumed right to left starting with the last character of the previous
     * non-empty token, so the collected characters are reversed before being returned.
     */
    @Nullable
    private static String getBackwardWindowFromString(List<String> ts, int current, int windowOffset,
            int windowWidth) {
        Preconditions.checkArgument(windowOffset < 0,
                "windowOffset must be negative for a backward window but was %s", windowOffset);
        Preconditions.checkArgument(windowWidth >= 0,
                "windowWidth must not be negative but was %s", windowWidth);
        Preconditions.checkArgument(windowOffset + windowWidth <= 0,
                "backward window (offset %s, width %s) must end before the current token",
                windowOffset, windowWidth);

        // Characters lying between the current token and the right edge of the window.
        int skip = -(windowOffset + windowWidth);

        StringBuilder sb = new StringBuilder(windowWidth);
        int consumed = 0;
        int tokenIndex = current;
        while (true) {
            tokenIndex -= 1;
            if (tokenIndex < 0) {
                return null; // ran out of characters before the window was filled
            }
            String str = ts.get(tokenIndex);
            // An empty token contributes nothing and is stepped over by the empty inner loop.
            for (int i = str.length() - 1; i >= 0; i--) {
                if (consumed >= skip) {
                    sb.append(str.charAt(i));
                    if (sb.length() == windowWidth) {
                        return sb.reverse().toString();
                    }
                }
                consumed += 1;
            }
        }
    }

    /**
     * Collects a window from the characters that follow the token at {@code current}.
     *
     * <p>Characters are consumed left to right starting with the first character of the next
     * non-empty token.
     */
    @Nullable
    private static String getForwardWindowFromString(List<String> ts, int current, int windowOffset,
            int windowWidth) {
        Preconditions.checkArgument(windowOffset > 0,
                "windowOffset must be positive for a forward window but was %s", windowOffset);
        Preconditions.checkArgument(windowWidth >= 0,
                "windowWidth must not be negative but was %s", windowWidth);

        // Offset 1 means "the character right after the current token", so nothing is skipped
        // for it; this mirrors offset -1 meaning the character right before the current token.
        int skip = windowOffset - 1;

        StringBuilder sb = new StringBuilder(windowWidth);
        int consumed = 0;
        int tokenIndex = current;
        while (true) {
            tokenIndex += 1;
            if (tokenIndex >= ts.size()) {
                return null; // ran out of characters before the window was filled
            }
            String str = ts.get(tokenIndex);
            // An empty token contributes nothing and is stepped over by the empty inner loop.
            for (int i = 0; i < str.length(); i++) {
                if (consumed >= skip) {
                    sb.append(str.charAt(i));
                    if (sb.length() == windowWidth) {
                        return sb.toString();
                    }
                }
                consumed += 1;
            }
        }
    }

    /**
     * Maps every character of a window to a one-letter class and returns the resulting string.
     *
     * <ul>
     *   <li>{@code 'c'} for an upper-case consonant ({@code A-Z} except {@code AEIOUY})</li>
     *   <li>{@code 'v'} for an upper-case vowel ({@code AEIOUY})</li>
     *   <li>{@code 's'} for whitespace as defined by Guava's {@code CharMatcher.WHITESPACE}</li>
     *   <li>{@code 'p'} for anything else, which includes lower-case letters</li>
     * </ul>
     *
     * @param winStr the window to convert; may be {@code null}
     * @return the shape string, the same length as {@code winStr}, or {@code null} if
     *         {@code winStr} is {@code null}
     */
    @Nullable
    public static String convertShape(String winStr) {
        if (winStr == null) {
            return null;
        }
        StringBuilder sb = new StringBuilder(winStr.length());
        for (int i = 0; i < winStr.length(); i++) {
            char c = winStr.charAt(i);
            if (consonants.matches(c)) {
                sb.append('c');
            } else if (vowels.matches(c)) {
                sb.append('v');
            } else if (WHITESPACE.matches(c)) {
                sb.append('s');
            } else {
                sb.append('p');
            }
        }
        return sb.toString();
    }
}