Example usage for org.apache.lucene.analysis.util StemmerUtil delete

List of usage examples for org.apache.lucene.analysis.util StemmerUtil delete

Introduction

This page shows example usages of the delete method of org.apache.lucene.analysis.util.StemmerUtil.

Prototype

public static int delete(char s[], int pos, int len) 

Source Link

Document

Delete a character in-place

Usage

From source file:com.romeikat.datamessie.core.processing.service.stemming.text.KeywordAwareGermanNormalizationFilter.java

License:Open Source License

/**
 * Advances to the next upstream token and normalizes its term text in place.
 *
 * <p>Tokens flagged as keywords are passed through unchanged. For all other
 * tokens the term buffer is scanned once, left to right:
 * <ul>
 *   <li>{@code ä}, {@code ö}, {@code ü} are replaced by {@code a}, {@code o}, {@code u};</li>
 *   <li>{@code ß} is expanded to {@code ss} (the buffer is grown by one char);</li>
 *   <li>an {@code e} is removed when the scanner is in state {@code U}, which is
 *       entered after {@code a} or {@code o}, or after a {@code u} seen in state {@code N}.</li>
 * </ul>
 *
 * @return {@code true} if the upstream produced a token, {@code false} otherwise
 * @throws IOException if advancing the upstream token stream fails
 */
@Override
public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
        return false;
    }
    if (keywordAttr.isKeyword()) {
        // Keyword tokens must not be altered.
        return true;
    }

    char[] chars = termAtt.buffer();
    int len = termAtt.length();
    int prev = N;
    int pos = 0;
    while (pos < len) {
        final char current = chars[pos];
        if (current == 'a' || current == 'o') {
            prev = U;
        } else if (current == 'u') {
            prev = (prev == N) ? U : V;
        } else if (current == 'e') {
            if (prev == U) {
                // Drop the 'e'; the value returned by delete is used as the new length.
                len = StemmerUtil.delete(chars, pos, len);
                // Step back so the shared increment below lands on the char
                // that moved into the freed slot.
                pos--;
            }
            prev = V;
        } else if (current == 'i' || current == 'q' || current == 'y') {
            prev = V;
        } else if (current == '\u00e4') {
            chars[pos] = 'a';
            prev = V;
        } else if (current == '\u00f6') {
            chars[pos] = 'o';
            prev = V;
        } else if (current == '\u00fc') {
            chars[pos] = 'u';
            prev = V;
        } else if (current == '\u00df') {
            // Expand to "ss": overwrite this slot, then open a gap right after it.
            chars[pos] = 's';
            final int insertAt = pos + 1;
            // resizeBuffer may hand back a different array, so re-bind the local.
            chars = termAtt.resizeBuffer(len + 1);
            if (insertAt < len) {
                System.arraycopy(chars, insertAt, chars, insertAt + 1, len - insertAt);
            }
            chars[insertAt] = 's';
            len++;
            // Continue scanning after the inserted second 's'.
            pos = insertAt;
            prev = N;
        } else {
            prev = N;
        }
        pos++;
    }
    termAtt.setLength(len);
    return true;
}

From source file:jp.ameba.elasticsearch.analysis.japanese.tiny.CJKWidthFilter.java

License:Apache License

/**
 * Advances to the next upstream token and folds width variants in its term
 * text in place.
 *
 * <p>Chars in {@code U+FF01..U+FF5E} (fullwidth ASCII variants) are shifted
 * down by {@code 0xFEE0}. Chars in {@code U+FF65..U+FF9F} (halfwidth Katakana
 * variants) are replaced via the {@code KANA_NORM} table, except that a
 * {@code U+FF9E}/{@code U+FF9F} mark which is not the first char and for which
 * {@code combine} returns {@code true} is deleted from the buffer instead.
 *
 * @return {@code true} if the upstream produced a token, {@code false} otherwise
 * @throws IOException if advancing the upstream token stream fails
 */
public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
        return false;
    }

    final char[] chars = termAtt.buffer();
    int len = termAtt.length();
    int pos = 0;
    while (pos < len) {
        final char current = chars[pos];
        final boolean fullwidthAscii = current >= 0xFF01 && current <= 0xFF5E;
        final boolean halfwidthKana = current >= 0xFF65 && current <= 0xFF9F;

        if (fullwidthAscii) {
            chars[pos] -= 0xFEE0;
        } else if (halfwidthKana) {
            final boolean soundMark = current == 0xFF9E || current == 0xFF9F;
            // NOTE(review): combine is defined outside this view; presumably it
            // merges the mark into the preceding char - confirm.
            if (soundMark && pos > 0 && combine(chars, pos, len, current)) {
                // Remove the mark; the value returned by delete is used as the new length.
                len = StemmerUtil.delete(chars, pos, len);
                // Step back so the shared increment below revisits this slot.
                pos--;
            } else {
                chars[pos] = KANA_NORM[current - 0xFF65];
            }
        }
        pos++;
    }
    termAtt.setLength(len);
    return true;
}