List of usage examples for the method org.apache.lucene.analysis.util.StemmerUtil.delete
public static int delete(char s[], int pos, int len)
From source file: com.romeikat.datamessie.core.processing.service.stemming.text.KeywordAwareGermanNormalizationFilter.java
License: Open Source License
/**
 * Advances the wrapped stream by one token and normalizes German characters in its term
 * buffer in place, unless the token is flagged as a keyword.
 *
 * <p>Rewrites applied to non-keyword tokens:
 * <ul>
 *   <li>{@code \u00e4 -> a}, {@code \u00f6 -> o}, {@code \u00fc -> u}</li>
 *   <li>{@code \u00df -> ss} (the term grows by one character)</li>
 *   <li>an {@code e} is removed when the scanner is in state {@code U}, i.e. directly after
 *       {@code a}, {@code o}, or a {@code u} that was read while in state {@code N}</li>
 * </ul>
 *
 * @return {@code true} if a token was produced, {@code false} once the input is exhausted
 * @throws IOException if the wrapped stream fails while advancing
 */
@Override
public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
        return false;
    }
    if (keywordAttr.isKeyword()) {
        // Keyword tokens pass through untouched.
        return true;
    }

    int state = N;
    char[] chars = termAtt.buffer();
    int len = termAtt.length();

    for (int pos = 0; pos < len; pos++) {
        final char current = chars[pos];
        switch (current) {
            case 'a':
            case 'o':
                state = U;
                break;

            case 'u':
                if (state == N) {
                    state = U;
                } else {
                    state = V;
                }
                break;

            case 'e':
                if (state == U) {
                    len = StemmerUtil.delete(chars, pos, len);
                    // Step back so the loop increment re-reads the character that moved
                    // into this slot.
                    pos--;
                }
                state = V;
                break;

            case 'i':
            case 'q':
            case 'y':
                state = V;
                break;

            case '\u00e4':
                chars[pos] = 'a';
                state = V;
                break;

            case '\u00f6':
                chars[pos] = 'o';
                state = V;
                break;

            case '\u00fc':
                chars[pos] = 'u';
                state = V;
                break;

            case '\u00df': {
                // Expand the single character into "ss": overwrite it with the first 's',
                // then open a one-character gap right after it for the second 's'.
                chars[pos] = 's';
                pos++;
                chars = termAtt.resizeBuffer(len + 1);
                final int tail = len - pos;
                if (tail > 0) {
                    System.arraycopy(chars, pos, chars, pos + 1, tail);
                }
                chars[pos] = 's';
                len++;
                state = N;
                break;
            }

            default:
                state = N;
        }
    }

    termAtt.setLength(len);
    return true;
}
From source file: jp.ameba.elasticsearch.analysis.japanese.tiny.CJKWidthFilter.java
License: Apache License
public boolean incrementToken() throws IOException { if (input.incrementToken()) { char text[] = termAtt.buffer(); int length = termAtt.length(); for (int i = 0; i < length; i++) { final char ch = text[i]; if (ch >= 0xFF01 && ch <= 0xFF5E) { // Fullwidth ASCII variants text[i] -= 0xFEE0;//from w ww .ja va 2 s . co m } else if (ch >= 0xFF65 && ch <= 0xFF9F) { // Halfwidth Katakana variants if ((ch == 0xFF9E || ch == 0xFF9F) && i > 0 && combine(text, i, length, ch)) { length = StemmerUtil.delete(text, i--, length); } else { text[i] = KANA_NORM[ch - 0xFF65]; } } } termAtt.setLength(length); return true; } else { return false; } }