List of usage examples for org.apache.lucene.analysis.Token#toString()
@Override
public String toString()
From source file:com.globalsight.ling.lucene.analysis.cn.ChineseFilter.java
License:Apache License
public final Token next() throws IOException { Iterator<AttributeImpl> attIt = input.getAttributeImplsIterator(); while (attIt.hasNext()) { AttributeImpl att = attIt.next(); Token token = null; if (att instanceof GSAttributeImpl) { token = ((GSAttributeImpl) att).getToken(); }//w w w . jav a 2 s . c om if (token == null) { continue; } String text = token.toString(); // why not key off token type here assuming // ChineseTokenizer comes first? if (stopTable.get(text) == null) { switch (Character.getType(text.charAt(0))) { case Character.LOWERCASE_LETTER: case Character.UPPERCASE_LETTER: // English word/token should be larger than 1 character. if (text.length() > 1) { return token; } break; case Character.OTHER_LETTER: // One Chinese character as one Chinese word. // Chinese word extraction to be added later here. return token; } } } return null; }
From source file:com.globalsight.ling.lucene.analysis.GSTokenFilter.java
License:Apache License
/**
 * Advances the stream by one token obtained from {@code next()}.
 *
 * <p>All attributes are cleared first. When a token is available it is
 * stored in {@code gsAtt}, its string form is appended to {@code termAtt},
 * and, if {@code termAtt} is also a {@code TermToBytesRefAttribute}, its
 * bytes reference is refreshed.
 *
 * @return {@code true} if a token was produced, {@code false} when
 *         {@code next()} returned {@code null}
 * @throws IOException if {@code next()} fails
 */
@Override
public boolean incrementToken() throws IOException {
    clearAttributes();

    final Token current = next();
    if (current == null) {
        return false;
    }

    gsAtt.setToken(current);
    termAtt.append(current.toString());
    if (termAtt instanceof TermToBytesRefAttribute) {
        TermToBytesRefAttribute bytesAtt = (TermToBytesRefAttribute) termAtt;
        bytesAtt.fillBytesRef();
    }
    return true;
}
From source file:com.globalsight.ling.lucene.analysis.GSTokenizer.java
License:Apache License
@Override public boolean incrementToken() throws IOException { clearAttributes();// w ww . j a va2 s . com Token tt = next(); if (tt == null) { return false; } else { gsAtt.setToken(tt); termAtt.append(tt.toString()); offsetAtt.setOffset(tt.startOffset(), tt.endOffset()); posIncrAtt.setPositionIncrement(tt.getPositionIncrement()); return true; } }
From source file:com.globalsight.ling.lucene.analysis.GSTokenStream.java
License:Apache License
@Override public boolean incrementToken() throws IOException { clearAttributes();//from w w w. j av a 2s . c o m Token tt = next(); if (tt == null) { return false; } else { termAtt.append(tt.toString()); gsAtt.setToken(tt); return true; } }
From source file:com.globalsight.ling.lucene.analysis.GSTokenTokenizer.java
License:Apache License
@Override public boolean incrementToken() throws IOException { clearAttributes();//from ww w . java 2 s . c om Token tt = next(); if (tt == null) { return false; } else { gsAtt.setToken(tt); termAtt.append(tt.toString()); return true; } }
From source file:com.globalsight.ling.lucene.analysis.ngram.NgramAnalyzer.java
License:Apache License
/**
 * Debug helper: tokenizes {@code p_text} with an {@code NgramTokenizer}
 * constructed with the value 3 and prints the text followed by one line
 * per token in the form {@code token (start:end)} to standard output.
 *
 * @param p_text the text to tokenize
 * @throws java.io.IOException if the tokenizer fails while reading
 */
static void test(String p_text) throws java.io.IOException {
    // Constructed as in the original; the instance is not used afterwards.
    NgramAnalyzer analyzer = new NgramAnalyzer(3);
    NgramTokenizer tokenizer = new NgramTokenizer(new java.io.StringReader(p_text), 3);

    System.out.println("Text = " + p_text);

    for (Token tok = tokenizer.next(); tok != null; tok = tokenizer.next()) {
        StringBuilder line = new StringBuilder();
        line.append(tok.toString());
        line.append(" (").append(tok.startOffset());
        line.append(":").append(tok.endOffset());
        line.append(")");
        System.out.println(line.toString());
    }
}
From source file:com.globalsight.ling.lucene.analysis.ngram.NgramNoPunctuationAnalyzer.java
License:Apache License
static void test(String p_text) throws java.io.IOException { Analyzer x = new NgramNoPunctuationAnalyzer(3); NgramNoPunctuationTokenizer y = new NgramNoPunctuationTokenizer(new java.io.StringReader(p_text), 3); System.out.println("Text = " + p_text); Token t; while ((t = y.next()) != null) { System.out.println(t.toString() + " (" + t.startOffset() + ":" + t.endOffset() + ")"); }//from ww w . j a v a 2 s. c o m }
From source file:com.globalsight.ling.lucene.analysis.pl.PolishFilter.java
License:Apache License
/** Returns the next input Token, after being stemmed */ public final Token next() throws IOException { Token token = getNextToken(); if (token == null) { return null; } else {/*from w w w. ja va 2 s . c om*/ String s = stemmer.stem(token.toString(), true); if (!s.equals(token.toString())) { // reconstruct the input token. This is silly... Token res = new Token(s, token.startOffset(), token.endOffset(), token.type()); res.setPositionIncrement(token.getPositionIncrement()); return res; } return token; } }
From source file:com.globalsight.ling.lucene.analysis.ru.RussianLowerCaseFilter.java
License:Apache License
public final Token next() throws java.io.IOException { Token t = getNextToken(); if (t == null) return null; String txt = t.toString(); char[] chArray = txt.toCharArray(); for (int i = 0; i < chArray.length; i++) { chArray[i] = RussianCharsets.toLowerCase(chArray[i], charset); }//w ww. j a va 2s. c o m String newTxt = new String(chArray); // create new token Token newToken = new Token(newTxt, t.startOffset(), t.endOffset()); return newToken; }
From source file:com.globalsight.ling.lucene.analysis.snowball.SnowballFilter.java
License:Apache License
/**
 * Returns the next input token after being stemmed.
 *
 * <p>The token text is passed to {@code stemmer}, and a new token is
 * built from the stemmer's current value with the input token's offsets,
 * type and position increment.
 *
 * @return the stemmed token, or {@code null} when the input is exhausted
 * @throws IOException if fetching the next input token fails
 */
public final Token next() throws IOException {
    Token token = getNextToken();
    if (token == null) {
        return null;
    }

    stemmer.setCurrent(token.toString());
    stemmer.stem();

    Token stemmed = new Token(stemmer.getCurrent(), token.startOffset(), token.endOffset(),
            token.type());
    // Copy the position increment as well; the constructor call alone
    // does not carry it over from the input token.
    stemmed.setPositionIncrement(token.getPositionIncrement());
    return stemmed;
}