List of usage examples for org.apache.lucene.analysis TokenStream incrementToken
/**
 * Signature of the method whose usages are listed below.
 *
 * <p>The examples in this listing call it as a loop condition
 * ({@code while (stream.incrementToken())}) and read attribute values inside the loop body.
 *
 * @return a boolean; the examples keep iterating for as long as it is {@code true}
 * @throws IOException declared by the signature
 */
public abstract boolean incrementToken() throws IOException;
From source file:analysis.AnalyzerUtils.java
License:Apache License
/**
 * Prints the position increment of every token that {@code analyzer} produces for {@code text}.
 *
 * <p>Each token is written to standard output on its own line as {@code posIncr=<n>}.
 *
 * @param analyzer the analyzer used to tokenize {@code text}
 * @param text the text to analyze
 * @throws IOException if the token stream fails while being consumed or closed
 */
public static void displayPositionIncrements(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        // Consumer workflow: reset() before the first incrementToken(), end() after the last.
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println("posIncr=" + posIncr.getPositionIncrement());
        }
        stream.end();
    } finally {
        // Release the stream's resources even if consumption fails part-way.
        stream.close();
    }
}
From source file:analysis.FtpFilePathAnalyzer.java
License:Apache License
/**
 * Tokenizes the sample path {@code "c++c++"} with {@code FtpFilePathAnalyzer} and prints, for
 * every token, its offsets, position increment, type and term text to standard output.
 *
 * <p>An {@link IOException} raised while reading or closing the stream is reported via its
 * stack trace rather than propagated.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    Analyzer ana = new FtpFilePathAnalyzer();
    String test2 = "c++c++";
    TokenStream ts = ana.tokenStream("path", new StringReader(test2));
    try {
        try {
            // Look the attributes up once instead of on every token. addAttribute() returns the
            // existing instance or registers a new one, so it cannot fail the way getAttribute()
            // does when a stream lacks the attribute. The casts are kept so the code also
            // compiles against the older, non-generic attribute API.
            TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
            OffsetAttribute offsetAtt = (OffsetAttribute) ts.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute posIncrAtt = (PositionIncrementAttribute) ts
                    .addAttribute(PositionIncrementAttribute.class);
            TypeAttribute typeAtt = (TypeAttribute) ts.addAttribute(TypeAttribute.class);

            ts.reset();
            while (ts.incrementToken()) {
                System.out.print("(" + offsetAtt.startOffset() + "," + offsetAtt.endOffset() + ") ["
                        + posIncrAtt.getPositionIncrement() + "," + typeAtt.type() + "] " + "["
                        + termAtt.term() + "]");
            }
            ts.end();
        } finally {
            // Always release the stream, even when tokenization fails.
            ts.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:analysis.SynonymAnalyzerTest.java
License:Apache License
public void testJumps() throws Exception { TokenStream stream = synonymAnalyzer.tokenStream("contents", // #A new StringReader("jumps")); // #A TermAttribute term = stream.addAttribute(TermAttribute.class); PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class); int i = 0;//from w w w.j ava 2 s . c o m String[] expected = new String[] { "jumps", // #B "hops", // #B "leaps" }; // #B while (stream.incrementToken()) { assertEquals(expected[i], term.term()); int expectedPos; // #C if (i == 0) { // #C expectedPos = 1; // #C } else { // #C expectedPos = 0; // #C } // #C assertEquals(expectedPos, // #C posIncr.getPositionIncrement()); // #C i++; } assertEquals(3, i); }
From source file:analyzers.DebugAnalyzer.java
License:Apache License
/** * This method outputs token-by-token analysis of documents. * * @param reader the reader for the documents * @param analyzer the analyzer /*w w w. j a v a 2s .c om*/ * @throws IOException cannot load stream */ public static void showAnalysisFromStream(Reader reader, Analyzer analyzer) throws IOException { TokenStream stream = analyzer.tokenStream("text", reader); CharTermAttribute cta = stream.addAttribute(CharTermAttribute.class); OffsetAttribute oa = stream.addAttribute(OffsetAttribute.class); TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class); try { stream.reset(); while (stream.incrementToken()) { // get starting and ending offsets int start = oa.startOffset(); int end = oa.endOffset(); // text of the token String token = cta.toString(); // part of speech tag for the token String tag = typeAtt.type(); System.out.printf("start: %4d\tend: %4d\tlength: %4d\ttag: %s\ttoken: %s\n", start, end, token.length(), tag, token); } } finally { stream.close(); } }
From source file:aos.lucene.analysis.AnalyzerUtils.java
License:Apache License
/**
 * Prints the term text of every remaining token in {@code stream} to standard output, each
 * wrapped in square brackets and followed by a space.
 *
 * <p>The stream is consumed but not closed here.
 *
 * @param stream the token stream to read
 * @throws IOException if advancing the stream fails
 */
public static void displayTokens(TokenStream stream) throws IOException {
    // TermAttribute is the attribute that exposes term(), as used by the other helpers in this
    // class. AttributeSource is the attribute container itself, not an attribute, and has no
    // term() method.
    TermAttribute term = stream.addAttribute(TermAttribute.class);
    while (stream.incrementToken()) {
        System.out.print("[" + term.term() + "] ");
    }
}
From source file:aos.lucene.analysis.AnalyzerUtils.java
License:Apache License
/**
 * Prints the tokens that {@code analyzer} produces for {@code text}, grouped by position.
 *
 * <p>Whenever a token advances the position, a new output line is started with
 * {@code <position>: }; every token is then printed as {@code [term] } on the current line.
 * Tokens with a position increment of 0 therefore appear on the same line as the token they
 * are stacked on.
 *
 * @param analyzer the analyzer used to tokenize {@code text}
 * @param text the text to analyze
 * @throws IOException if the token stream fails while being consumed or closed
 */
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        TermAttribute term = stream.addAttribute(TermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

        int position = 0;
        stream.reset();
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position += increment;
                // The line break must go to the same stream as the token text; a message-less
                // logger call cannot separate the lines written to System.out.
                System.out.println();
                System.out.print(position + ": ");
            }
            System.out.print("[" + term.term() + "] ");
        }
        stream.end();
        System.out.println();
    } finally {
        // Release the stream's resources even if consumption fails part-way.
        stream.close();
    }
}
From source file:aos.lucene.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", // #A new StringReader(text)); TermAttribute term = stream.addAttribute(TermAttribute.class); // #B PositionIncrementAttribute posIncr = // #B stream.addAttribute(PositionIncrementAttribute.class); // #B OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); // #B TypeAttribute type = stream.addAttribute(TypeAttribute.class); // #B int position = 0; while (stream.incrementToken()) { // #C int increment = posIncr.getPositionIncrement(); // #D if (increment > 0) { // #D position = position + increment; // #D LOGGER.info(); // #D System.out.print(position + ": "); // #D }/*from w ww .j a va 2 s . co m*/ System.out.print("[" + // #E term.term() + ":" + // #E offset.startOffset() + "->" + // #E offset.endOffset() + ":" + // #E type.type() + "] "); // #E } LOGGER.info(); }
From source file:aos.lucene.analysis.AnalyzerUtils.java
License:Apache License
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception { TokenStream stream = analyzer.tokenStream("field", new StringReader(input)); TermAttribute termAttr = stream.addAttribute(TermAttribute.class); for (String expected : output) { Assert.assertTrue(stream.incrementToken()); Assert.assertEquals(expected, termAttr.term()); }// w ww .ja v a 2 s.c om Assert.assertFalse(stream.incrementToken()); stream.close(); }
From source file:aos.lucene.analysis.AnalyzerUtils.java
License:Apache License
/**
 * Logs the position increment of every token that {@code analyzer} produces for {@code text}.
 *
 * <p>Each token results in one {@code LOGGER.info} call with the message {@code posIncr=<n>}.
 *
 * @param analyzer the analyzer used to tokenize {@code text}
 * @param text the text to analyze
 * @throws IOException if the token stream fails while being consumed or closed
 */
public static void displayPositionIncrements(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        // Consumer workflow: reset() before the first incrementToken(), end() after the last.
        stream.reset();
        while (stream.incrementToken()) {
            LOGGER.info("posIncr=" + posIncr.getPositionIncrement());
        }
        stream.end();
    } finally {
        // Release the stream's resources even if consumption fails part-way.
        stream.close();
    }
}
From source file:aos.lucene.analysis.Fragments.java
License:Apache License
public void frag3() throws Exception { Analyzer analyzer = null;// ww w .j a va 2 s .co m String text = null; TokenStream stream = analyzer.tokenStream("contents", new StringReader(text)); PositionIncrementAttribute posIncr = (PositionIncrementAttribute) stream .addAttribute(PositionIncrementAttribute.class); while (stream.incrementToken()) { LOGGER.info("posIncr=" + posIncr.getPositionIncrement()); } }