List of usage examples for org.apache.lucene.analysis.TokenStream#addAttribute
public final <T extends Attribute> T addAttribute(Class<T> attClass)
From source file:analysis.AnalyzerUtils.java
License:Apache License
/**
 * Prints the position increment of every token that {@code analyzer} produces for {@code text}.
 *
 * <p>One line of the form {@code posIncr=<n>} is written to standard output per token.
 *
 * @param analyzer the analyzer used to tokenize {@code text}
 * @param text the text to analyze
 * @throws IOException if the token stream fails while being consumed or closed
 */
public static void displayPositionIncrements(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        // Consumer workflow: reset() before the first incrementToken(), end() after the last.
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println("posIncr=" + posIncr.getPositionIncrement());
        }
        stream.end();
    } finally {
        // Always release the stream (and the reader it wraps), even if consumption fails.
        stream.close();
    }
}
From source file:analysis.SynonymAnalyzerTest.java
License:Apache License
public void testJumps() throws Exception { TokenStream stream = synonymAnalyzer.tokenStream("contents", // #A new StringReader("jumps")); // #A TermAttribute term = stream.addAttribute(TermAttribute.class); PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class); int i = 0;/*w w w . j a va 2 s . com*/ String[] expected = new String[] { "jumps", // #B "hops", // #B "leaps" }; // #B while (stream.incrementToken()) { assertEquals(expected[i], term.term()); int expectedPos; // #C if (i == 0) { // #C expectedPos = 1; // #C } else { // #C expectedPos = 0; // #C } // #C assertEquals(expectedPos, // #C posIncr.getPositionIncrement()); // #C i++; } assertEquals(3, i); }
From source file:analyzers.DebugAnalyzer.java
License:Apache License
/** * This method outputs token-by-token analysis of documents. * * @param reader the reader for the documents * @param analyzer the analyzer /*from w ww . ja v a2 s . c o m*/ * @throws IOException cannot load stream */ public static void showAnalysisFromStream(Reader reader, Analyzer analyzer) throws IOException { TokenStream stream = analyzer.tokenStream("text", reader); CharTermAttribute cta = stream.addAttribute(CharTermAttribute.class); OffsetAttribute oa = stream.addAttribute(OffsetAttribute.class); TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class); try { stream.reset(); while (stream.incrementToken()) { // get starting and ending offsets int start = oa.startOffset(); int end = oa.endOffset(); // text of the token String token = cta.toString(); // part of speech tag for the token String tag = typeAtt.type(); System.out.printf("start: %4d\tend: %4d\tlength: %4d\ttag: %s\ttoken: %s\n", start, end, token.length(), tag, token); } } finally { stream.close(); } }
From source file:aos.lucene.analysis.AnalyzerUtils.java
License:Apache License
/**
 * Prints every remaining token of {@code stream} to standard output as {@code [term] },
 * all on one line.
 *
 * <p>The stream is owned by the caller: this method only consumes it and neither resets
 * nor closes it.
 *
 * @param stream the token stream to consume
 * @throws IOException if the stream fails while advancing to the next token
 */
public static void displayTokens(TokenStream stream) throws IOException {
    // The term text is exposed through the term attribute registered on the stream.
    TermAttribute term = stream.addAttribute(TermAttribute.class);
    while (stream.incrementToken()) {
        System.out.print("[" + term.term() + "] ");
    }
}
From source file:aos.lucene.analysis.AnalyzerUtils.java
License:Apache License
/**
 * Prints the tokens that {@code analyzer} produces for {@code text}, grouped by position.
 *
 * <p>Each time a token has a positive position increment, a new output line is started and
 * prefixed with the running position ({@code "<position>: "}). Tokens with a zero increment
 * are appended to the current line. Every token is printed as {@code [term] }.
 *
 * @param analyzer the analyzer used to tokenize {@code text}
 * @param text the text to analyze
 * @throws IOException if the token stream fails while being consumed or closed
 */
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        TermAttribute term = stream.addAttribute(TermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

        // Consumer workflow: reset() before the first incrementToken().
        stream.reset();
        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position = position + increment;
                // Start a new line on stdout; the rest of the output is written there too.
                System.out.println();
                System.out.print(position + ": ");
            }
            System.out.print("[" + term.term() + "] ");
        }
        stream.end();
        System.out.println();
    } finally {
        stream.close();
    }
}
From source file:aos.lucene.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", // #A new StringReader(text)); TermAttribute term = stream.addAttribute(TermAttribute.class); // #B PositionIncrementAttribute posIncr = // #B stream.addAttribute(PositionIncrementAttribute.class); // #B OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); // #B TypeAttribute type = stream.addAttribute(TypeAttribute.class); // #B int position = 0; while (stream.incrementToken()) { // #C int increment = posIncr.getPositionIncrement(); // #D if (increment > 0) { // #D position = position + increment; // #D LOGGER.info(); // #D System.out.print(position + ": "); // #D }/*from ww w . j a v a2 s. c o m*/ System.out.print("[" + // #E term.term() + ":" + // #E offset.startOffset() + "->" + // #E offset.endOffset() + ":" + // #E type.type() + "] "); // #E } LOGGER.info(); }
From source file:aos.lucene.analysis.AnalyzerUtils.java
License:Apache License
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception { TokenStream stream = analyzer.tokenStream("field", new StringReader(input)); TermAttribute termAttr = stream.addAttribute(TermAttribute.class); for (String expected : output) { Assert.assertTrue(stream.incrementToken()); Assert.assertEquals(expected, termAttr.term()); }//from w w w.j a v a 2s .c o m Assert.assertFalse(stream.incrementToken()); stream.close(); }
From source file:aos.lucene.analysis.AnalyzerUtils.java
License:Apache License
public static void displayPositionIncrements(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", new StringReader(text)); PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class); while (stream.incrementToken()) { LOGGER.info("posIncr=" + posIncr.getPositionIncrement()); }//from w w w . ja va 2 s . c om }
From source file:aos.lucene.analysis.Fragments.java
License:Apache License
/**
 * Fragment showing how to read position increments from a token stream.
 *
 * <p>NOTE(review): both {@code analyzer} and {@code text} are {@code null} here, so calling
 * this method throws a {@link NullPointerException}. It appears to be an illustrative
 * fragment rather than code meant to be run; confirm before relying on it.
 *
 * @throws Exception if tokenization fails
 */
public void frag3() throws Exception {
    String text = null;
    Analyzer analyzer = null;

    TokenStream tokens = analyzer.tokenStream("contents", new StringReader(text));
    PositionIncrementAttribute increments =
            (PositionIncrementAttribute) tokens.addAttribute(PositionIncrementAttribute.class);

    // Log one "posIncr=<n>" message per token until the stream is exhausted.
    for (boolean more = tokens.incrementToken(); more; more = tokens.incrementToken()) {
        LOGGER.info("posIncr=" + increments.getPositionIncrement());
    }
}
From source file:aos.lucene.analysis.i18n.ChineseDemo.java
License:Apache License
private static void analyze(String string, Analyzer analyzer) throws IOException { StringBuffer buffer = new StringBuffer(); TokenStream stream = analyzer.tokenStream("contents", new StringReader(string)); TermAttribute term = stream.addAttribute(TermAttribute.class); while (stream.incrementToken()) { //C buffer.append("["); buffer.append(term.term());/*from w w w . ja v a 2 s . c o m*/ buffer.append("] "); } String output = buffer.toString(); Frame f = new Frame(); f.setTitle(analyzer.getClass().getSimpleName() + " : " + string); f.setResizable(true); Font font = new Font(null, Font.PLAIN, 36); int width = getWidth(f.getFontMetrics(font), output); f.setSize((width < 250) ? 250 : width + 50, 75); // NOTE: if Label doesn't render the Chinese characters // properly, try using javax.swing.JLabel instead Label label = new Label(output); //D label.setSize(width, 75); label.setAlignment(Label.CENTER); label.setFont(font); f.add(label); f.setVisible(true); }