List of usage examples for org.apache.lucene.analysis TokenStream addAttribute
public final <T extends Attribute> T addAttribute(Class<T> attClass)
From source file:test.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", new StringReader(text)); CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class); stream.reset();//ww w . ja v a 2s . c o m int position = 0; while (stream.incrementToken()) { int increment = posIncr.getPositionIncrement(); if (increment > 0) { position = position + increment; System.out.println(); System.out.print(position + ": "); } System.out.print("[" + term + "] "); } System.out.println(); stream.close(); }
From source file:test.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", // #A new StringReader(text)); CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); // #B PositionIncrementAttribute posIncr = // #B stream.addAttribute(PositionIncrementAttribute.class); // #B OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); // #B TypeAttribute type = stream.addAttribute(TypeAttribute.class); // #B stream.reset();/*from w ww . ja v a2 s . co m*/ int position = 0; while (stream.incrementToken()) { // #C int increment = posIncr.getPositionIncrement(); // #D if (increment > 0) { // #D position = position + increment; // #D System.out.println(); // #D System.out.print(position + ": "); // #D } System.out.print("[" + // #E term + ":" + // #E offset.startOffset() + "->" + // #E offset.endOffset() + ":" + // #E type.type() + "] "); // #E } System.out.println(); stream.close(); }
From source file:test.analysis.AnalyzerUtils.java
License:Apache License
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception { TokenStream stream = analyzer.tokenStream("field", new StringReader(input)); CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class); for (String expected : output) { assertTrue(stream.incrementToken()); assertEquals(expected, termAttr.toString()); }// w ww . ja v a 2 s .c o m assertFalse(stream.incrementToken()); stream.close(); }
From source file:test.AnalzyerDemo.java
License:Apache License
public static void main(String[] args) { Analyzer analyzer = new BaseAnalyzer(); // Analyzer analyzer = new org.apache.lucene.analysis.cjk.CJKAnalyzer(); // ?LuceneTokenStream TokenStream ts = null; try {/*from w w w . j av a 2 s. co m*/ ts = analyzer.tokenStream("myfield", new StringReader( "????????????????2?3noneok???BaseAnalyer can analysis english text too")); // ??? OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class); // ?? CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); // ?? TypeAttribute type = ts.addAttribute(TypeAttribute.class); // ?TokenStream?StringReader ts.reset(); // ?? while (ts.incrementToken()) { System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type()); } // TokenStreamStringReader ts.end(); // Perform end-of-stream operations, e.g. set the final offset. } catch (IOException e) { e.printStackTrace(); analyzer.close(); } finally { // TokenStream? if (ts != null) { try { ts.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:text_analyzer.AnalyzerUtils.java
License:Apache License
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception { TokenStream stream = analyzer.tokenStream("field", new StringReader(input)); TermAttribute termAttr = stream.addAttribute(TermAttribute.class); for (String expected : output) { //Assert.assertTrue(stream.incrementToken()); //Assert.assertEquals(expected, termAttr.term()); }//w w w. j a v a 2 s.c o m //Assert.assertFalse(stream.incrementToken()); stream.close(); }
From source file:tweetembeding.AnalyzerClass.java
public String analizeString(String FIELD, String txt) throws IOException { this.analyzer = setAnalyzer(); TokenStream stream = analyzer.tokenStream(FIELD, new StringReader(txt)); CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); stream.reset();// w ww.j a va2s .c o m StringBuffer tokenizedContentBuff = new StringBuffer(); while (stream.incrementToken()) { String term = termAtt.toString(); if (!term.equals("nbsp")) tokenizedContentBuff.append(term).append(" "); } stream.end(); stream.close(); return tokenizedContentBuff.toString(); }
From source file:ucas.IKAnalzyerDemo.java
License:Apache License
public static String Spilt2Words(String content) { String resString = ""; //IK?smart??// ww w .ja va 2 s .co m Analyzer analyzer = new IKAnalyzer(true); //?LuceneTokenStream TokenStream ts = null; try { //myfield?? ts = analyzer.tokenStream("myfield", new StringReader(content)); //?? CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); //?TokenStream?StringReader ts.reset(); //?? while (ts.incrementToken()) { resString += term.toString() + "|"; } //TokenStreamStringReader ts.end(); // Perform end-of-stream operations, e.g. set the final offset. } catch (IOException e) { e.printStackTrace(); } finally { //TokenStream? if (ts != null) { try { ts.close(); } catch (IOException e) { e.printStackTrace(); } } } return resString; }
From source file:uib.scratch.AnalyzerUtils.java
/**
 * Half-finished experiment: tokenizes {@code text}, prints a Token built from each
 * term, and ALWAYS returns null — the bracket-parsing logic it was meant to perform
 * survives only inside the block comment below, and the early {@code return t} is
 * commented out.
 *
 * NOTE(review): the stream is never end()ed or close()d, and {@code currenttoken},
 * {@code character}, {@code i}, {@code tokenstart}, {@code tokenend} are all dead
 * state left over from the commented-out design. Also, the Token end offset is
 * {@code length() - 1}, not {@code length()} — possibly an off-by-one; confirm
 * against the intended offset convention before reviving this code.
 *
 * @return always null in its current state
 */
public static Token insertB(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    TermAttribute term = stream.addAttribute(TermAttribute.class);
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
    StringBuilder currenttoken = new StringBuilder(64);
    // currenttoken.append('[');
    char[] character = new char[1]; // dead: only read inside the commented-out block below
    int i = posIncr.getPositionIncrement(); // dead: computed before reset(), never updated
    // state flags for the abandoned bracket scanner below
    boolean tokenstart = false;
    boolean tokenend = false;
    stream.reset();
    while (stream.incrementToken()) {
        /* Abandoned character-level bracket scanner, kept for reference:
        end of stream reached ...
        if (i == 0)
            return null;
        if (character[0] == '[') {
            // token starts here ...
            tokenstart = true;
        } else if (character[0] == ']') {
            // token ends here ...
            tokenend = true;
        } else if (tokenstart && !tokenend) {
            // between end and start ...
            currenttoken.append(character[0]);
        }
        // we found our token and return it ...
        if (tokenstart && tokenend) {
            // currenttoken.append(']');
            // prepend a token because lucene does not allow leading wildcards.
            //currenttoken.insert(0, '_');*/
        //String tokenString = currenttoken.toString().toLowerCase().replace(' ', '_').trim();
        String tokenString = term.toString();
        Token t = new Token(tokenString, 0, tokenString.length() - 1);
        System.out.println(t);
        //return t;
    }
    return null;
}
From source file:uib.scratch.AnalyzerUtils.java
public static void insertBracket(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", new StringReader(text)); TermAttribute term = stream.addAttribute(TermAttribute.class); PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class); OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); StringBuilder currentToken = new StringBuilder(64); int position = 0; while (stream.incrementToken()) { //final String token = new StringTokenizer(); int increment = posIncr.getPositionIncrement(); if (increment > 0) { position += increment;/*from ww w .j av a 2s. co m*/ offset.endOffset(); currentToken.append(term); currentToken.insert(0, "_"); String tokenString = currentToken.toString().toLowerCase().replace(' ', '_').trim(); Token t = new Token(tokenString, 0, tokenString.length() - 1); t.setTermBuffer(tokenString); System.out.println("test " + " " + t); } } }
From source file:uib.scratch.AnalyzerUtils.java
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", // #A new StringReader(text)); TermAttribute term = stream.addAttribute(TermAttribute.class); // #B PositionIncrementAttribute posIncr = // #B stream.addAttribute(PositionIncrementAttribute.class); // #B OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); // #B TypeAttribute type = stream.addAttribute(TypeAttribute.class); // #B int position = 0; stream.reset();//from w w w. j a va2s . co m while (stream.incrementToken()) { // #C int increment = posIncr.getPositionIncrement(); // #D if (increment > 0) { // #D position = position + increment; // #D System.out.println(); // #D System.out.print(position + ": "); // #D } System.out.print("[" + // #E term.term() + ":" + // #E offset.startOffset() + "->" + // #E offset.endOffset() + ":" + // #E type.type() + "] "); // #E } System.out.println(); }