List of usage examples for org.apache.lucene.analysis.TokenStream#incrementToken()
/**
 * Abstract method of {@code TokenStream}; concrete streams supply the implementation.
 *
 * <p>The usage examples in this listing all call it as a loop condition
 * ({@code while (stream.incrementToken())}), so presumably {@code true} means another
 * token is available and {@code false} marks the end of the stream - confirm against
 * the Lucene Javadoc.
 *
 * @return a boolean that callers in this listing use as their loop condition
 * @throws IOException declared by the signature; the conditions are implementation-specific
 */
public abstract boolean incrementToken() throws IOException;
From source file:com.leavesfly.lia.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", // #A new StringReader(text)); TermAttribute term = stream.addAttribute(TermAttribute.class); // #B PositionIncrementAttribute posIncr = // #B stream.addAttribute(PositionIncrementAttribute.class); // #B OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); // #B TypeAttribute type = stream.addAttribute(TypeAttribute.class); // #B int position = 0; while (stream.incrementToken()) { // #C int increment = posIncr.getPositionIncrement(); // #D if (increment > 0) { // #D position = position + increment; // #D System.out.println(); // #D System.out.print(position + ": "); // #D }// w w w. j ava 2 s .com System.out.print("[" + // #E term.term() + ":" + // #E offset.startOffset() + "->" + // #E offset.endOffset() + ":" + // #E type.type() + "] "); // #E } System.out.println(); }
From source file:com.leavesfly.lia.analysis.Fragments.java
License:Apache License
public void frag3() throws Exception { Analyzer analyzer = null;/* w w w . j a v a2 s . c o m*/ String text = null; // START TokenStream stream = analyzer.tokenStream("contents", new StringReader(text)); PositionIncrementAttribute posIncr = (PositionIncrementAttribute) stream .addAttribute(PositionIncrementAttribute.class); while (stream.incrementToken()) { System.out.println("posIncr=" + posIncr.getPositionIncrement()); } // END }
From source file:com.leavesfly.lia.analysis.i18n.ChineseDemo.java
License:Apache License
private static void analyze(String string, Analyzer analyzer) throws IOException { StringBuffer buffer = new StringBuffer(); TokenStream stream = analyzer.tokenStream("contents", new StringReader(string)); TermAttribute term = stream.addAttribute(TermAttribute.class); while (stream.incrementToken()) { // C buffer.append("["); buffer.append(term.term());/*from ww w.ja v a 2 s. com*/ buffer.append("] "); } String output = buffer.toString(); Frame f = new Frame(); f.setTitle(analyzer.getClass().getSimpleName() + " : " + string); f.setResizable(true); Font font = new Font(null, Font.PLAIN, 36); int width = getWidth(f.getFontMetrics(font), output); f.setSize((width < 250) ? 250 : width + 50, 75); // NOTE: if Label doesn't render the Chinese characters // properly, try using javax.swing.JLabel instead Label label = new Label(output); // D label.setSize(width, 75); label.setAlignment(Label.CENTER); label.setFont(font); f.add(label); f.setVisible(true); }
From source file:com.liferay.events.global.mobile.Utils.java
License:Open Source License
public static String removeStopWords(String words) throws IOException { if (Validator.isNull(EventContactServiceImpl.stopWords)) { EventContactServiceImpl.stopWords = new TreeSet<String>(); BufferedReader r = new BufferedReader(new InputStreamReader( EventContactService.class.getClassLoader().getResourceAsStream("stopwords/words.txt"))); String nextLine;// w w w . j av a2 s . c o m while ((nextLine = r.readLine()) != null) { String word = nextLine.trim(); if (Validator.isNotNull(word)) { EventContactServiceImpl.stopWords.add(nextLine.trim()); } } r.close(); } // remove punctuation and stuff final CharArraySet stopSet = new CharArraySet(Version.LUCENE_35, EventContactServiceImpl.stopWords, true); TokenStream tokenStream = new StopFilter(Version.LUCENE_35, new StandardTokenizer(Version.LUCENE_35, new StringReader(words)), stopSet); StringBuilder sb = new StringBuilder(); CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); tokenStream.reset(); while (tokenStream.incrementToken()) { String term = charTermAttribute.toString(); sb.append(term).append(" "); } return sb.toString(); }
From source file:com.lorelib.analyzer.sample.IKAnalzyerDemo.java
License:Apache License
public static void main(String[] args) { //IK?smart??/* w ww . j a v a 2s .c o m*/ Analyzer analyzer = new IKAnalyzer(true); //?LuceneTokenStream TokenStream ts = null; try { ts = analyzer.tokenStream("myfield", new StringReader( "?????IKAnalyer can analysis english text too")); //??? OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class); //?? CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); //?? TypeAttribute type = ts.addAttribute(TypeAttribute.class); //?TokenStream?StringReader ts.reset(); //?? while (ts.incrementToken()) { System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type()); } //TokenStreamStringReader ts.end(); // Perform end-of-stream operations, e.g. set the final offset. } catch (IOException e) { e.printStackTrace(); } finally { //TokenStream? if (ts != null) { try { ts.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:com.lou.simhasher.seg.WordsSegment.java
License:Open Source License
/**
 * Splits {@code str} into words using an {@code IKAnalyzer} and returns them in stream order.
 *
 * <p>An {@link IOException} during tokenization is logged and not rethrown; the words
 * collected up to that point are returned.
 *
 * @param str text to segment
 * @return the term text of every token produced; empty if there are none
 */
public static List<String> getCutWords(String str) {
    Analyzer analyzer = new IKAnalyzer();
    Reader r = new StringReader(str);
    List<String> list = new ArrayList<String>();

    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("searchValue", r);
        // Fetch the attribute once; it is updated in place by every incrementToken().
        CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);

        // reset() must precede the first incrementToken() in the consumer workflow.
        ts.reset();
        while (ts.incrementToken()) {
            list.add(ta.toString());
        }
        ts.end();
    } catch (IOException e) {
        // Pass the exception itself so the stack trace is not lost.
        logger.error("?IO" + e.getMessage(), e);
    } finally {
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                logger.error("?IO" + e.getMessage(), e);
            }
        }
    }
    return list;
}
From source file:com.mathworks.xzheng.advsearching.SpanQueryTest.java
License:Apache License
private void dumpSpans(SpanQuery query) throws IOException { Spans spans = query.getSpans(reader.getContext()); System.out.println(query + ":"); int numSpans = 0; TopDocs hits = searcher.search(query, 10); float[] scores = new float[2]; for (ScoreDoc sd : hits.scoreDocs) { scores[sd.doc] = sd.score;// www . ja v a 2s.c o m } while (spans.next()) { // A numSpans++; int id = spans.doc(); Document doc = reader.document(id); // B TokenStream stream = analyzer.tokenStream("contents", // C new StringReader(doc.get("f"))); // C CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); StringBuilder buffer = new StringBuilder(); buffer.append(" "); int i = 0; while (stream.incrementToken()) { // D if (i == spans.start()) { // E buffer.append("<"); // E } // E buffer.append(term.toString()); // E if (i + 1 == spans.end()) { // E buffer.append(">"); // E } // E buffer.append(" "); i++; } buffer.append("(").append(scores[id]).append(") "); System.out.println(buffer); } if (numSpans == 0) { System.out.println(" No spans"); } System.out.println(); }
From source file:com.mathworks.xzheng.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokens(TokenStream stream) throws IOException { OffsetAttribute offsetAttribute = stream.addAttribute(OffsetAttribute.class); CharTermAttribute charTermAttribute = stream.addAttribute(CharTermAttribute.class); stream.reset();/*from w w w . jav a2s.co m*/ while (stream.incrementToken()) { int startOffset = offsetAttribute.startOffset(); int endOffset = offsetAttribute.endOffset(); System.out.print("[" + charTermAttribute.toString() + "] "); //B } }
From source file:com.mathworks.xzheng.analysis.AnalyzerUtils.java
License:Apache License
/**
 * Prints the tokens that {@code analyzer} produces for {@code text}, grouped by position.
 *
 * <p>Each token is rendered as {@code [term]}. A new output line, prefixed with the running
 * position and a colon, is started whenever a token has a positive position increment; tokens
 * with an increment of zero stay on the current line.
 *
 * @param analyzer analyzer used to tokenize the text (field name is fixed to "contents")
 * @param text     text to analyze
 * @throws IOException if the token stream fails while being reset, read, ended or closed
 */
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    try {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

        // reset() must precede the first incrementToken(), as displayTokens in this class does.
        stream.reset();

        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                // Token starts a new position: begin a new output line labelled with it.
                position = position + increment;
                System.out.println();
                System.out.print(position + ": ");
            }
            System.out.print("[" + term.toString() + "] ");
        }
        stream.end();
    } finally {
        // Always release the stream, even when tokenization fails part-way.
        stream.close();
    }
    System.out.println();
}
From source file:com.mathworks.xzheng.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", // #A new StringReader(text)); CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); // #B PositionIncrementAttribute posIncr = // #B stream.addAttribute(PositionIncrementAttribute.class); // #B OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); // #B TypeAttribute type = stream.addAttribute(TypeAttribute.class); // #B int position = 0; while (stream.incrementToken()) { // #C int increment = posIncr.getPositionIncrement(); // #D if (increment > 0) { // #D position = position + increment; // #D System.out.println(); // #D System.out.print(position + ": "); // #D }/*ww w. j av a 2s . co m*/ System.out.print("[" + // #E term.toString() + ":" + // #E offset.startOffset() + "->" + // #E offset.endOffset() + ":" + // #E type.type() + "] "); // #E } System.out.println(); }