List of usage examples for org.apache.lucene.analysis.TokenStream#addAttribute
public final <T extends Attribute> T addAttribute(Class<T> attClass)
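The examples below share one consume pattern: register an attribute with addAttribute, reset() the stream, loop on incrementToken(), then end() and close(). For orientation, here is a minimal sketch of that pattern, assuming an already-constructed Analyzer and the hypothetical field name "body" (Lucene 4.x or later, where Analyzer.tokenStream accepts a String directly):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public final class AddAttributeSketch {
    public static void printTokens(Analyzer analyzer, String text) throws IOException {
        // try-with-resources closes the stream even if incrementToken() throws
        try (TokenStream stream = analyzer.tokenStream("body", text)) {
            // addAttribute returns the stream's CharTermAttribute, creating and
            // registering it if absent; repeated calls return the same instance
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                   // mandatory before the first incrementToken()
            while (stream.incrementToken()) { // advance to the next token
                System.out.println(term.toString());
            }
            stream.end();                     // record end-of-stream state (final offset)
        }
    }
}

The same contract appears in every example that follows, with different attribute classes (offsets, types, position increments, raw term bytes) swapped in.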
From source file: jp.scaleout.elasticsearch.plugins.queryparser.classic.MapperQueryParser.java
License: Apache License
private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr) throws ParseException {
    if (!analyzeWildcard) {
        return super.getPrefixQuery(field, termStr);
    }
    // get Analyzer from superclass and tokenize the term
    TokenStream source;
    try {
        source = getAnalyzer().tokenStream(field, termStr);
        source.reset();
    } catch (IOException e) {
        return super.getPrefixQuery(field, termStr);
    }
    List<String> tlist = new ArrayList<>();
    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
    while (true) {
        try {
            if (!source.incrementToken()) break;
        } catch (IOException e) {
            break;
        }
        tlist.add(termAtt.toString());
    }
    try {
        source.close();
    } catch (IOException e) {
        // ignore
    }
    if (tlist.size() == 1) {
        return super.getPrefixQuery(field, tlist.get(0));
    } else {
        // build a boolean query with prefix on each one...
        List<BooleanClause> clauses = new ArrayList<>();
        for (String token : tlist) {
            clauses.add(new BooleanClause(super.getPrefixQuery(field, token), BooleanClause.Occur.SHOULD));
        }
        return getBooleanQuery(clauses, true);
        //return super.getPrefixQuery(field, termStr);
        /* this means that the analyzer used either added or consumed
         * (common for a stemmer) tokens, and we can't build a PrefixQuery */
        // throw new ParseException("Cannot build PrefixQuery with analyzer "
        //         + getAnalyzer().getClass()
        //         + (tlist.size() > 1 ? " - token(s) added" : " - token consumed"));
    }
}
From source file: jp.scaleout.elasticsearch.plugins.queryparser.classic.MapperQueryParser.java
License: Apache License
private Query getPossiblyAnalyzedWildcardQuery(String field, String termStr) throws ParseException {
    if (!analyzeWildcard) {
        return super.getWildcardQuery(field, termStr);
    }
    boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*"));
    StringBuilder aggStr = new StringBuilder();
    StringBuilder tmp = new StringBuilder();
    for (int i = 0; i < termStr.length(); i++) {
        char c = termStr.charAt(i);
        if (c == '?' || c == '*') {
            if (isWithinToken) {
                try {
                    TokenStream source = getAnalyzer().tokenStream(field, tmp.toString());
                    source.reset();
                    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
                    if (source.incrementToken()) {
                        String term = termAtt.toString();
                        if (term.length() == 0) {
                            // no tokens, just use what we have now
                            aggStr.append(tmp);
                        } else {
                            aggStr.append(term);
                        }
                    } else {
                        // no tokens, just use what we have now
                        aggStr.append(tmp);
                    }
                    source.close();
                } catch (IOException e) {
                    aggStr.append(tmp);
                }
                tmp.setLength(0);
            }
            isWithinToken = false;
            aggStr.append(c);
        } else {
            tmp.append(c);
            isWithinToken = true;
        }
    }
    if (isWithinToken) {
        try {
            TokenStream source = getAnalyzer().tokenStream(field, tmp.toString());
            source.reset();
            CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
            if (source.incrementToken()) {
                String term = termAtt.toString();
                if (term.length() == 0) {
                    // no tokens, just use what we have now
                    aggStr.append(tmp);
                } else {
                    aggStr.append(term);
                }
            } else {
                // no tokens, just use what we have now
                aggStr.append(tmp);
            }
            source.close();
        } catch (IOException e) {
            aggStr.append(tmp);
        }
    }
    return super.getWildcardQuery(field, aggStr.toString());
}
From source file: jp.sf.fess.solr.plugin.analysis.ja.TestJapaneseNumberFilter.java
License: Apache License
public void analyze(final Analyzer analyzer, final Reader reader, final Writer writer) throws IOException {
    final TokenStream stream = analyzer.tokenStream("dummy", reader);
    final CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        writer.write(termAttr.toString());
        writer.write("\n");
    }
    stream.end();   // complete the TokenStream contract before closing
    stream.close();
    reader.close();
    writer.close();
}
From source file: kafka.examples.Producer.java
License: Apache License
public void run() {
    while (true) {
        String access_token = "2.009F1d9BmHHChD7abcd6de0a0jui5Y";
        int count = 20;
        Timeline tm = new Timeline(access_token);
        Analyzer analyzer4 = new IKAnalyzer(false); // IK Chinese analyzer; false = fine-grained (non-smart) segmentation
        try {
            StatusWapper status = tm.getPublicTimeline(count, 0);
            try {
                TokenStream tokenstream = analyzer4.tokenStream("", new StringReader(status.toString()));
                CharTermAttribute termAttribute = tokenstream.addAttribute(CharTermAttribute.class); // term text of each token
                tokenstream.reset(); // reset before consuming the stream
                while (tokenstream.incrementToken()) { // step through every token
                    String prTxt = new String(termAttribute.buffer(), 0, termAttribute.length());
                    //producer.send(new KeyedMessage<Integer, String>(topic, ptTxt + " "));
                    System.out.print(prTxt + " ");
                }
                //System.out.println();
                tokenstream.close(); // release the TokenStream
            } catch (IOException e) {
                e.printStackTrace();
            }
            producer.send(new KeyedMessage<Integer, String>(topic, status.toString()));
            Log.logInfo(status.toString());
        } catch (WeiboException e) {
            e.printStackTrace();
        }
    }
}
From source file: lia.analysis.CopyOfAnalyzerDemo.java
License: Apache License
private static void analyze(String text) throws IOException {
    System.out.println("Analyzing \"" + text + "\"");
    for (Analyzer analyzer : analyzers) {
        String name = analyzer.getClass().getSimpleName();
        System.out.println(name);
        TokenStream stream = analyzer.tokenStream("dummy", text);
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsetAttr = stream.addAttribute(OffsetAttribute.class); // registered but unused in this demo
        TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class); // registered but unused in this demo
        PositionIncrementAttribute positionAttr = stream.addAttribute(PositionIncrementAttribute.class); // registered but unused in this demo
        stream.reset();
        while (stream.incrementToken()) {
            System.out.print("[" + termAttr + "] ");
        }
        System.out.println("");
        stream.close(); // close each analyzer's stream before moving on
    }
}
From source file: lia.analysis.i18n.ChineseDemo.java
License: Apache License
private static void analyze(String string, Analyzer analyzer) throws IOException {
    StringBuffer buffer = new StringBuffer();
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(string));
    TermAttribute term = stream.addAttribute(TermAttribute.class); // legacy pre-4.0 attribute; CharTermAttribute replaces it in later Lucene
    stream.reset(); // reset before the first incrementToken()
    while (stream.incrementToken()) { //C
        buffer.append("[");
        buffer.append(term.term());
        buffer.append("] ");
    }
    stream.close();

    String output = buffer.toString();

    Frame f = new Frame();
    f.setTitle(analyzer.getClass().getSimpleName() + " : " + string);
    f.setResizable(true);

    Font font = new Font(null, Font.PLAIN, 36);
    int width = getWidth(f.getFontMetrics(font), output);

    f.setSize((width < 250) ? 250 : width + 50, 75);

    // NOTE: if Label doesn't render the Chinese characters
    // properly, try using javax.swing.JLabel instead
    JLabel label = new JLabel(output); //D
    label.setSize(width, 75);
    //label.setAlignment(JLabel.CENTER);
    label.setFont(font);
    f.add(label);
    f.setVisible(true);
}
From source file: lia.chapter4.AnalyzerUtils.java
License: Apache License
public static void displayTokens(TokenStream stream) throws IOException {
    TermToBytesRefAttribute term = stream.addAttribute(TermToBytesRefAttribute.class);
    stream.reset(); // assumes the caller hands in an unconsumed stream; reset is required before incrementToken()
    while (stream.incrementToken()) {
        System.out.print("[" + term.getBytesRef().utf8ToString() + "] "); //B
    }
    stream.end();
    stream.close();
}
From source file: lia.chapter4.AnalyzerUtils.java
License: Apache License
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    TermToBytesRefAttribute term = stream.addAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
    stream.reset(); // required before the first incrementToken()
    int position = 0;
    while (stream.incrementToken()) {
        int increment = posIncr.getPositionIncrement();
        if (increment > 0) {
            position = position + increment;
            System.out.println();
            System.out.print(position + ": ");
        }
        System.out.print("[" + term.getBytesRef().utf8ToString() + "] ");
    }
    System.out.println();
    stream.close();
}
From source file: lia.chapter4.AnalyzerUtils.java
License: Apache License
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {
    TokenStream tokenStream = analyzer.tokenStream("contents", // #A
            new StringReader(text));
    CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class); // #B
    TypeAttribute type = tokenStream.addAttribute(TypeAttribute.class); // #B
    tokenStream.reset(); // required before the first incrementToken()
    while (tokenStream.incrementToken()) { // #C
        int startOffset = offsetAttribute.startOffset();
        System.out.println(startOffset);
        int endOffset = offsetAttribute.endOffset();
        System.out.println(endOffset);
        String term = charTermAttribute.toString();
        System.out.println(term);
        System.out.println(type.type()); // print the token type (e.g. <ALPHANUM>)
    }
    tokenStream.close();
}
From source file: lia.chapter4.AnalyzerUtils.java
License: Apache License
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));
    TermToBytesRefAttribute termAttr = stream.addAttribute(TermToBytesRefAttribute.class);
    stream.reset(); // required before the first incrementToken()
    for (String expected : output) {
        Assert.assertTrue(stream.incrementToken());
        Assert.assertEquals(expected, termAttr.getBytesRef().utf8ToString());
    }
    Assert.assertFalse(stream.incrementToken());
    stream.close();
}