List of usage examples for org.apache.lucene.analysis TokenStream close
@Override public void close() throws IOException
From source file:servlets.TermStatsComparator.java
String analyze(String query) { StringBuffer buff = new StringBuffer(); try {//from ww w .j a va 2 s . c o m Analyzer analyzer = retriever.getAnalyzer(); TokenStream stream = analyzer.tokenStream("dummy", new StringReader(query)); CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); stream.reset(); while (stream.incrementToken()) { String term = termAtt.toString(); buff.append(term); break; } stream.end(); stream.close(); } catch (Exception ex) { ex.printStackTrace(); return query; } return buff.toString(); }
From source file:sh.isaac.provider.query.lucene.LuceneIndexer.java
License:Apache License
/**
 * Builds the prefix query.
 *
 * <p>The search string is tokenized with {@code analyzer}. Every token becomes a required
 * {@link TermQuery}, except the last one, which becomes a required {@link PrefixQuery} unless
 * the search string ends with a space.
 *
 * @param searchString the search string
 * @param field the field
 * @param analyzer the analyzer; note that it is closed by this method after tokenization
 * @return the query
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected Query buildPrefixQuery(String searchString, String field, Analyzer analyzer) throws IOException {
    final List<String> terms = new ArrayList<>();

    // Both resources are closed automatically (stream first, then reader), even on failure.
    try (StringReader textReader = new StringReader(searchString);
            TokenStream tokenStream = analyzer.tokenStream(field, textReader)) {
        final CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            terms.add(charTermAttribute.toString());
        }
        // Complete the consumer workflow before the stream is closed.
        tokenStream.end();
    }

    // NOTE(review): closing a caller-supplied analyzer is preserved from the original behavior.
    analyzer.close();

    final BooleanQuery.Builder bq = new BooleanQuery.Builder();

    // A trailing space means the user finished typing the last word, so no prefix match is used.
    if (!terms.isEmpty() && !searchString.endsWith(" ")) {
        final String last = terms.remove(terms.size() - 1);
        bq.add(new PrefixQuery(new Term(field, last)), Occur.MUST);
    }

    for (final String term : terms) {
        bq.add(new TermQuery(new Term(field, term)), Occur.MUST);
    }

    return bq.build();
}
From source file:stackoverflow.lucene.modified.MoreLikeThis.java
License:Apache License
/** * Adds term frequencies found by tokenizing text from reader into the Map words * * @param r a source of text to be tokenized * @param termFreqMap a Map of terms and their frequencies * @param fieldName Used by analyzer for any special per-field analysis *///from w ww. ja va 2 s.c o m private void addTermFrequencies(Reader r, Map<String, Int> termFreqMap, String fieldName) throws IOException { if (analyzer == null) { throw new UnsupportedOperationException( "To use MoreLikeThis without " + "term vectors, you must provide an Analyzer"); } TokenStream ts = analyzer.tokenStream(fieldName, r); int tokenCount = 0; // for every token CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); ts.reset(); while (ts.incrementToken()) { String word = termAtt.toString(); tokenCount++; if (tokenCount > maxNumTokensParsed) { break; } if (isNoiseWord(word)) { continue; } // increment frequency Int cnt = termFreqMap.get(word); if (cnt == null) { termFreqMap.put(word, new Int()); } else { cnt.x++; } } ts.end(); ts.close(); }
From source file:test.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokens(TokenStream stream) throws IOException { stream.reset();/* ww w.j a v a 2s. c o m*/ CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); while (stream.incrementToken()) { System.out.print("[" + term + "] "); //B } stream.close(); }
From source file:test.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", new StringReader(text)); CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class); stream.reset();//from w w w. ja v a 2 s. co m int position = 0; while (stream.incrementToken()) { int increment = posIncr.getPositionIncrement(); if (increment > 0) { position = position + increment; System.out.println(); System.out.print(position + ": "); } System.out.print("[" + term + "] "); } System.out.println(); stream.close(); }
From source file:test.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException { TokenStream stream = analyzer.tokenStream("contents", // #A new StringReader(text)); CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); // #B PositionIncrementAttribute posIncr = // #B stream.addAttribute(PositionIncrementAttribute.class); // #B OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); // #B TypeAttribute type = stream.addAttribute(TypeAttribute.class); // #B stream.reset();// w ww. j a v a2 s . com int position = 0; while (stream.incrementToken()) { // #C int increment = posIncr.getPositionIncrement(); // #D if (increment > 0) { // #D position = position + increment; // #D System.out.println(); // #D System.out.print(position + ": "); // #D } System.out.print("[" + // #E term + ":" + // #E offset.startOffset() + "->" + // #E offset.endOffset() + ":" + // #E type.type() + "] "); // #E } System.out.println(); stream.close(); }
From source file:test.analysis.AnalyzerUtils.java
License:Apache License
/**
 * Asserts that analyzing {@code input} as field {@code "field"} yields exactly the terms in
 * {@code output}, in order, with no extra tokens.
 *
 * @param analyzer the analyzer under test
 * @param input the text to analyze
 * @param output the expected terms, in order
 * @throws Exception if tokenizing fails
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    // try-with-resources closes the stream even when an assertion fails part-way through.
    try (TokenStream stream = analyzer.tokenStream("field", new StringReader(input))) {
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);

        // reset() must precede the first incrementToken() call, as the sibling helpers do.
        stream.reset();
        for (String expected : output) {
            assertTrue(stream.incrementToken());
            assertEquals(expected, termAttr.toString());
        }
        assertFalse(stream.incrementToken());

        stream.end();
    }
}
From source file:test.AnalzyerDemo.java
License:Apache License
public static void main(String[] args) { Analyzer analyzer = new BaseAnalyzer(); // Analyzer analyzer = new org.apache.lucene.analysis.cjk.CJKAnalyzer(); // ?LuceneTokenStream TokenStream ts = null; try {//from www. ja v a 2 s. co m ts = analyzer.tokenStream("myfield", new StringReader( "????????????????2?3noneok???BaseAnalyer can analysis english text too")); // ??? OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class); // ?? CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); // ?? TypeAttribute type = ts.addAttribute(TypeAttribute.class); // ?TokenStream?StringReader ts.reset(); // ?? while (ts.incrementToken()) { System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type()); } // TokenStreamStringReader ts.end(); // Perform end-of-stream operations, e.g. set the final offset. } catch (IOException e) { e.printStackTrace(); analyzer.close(); } finally { // TokenStream? if (ts != null) { try { ts.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:text_analyzer.AnalyzerUtils.java
License:Apache License
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception { TokenStream stream = analyzer.tokenStream("field", new StringReader(input)); TermAttribute termAttr = stream.addAttribute(TermAttribute.class); for (String expected : output) { //Assert.assertTrue(stream.incrementToken()); //Assert.assertEquals(expected, termAttr.term()); }//from ww w. j a v a2s . c om //Assert.assertFalse(stream.incrementToken()); stream.close(); }
From source file:tweetembeding.AnalyzerClass.java
/**
 * Tokenizes {@code txt} with the analyzer returned by {@code setAnalyzer()} and joins the
 * resulting terms, dropping any term equal to {@code "nbsp"}.
 *
 * @param FIELD the field name passed to the analyzer
 * @param txt the text to analyze
 * @return the kept terms, each followed by a single space (so a non-empty result ends with a
 *         trailing space)
 * @throws IOException if tokenizing fails
 */
public String analizeString(String FIELD, String txt) throws IOException {
    this.analyzer = setAnalyzer();

    StringBuilder tokenizedContent = new StringBuilder();
    // try-with-resources closes the stream even if consumption throws.
    try (TokenStream stream = analyzer.tokenStream(FIELD, new StringReader(txt))) {
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

        stream.reset();
        while (stream.incrementToken()) {
            String term = termAtt.toString();
            if (!term.equals("nbsp")) {
                tokenizedContent.append(term).append(" ");
            }
        }
        stream.end();
    }
    return tokenizedContent.toString();
}