List of usage examples for org.apache.lucene.analysis TokenStream reset
public void reset() throws IOException
From source file:org.opengrok.web.api.v1.suggester.query.SuggesterQueryParser.java
License:Open Source License
/**
 * Runs {@code text} through the analyzer for {@code field} and collects every
 * token it produces, in emission order.
 *
 * @param analyzer the analyzer used to tokenize the text
 * @param field    the field name passed to the analyzer (may select per-field behavior)
 * @param text     the raw text to analyze
 * @return the tokens in order; empty when analysis fails (the failure is logged)
 */
private static List<String> getAllTokens(final Analyzer analyzer, final String field, final String text) {
    List<String> tokens = new LinkedList<>();
    // try-with-resources closes the stream on every path; the previous manual
    // finally-block version is equivalent but more error-prone
    try (TokenStream ts = analyzer.tokenStream(field, text)) {
        CharTermAttribute attr = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            tokens.add(attr.toString());
        }
        // end() finalizes end-of-stream state; only meaningful after a clean run
        ts.end();
    } catch (IOException e) {
        logger.log(Level.WARNING, "Could not analyze query text", e);
    }
    return tokens;
}
From source file:org.opensextant.solrtexttagger.Tagger.java
License:Open Source License
public Tagger(Terms terms, Bits liveDocs, TokenStream tokenStream, TagClusterReducer tagClusterReducer, boolean skipAltTokens, boolean ignoreStopWords) throws IOException { this.terms = terms; this.liveDocs = liveDocs; this.tokenStream = tokenStream; this.skipAltTokens = skipAltTokens; this.ignoreStopWords = ignoreStopWords; // termAtt = tokenStream.addAttribute(CharTermAttribute.class); byteRefAtt = tokenStream.addAttribute(TermToBytesRefAttribute.class); posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class); offsetAtt = tokenStream.addAttribute(OffsetAttribute.class); lookupAtt = tokenStream.addAttribute(TaggingAttribute.class); tokenStream.reset(); this.tagClusterReducer = tagClusterReducer; }
From source file:org.pageseeder.flint.lucene.query.Queries.java
License:Apache License
/** * Returns the terms for a field//from ww w . ja va 2s . c om * * @param field The field * @param text The text to analyze * @param analyzer The analyzer * * @return the corresponding list of terms produced by the analyzer. * * @throws IOException */ private static void addTermsToPhrase(String field, String text, Analyzer analyzer, PhraseQuery phrase) { try { TokenStream stream = analyzer.tokenStream(field, text); PositionIncrementAttribute increment = stream.addAttribute(PositionIncrementAttribute.class); CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class); int position = -1; stream.reset(); while (stream.incrementToken()) { position += increment.getPositionIncrement(); Term term = new Term(field, attribute.toString()); phrase.add(term, position); } stream.end(); stream.close(); } catch (IOException ex) { // Should not occur since we use a StringReader ex.printStackTrace(); } }
From source file:org.pageseeder.flint.lucene.query.Queries.java
License:Apache License
private static boolean isTokenized(String field, Analyzer analyzer) { // try to load terms for a phrase and return true if more than one term TokenStream stream = null; try {//from ww w. j a v a 2 s. c om stream = analyzer.tokenStream(field, "word1 word2"); stream.reset(); if (stream.incrementToken()) { return stream.incrementToken(); } } catch (IOException ex) { // Should not occur since we use a StringReader ex.printStackTrace(); } finally { if (stream != null) try { stream.end(); stream.close(); } catch (IOException ex) { // Should not occur since we use a StringReader ex.printStackTrace(); } } return false; }
From source file:org.pageseeder.flint.lucene.search.Fields.java
License:Apache License
/** * Returns the terms for a field/*from www . j a va 2 s .c o m*/ * * @param field The field * @param text The text to analyze * @param analyzer The analyzer * * @return the corresponding list of terms produced by the analyzer. * * @throws IOException */ public static List<String> toTerms(String field, String text, Analyzer analyzer) { List<String> terms = new ArrayList<String>(); try { TokenStream stream = analyzer.tokenStream(field, new StringReader(text)); CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class); stream.reset(); while (stream.incrementToken()) { String term = attribute.toString(); terms.add(term); } stream.end(); stream.close(); } catch (IOException ex) { // Should not occur since we use a StringReader ex.printStackTrace(); } return terms; }
From source file:org.sc.probro.lucene.BiothesaurusSearcher.java
License:Apache License
public Query createQuery(String phrase) { phrase = phrase.trim();//from w w w .j a v a 2s . c o m TermQuery idQuery = new TermQuery(new Term("protein-id", phrase.toUpperCase())); TermQuery accQuery = new TermQuery(new Term("accession", phrase.toUpperCase())); ArrayList<TermQuery> descQueries = new ArrayList<TermQuery>(); ArrayList<Term> descTerms = new ArrayList<Term>(); TokenStream stream = analyzer.tokenStream("description", new StringReader(phrase)); TermAttribute attr = (TermAttribute) stream.getAttribute(TermAttribute.class); try { stream.reset(); Term lastTerm = null; while (stream.incrementToken()) { Term t = new Term("description", attr.term()); descQueries.add(new TermQuery(t)); descTerms.add(t); if (lastTerm != null) { Term hyph = new Term("description", lastTerm.text() + "-" + t.text()); descQueries.add(new TermQuery(hyph)); //descTerms.add(hyph); } lastTerm = t; } } catch (IOException e) { e.printStackTrace(); } return createDisjunction(2.0f, createMust(idQuery), createMust(accQuery), //createShould(descQueries.toArray(new Query[0]))); createDescendingQuery(descTerms.toArray(new Term[0]))); }
From source file:org.sc.probro.lucene.BiothesaurusSearcher.java
License:Apache License
public String[] tokenize(String input) { ArrayList<String> tokens = new ArrayList<String>(); try {/*from www.j av a 2 s .co m*/ TokenStream stream = analyzer.tokenStream(null, new StringReader(input)); TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class); //stream = new LowerCaseFilter(stream); stream.reset(); while (stream.incrementToken()) { if (stream.hasAttribute(TermAttribute.class)) { String term = termattr.term(); tokens.add(term); } } stream.end(); stream.close(); } catch (IllegalArgumentException e) { System.err.println(String.format("Phrase: \"%s\"", input)); e.printStackTrace(System.err); } catch (IOException e) { System.err.println(String.format("Phrase: \"%s\"", input)); e.printStackTrace(); } return tokens.toArray(new String[0]); }
From source file:org.sc.probro.lucene.BiothesaurusSearcher.java
License:Apache License
public Query createPhraseQuery(String field, String phrase) throws IOException { PhraseQuery query = new PhraseQuery(); /*// ww w . j av a 2 s . com String[] array = phrase.split("\\s+"); for(int i = 0; i < array.length; i++) { query.add(new Term(field, array[i])); } */ try { TokenStream stream = analyzer.tokenStream(field, new StringReader(phrase)); //stream = new LowerCaseFilter(stream); stream.reset(); while (stream.incrementToken()) { if (stream.hasAttribute(TermAttribute.class)) { TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class); Term t = new Term(field, termattr.term()); query.add(t); } } stream.end(); stream.close(); } catch (IllegalArgumentException e) { e.printStackTrace(System.err); System.err.println(String.format("Phrase: \"%s\"", phrase)); } return query; }
From source file:org.sc.probro.lucene.ProteinSearcher.java
License:Apache License
/**
 * Splits {@code input} into lower-cased analyzer tokens.
 *
 * @param input the raw text to tokenize
 * @return the tokens in emission order; possibly empty when analysis fails
 *         (failures are reported on stderr)
 */
public String[] tokenize(String input) {
    ArrayList<String> tokens = new ArrayList<String>();
    TokenStream stream = null;
    try {
        stream = analyzer.tokenStream(null, new StringReader(input));
        // normalize all tokens to lower case before collecting them
        stream = new LowerCaseFilter(stream);
        stream.reset();
        // hoisted out of the loop: the attribute instance is stable for the
        // lifetime of the stream, so one lookup suffices
        TermAttribute termattr = stream.hasAttribute(TermAttribute.class)
                ? (TermAttribute) stream.getAttribute(TermAttribute.class)
                : null;
        while (stream.incrementToken()) {
            if (termattr != null) {
                tokens.add(termattr.term());
            }
        }
        stream.end();
    } catch (IllegalArgumentException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace(System.err);
    } catch (IOException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace();
    } finally {
        // fix: close the stream on all paths (previously leaked on exception)
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return tokens.toArray(new String[0]);
}
From source file:org.sc.probro.lucene.ProteinSearcher.java
License:Apache License
public Query createPhraseQuery(String field, String phrase) throws IOException { PhraseQuery query = new PhraseQuery(); /*/*w ww . ja va 2 s. c om*/ String[] array = phrase.split("\\s+"); for(int i = 0; i < array.length; i++) { query.add(new Term(field, array[i])); } */ try { TokenStream stream = analyzer.tokenStream(field, new StringReader(phrase)); stream = new LowerCaseFilter(stream); stream.reset(); while (stream.incrementToken()) { if (stream.hasAttribute(TermAttribute.class)) { TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class); Term t = new Term(field, termattr.term()); query.add(t); } } stream.end(); stream.close(); } catch (IllegalArgumentException e) { e.printStackTrace(System.err); System.err.println(String.format("Phrase: \"%s\"", phrase)); } return query; }