List of usage examples for org.apache.lucene.analysis TokenStream addAttribute
public final <T extends Attribute> T addAttribute(Class<T> attClass)
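Before the collected examples, here is a minimal, self-contained sketch of the usual pattern around addAttribute: register the attribute, reset the stream, pull tokens with incrementToken, then end and close. The field name "body", the StandardAnalyzer, and the class name AddAttributeSketch are illustrative assumptions, not taken from any of the source files below.

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class AddAttributeSketch {
    public static List<String> tokenize(String text) throws IOException {
        List<String> terms = new ArrayList<>();
        try (Analyzer analyzer = new StandardAnalyzer();
                TokenStream stream = analyzer.tokenStream("body", new StringReader(text))) {
            // Register (or retrieve) the term attribute before consuming the stream.
            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                     // required before the first incrementToken()
            while (stream.incrementToken()) {
                terms.add(termAtt.toString()); // the attribute instance is updated in place per token
            }
            stream.end();                       // finalize end-of-stream state before closing
        }                                       // try-with-resources closes the stream, then the analyzer
        return terms;
    }
}

The examples that follow show the same pattern embedded in query building, term-frequency counting, and debugging utilities.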
From source file:practica3b.Practica3b.java
public static TopDocs busquedaAuthor(IndexSearcher is, String tipo, String tipo_year, String authors,
        Integer num1, Integer num2, FacetsCollector fc) throws IOException {
    Analyzer analizador = new StandardAnalyzer();
    List<String> palabras = new ArrayList<String>();
    try {
        // Tokenize the author string and collect each term.
        TokenStream stream = analizador.tokenStream(null, new StringReader(authors));
        CharTermAttribute catt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            palabras.add(catt.toString());
        }
        stream.end();
        stream.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    // Build one term clause per extracted author term.
    ArrayList<BooleanClause> bc = new ArrayList<BooleanClause>();
    for (int i = 0; i < palabras.size(); i++) {
        Query query = new TermQuery(new Term("Authors", palabras.get(i)));
        if (tipo.equals("should"))
            bc.add(new BooleanClause(query, BooleanClause.Occur.SHOULD));
        else if (tipo.equals("must"))
            bc.add(new BooleanClause(query, BooleanClause.Occur.MUST));
    }
    BooleanQuery.Builder bqbuilder = new BooleanQuery.Builder();
    for (int i = 0; i < bc.size(); i++) {
        bqbuilder.add(bc.get(i));
    }
    if (num1 != null) {
        Query q;
        if (num2 == null) {
            q = IntPoint.newExactQuery("Year", num1);
            bqbuilder.add(q, BooleanClause.Occur.MUST);
        } else {
            if (tipo_year.equals("range")) {
                q = IntPoint.newRangeQuery("Year", num1, num2);
                bqbuilder.add(q, BooleanClause.Occur.MUST);
            } else {
                q = IntPoint.newSetQuery("Year", num1, num2);
                bqbuilder.add(q, BooleanClause.Occur.MUST);
            }
        }
    }
    BooleanQuery bq = bqbuilder.build();
    fc = new FacetsCollector();
    TopDocs td = FacetsCollector.search(is, bq, 10, fc);
    for (ScoreDoc scoreDoc : td.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(scoreDoc.score + " - " + doc.get("Authors") + " - " + doc.get("Title")
                + " - Year: " + doc.get("Year"));
    }
    return td;
}
From source file:practica3b.Practica3b.java
public static ArrayList<BooleanClause> createClause(String busqueda, int tipo_busqueda, String tipo) {
    Analyzer analizador;
    List<String> palabras = new ArrayList<String>();
    if (tipo_busqueda == 1) {
        analizador = new EnglishAnalyzer();
    } else if (tipo_busqueda == 2) {
        analizador = new StandardAnalyzer();
    } else {
        analizador = new EnglishAnalyzer();
    }
    try {
        // Tokenize the search string and collect each term.
        TokenStream stream = analizador.tokenStream(null, new StringReader(busqueda));
        CharTermAttribute catt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            palabras.add(catt.toString());
        }
        stream.end();
        stream.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    ArrayList<BooleanClause> bc = new ArrayList<BooleanClause>();
    for (int i = 0; i < palabras.size(); i++) {
        Query q;
        if (tipo_busqueda == 1)
            q = new TermQuery(new Term("Title", palabras.get(i)));
        else if (tipo_busqueda == 2)
            q = new TermQuery(new Term("Authors", palabras.get(i)));
        else
            q = new TermQuery(new Term("Abstract", palabras.get(i)));
        if (tipo.equals("should"))
            bc.add(new BooleanClause(q, BooleanClause.Occur.SHOULD));
        else if (tipo.equals("must"))
            bc.add(new BooleanClause(q, BooleanClause.Occur.MUST));
    }
    return bc;
}
From source file:retriever.TermFreq.java
String analyze(String query) throws Exception {
    StringBuffer buff = new StringBuffer();
    TokenStream stream = analyzer.tokenStream("dummy", new StringReader(query));
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        String term = termAtt.toString();
        term = term.toLowerCase();
        buff.append(term).append(" ");
    }
    stream.end();
    stream.close();
    return buff.toString();
}
From source file:ri.trabri.Lucene.java
protected ArrayList<String> geraTokens(String text) throws IOException {
    TokenStream stream = this.analyzer.tokenStream(null, new StringReader(text));
    ArrayList<String> words = new ArrayList<>();
    CharTermAttribute cattr = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        words.add(cattr.toString());
    }
    stream.end();
    stream.close();
    return words;
}
From source file:se.inera.intyg.webcert.web.service.diagnos.repo.DiagnosRepositoryImpl.java
License:Open Source License
@Override
public List<Diagnos> searchDiagnosisByDescription(String searchString, int nbrOfResults) {
    if (Strings.isNullOrEmpty(searchString)) {
        return Collections.emptyList();
    }
    BooleanQuery query = new BooleanQuery();
    try (StandardAnalyzer analyzer = new StandardAnalyzer()) {
        TokenStream tokenStream = analyzer.tokenStream(DESC, searchString);
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            String term = WildcardQuery.WILDCARD_STRING + charTermAttribute.toString()
                    + WildcardQuery.WILDCARD_STRING;
            query.add(new WildcardQuery(new Term(DESC, term)), BooleanClause.Occur.MUST);
        }
    } catch (IOException e) {
        throw new RuntimeException("IOException occurred in lucene index search", e);
    }
    return searchDiagnosisByQuery(query, nbrOfResults);
}
From source file:servlets.TermStatsComparator.java
String analyze(String query) {
    StringBuffer buff = new StringBuffer();
    try {
        Analyzer analyzer = retriever.getAnalyzer();
        TokenStream stream = analyzer.tokenStream("dummy", new StringReader(query));
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            String term = termAtt.toString();
            buff.append(term);
            break; // only the first analyzed token is kept
        }
        stream.end();
        stream.close();
    } catch (Exception ex) {
        ex.printStackTrace();
        return query;
    }
    return buff.toString();
}
From source file:sh.isaac.provider.query.lucene.LuceneIndexer.java
License:Apache License
/**
 * Builds the prefix query.
 *
 * @param searchString the search string
 * @param field the field
 * @param analyzer the analyzer
 * @return the query
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected Query buildPrefixQuery(String searchString, String field, Analyzer analyzer) throws IOException {
    final TokenStream tokenStream;
    final List<String> terms;
    try (StringReader textReader = new StringReader(searchString)) {
        tokenStream = analyzer.tokenStream(field, textReader);
        tokenStream.reset();
        terms = new ArrayList<>();
        final CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        while (tokenStream.incrementToken()) {
            terms.add(charTermAttribute.toString());
        }
    }
    tokenStream.close();
    analyzer.close();

    final BooleanQuery.Builder bq = new BooleanQuery.Builder();
    if ((terms.size() > 0) && !searchString.endsWith(" ")) {
        final String last = terms.remove(terms.size() - 1);
        bq.add(new PrefixQuery((new Term(field, last))), Occur.MUST);
    }
    terms.stream().forEach((s) -> {
        bq.add(new TermQuery(new Term(field, s)), Occur.MUST);
    });
    return bq.build();
}
From source file:stackoverflow.lucene.modified.MoreLikeThis.java
License:Apache License
/**
 * Adds term frequencies found by tokenizing text from reader into the Map words
 *
 * @param r a source of text to be tokenized
 * @param termFreqMap a Map of terms and their frequencies
 * @param fieldName Used by analyzer for any special per-field analysis
 */
private void addTermFrequencies(Reader r, Map<String, Int> termFreqMap, String fieldName) throws IOException {
    if (analyzer == null) {
        throw new UnsupportedOperationException(
                "To use MoreLikeThis without term vectors, you must provide an Analyzer");
    }
    TokenStream ts = analyzer.tokenStream(fieldName, r);
    int tokenCount = 0;
    // for every token
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        String word = termAtt.toString();
        tokenCount++;
        if (tokenCount > maxNumTokensParsed) {
            break;
        }
        if (isNoiseWord(word)) {
            continue;
        }
        // increment frequency
        Int cnt = termFreqMap.get(word);
        if (cnt == null) {
            termFreqMap.put(word, new Int());
        } else {
            cnt.x++;
        }
    }
    ts.end();
    ts.close();
}
From source file:stroom.search.server.TestStandardAnalyser.java
License:Apache License
private void testAnalyser(final String input, final Analyzer analyzer) throws Exception {
    System.out.println("Testing analyser: " + analyzer.getClass().getName());

    final ReusableStringReader reader = new ReusableStringReader();
    reader.init(input);

    final TokenStream stream = analyzer.tokenStream("Test", reader);

    // reset the TokenStream to the first token
    stream.reset();

    boolean hasMoreTokens = stream.incrementToken();
    final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

    for (;;) {
        if (!hasMoreTokens) {
            break;
        }

        // Get the text of this term.
        final char[] tokenText = termAtt.buffer();
        final int tokenTextLen = termAtt.length();
        System.out.println(new String(tokenText, 0, tokenTextLen));

        hasMoreTokens = stream.incrementToken();
    }
}
From source file:test.analysis.AnalyzerUtils.java
License:Apache License
public static void displayTokens(TokenStream stream) throws IOException {
    stream.reset();
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    while (stream.incrementToken()) {
        System.out.print("[" + term + "] ");
    }
    stream.close();
}
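The same addAttribute call works for any Attribute subinterface, so a stream can expose several attributes at once. Below is a minimal sketch, not taken from any of the source files above, that extends the displayTokens idea to also read character offsets and position increments; the class name AnalyzerUtilsExtra is purely illustrative.

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class AnalyzerUtilsExtra {
    // Prints each term together with its character offsets and position increment.
    public static void displayTokensWithDetails(TokenStream stream) throws IOException {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            System.out.printf("[%s] %d-%d (+%d)%n",
                    term.toString(), offset.startOffset(), offset.endOffset(),
                    posIncr.getPositionIncrement());
        }
        stream.end();
        stream.close();
    }
}

Because all three attribute instances are registered before reset(), each incrementToken() updates them in place, just as in the single-attribute examples above.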