List of usage examples for org.apache.lucene.search.IndexSearcher#search
public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager) throws IOException
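Before the examples, a minimal sketch of this CollectorManager overload for orientation (an assumption-laden sketch: it assumes a recent Lucene 9.x release where TopScoreDocCollectorManager exists; the index path and field name are hypothetical). Most of the examples below predate this overload and use older entry points such as search(Query, Collector), search(Query, int), or the long-removed Hits API.

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;

public class CollectorManagerSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location and field name.
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new TermQuery(new Term("text", "lucene"));
            // Collect the top 10 hits via the CollectorManager overload. Each
            // concurrent search slice gets its own collector from newCollector();
            // reduce() merges the per-slice results into a single TopDocs.
            CollectorManager<TopScoreDocCollector, TopDocs> manager =
                    new TopScoreDocCollectorManager(10, null, Integer.MAX_VALUE);
            TopDocs top = searcher.search(query, manager);
            System.out.println(top.totalHits + " hits");
        }
    }
}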
From source file:au.edu.unimelb.csse.servlet.PagingServlet.java
License:Apache License
@Override
protected void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
    String pageNumParam = req.getParameter("j");
    String hash = req.getParameter("h");
    String prevQuery = req.getParameter("p");
    String prevCorpus = req.getParameter("c");
    String docNumsParam = req.getParameter("d");
    String totalHits = req.getParameter("t");
    if ((pageNumParam == null || hash == null || prevQuery == null || docNumsParam == null
            || prevCorpus == null || totalHits == null)
            || (hashValue(prevQuery, prevCorpus, docNumsParam, totalHits) != Integer.parseInt(hash))) {
        req.setAttribute("error", "Oops! An error has occurred.");
        logger.warning("Error searching: " + prevQuery + ". Incorrect hidden parameters in page.");
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
        return;
    }
    int requestedPage = Integer.valueOf(pageNumParam);
    String[] docStrings = docNumsParam.split(" ");
    int[] docNums = new int[docStrings.length];
    for (int i = 0; i < docStrings.length; i++) {
        docNums[i] = Integer.valueOf(docStrings[i]);
    }
    if (requestedPage - 1 > docNums.length) {
        req.setAttribute("error", "Oops! An error has occurred.");
        logger.warning("Error searching: " + prevQuery + ". Requested page exceeds number of result pages.");
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
        return;
    }
    String corpus = getCorpus(prevCorpus);
    req.setAttribute("corpus", corpus);
    res.setCharacterEncoding("UTF-8");
    IndexSearcher searcher = getSearcher(corpus, req, res);
    if (searcher == null) {
        req.setAttribute("error", "Oops! An error has occurred. Search engine not initialized.");
        logger.warning("Error searching: " + prevQuery + ". Search engine not initialized.");
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
        return;
    }
    String queryView = getReturnQuery(prevQuery);
    req.setAttribute("query-view", queryView);
    try {
        TreebankQuery tq = getTreebankQuery(req, res, corpus, prevQuery, pageNumParam);
        long start = System.nanoTime();
        SimpleHitCollector hitCollector = null;
        if (requestedPage == 1) {
            hitCollector = new SimpleHitCollector(MAX_RESULTS_PER_PAGE);
            searcher.search(tq, hitCollector);
        } else if (requestedPage % 10 < 6 && requestedPage % 10 > 1) {
            hitCollector = new SimpleHitCollector(MAX_RESULTS_PER_PAGE);
            searcher.truncatedSearch(tq, hitCollector, MAX_RESULTS_PER_PAGE, docNums[requestedPage - 2]);
        } else {
            if (requestedPage > docNums.length - 5) {
                int hitsToLoad = (docNums.length - requestedPage + 11) * MAX_RESULTS_PER_PAGE;
                hitCollector = new SimpleHitCollector(hitsToLoad);
                searcher.truncatedSearch(tq, hitCollector, hitsToLoad, docNums[requestedPage - 2]);
                int[] docs = hitCollector.lastDocOfEachPage(MAX_RESULTS_PER_PAGE);
                StringBuilder builder = new StringBuilder(docNumsParam);
                for (int i = docNums.length - requestedPage + 1; i < docs.length; i++) {
                    builder.append(" ");
                    builder.append(docs[i]);
                }
                docNumsParam = builder.toString();
            } else {
                // it has been previously loaded
                hitCollector = new SimpleHitCollector(MAX_RESULTS_PER_PAGE);
                searcher.truncatedSearch(tq, hitCollector, MAX_RESULTS_PER_PAGE, docNums[requestedPage - 2]);
            }
        }
        int numberOfResults = hitCollector.totalHits < MAX_RESULTS_PER_PAGE
                ? hitCollector.totalHits : MAX_RESULTS_PER_PAGE;
        AllResults allResults = new AllResults(hitCollector.hits, numberOfResults, tq);
        Result[] resultMeta = allResults.collect(searcher);
        long end = System.nanoTime();
        setSearchTimeAttribute(req, start, end);
        req.setAttribute("totalhits", Integer.valueOf(totalHits));
        req.setAttribute("pagenum", requestedPage);
        req.setAttribute("docnums", docNumsParam);
        // should hash prevQuery and not queryView
        req.setAttribute("hash", hashValue(prevQuery, prevCorpus, docNumsParam, totalHits));
        String[] results = new String[numberOfResults];
        for (int i = 0; i < numberOfResults; i++) {
            results[i] = searcher.doc(hitCollector.hits[i]).get("sent").trim();
        }
        req.setAttribute("results", results);
        req.setAttribute("metadata", resultMeta);
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/results.jsp");
        view.forward(req, res);
    } catch (ParseException e) {
        req.setAttribute("error", "Sorry! Cannot parse your query");
        logger.info("Q=\"" + prevQuery + "\";C=\"" + corpus + "\";S=\"no\"");
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
    } catch (Exception e) {
        req.setAttribute("error", "Oops! An error has occurred. " + e.getMessage()
                + ". The administrator will be informed.");
        logger.warning("Error searching: " + prevQuery);
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
    }
}
From source file:au.edu.unimelb.csse.servlet.QueryServletFull.java
License:Apache License
@Override
protected void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
    // req.setCharacterEncoding("utf-16");
    final String corpusParam = req.getParameter("corpus");
    String corpus = getCorpus(corpusParam);
    req.setAttribute("corpus", corpus);
    res.setCharacterEncoding("UTF-8");
    IndexSearcher searcher = getSearcher(corpus, req, res);
    if (searcher == null)
        return;
    String query = getQuery(req, res);
    if (query == null)
        return;
    String queryView = getReturnQuery(query);
    req.setAttribute("query-view", queryView);
    try {
        TreebankQuery tq = getTreebankQuery(req, res, corpus, query, null);
        SimpleHitCollector hitCollector = new SimpleHitCollector(100);
        long start = System.nanoTime();
        searcher.search(tq, hitCollector);
        int numberOfResults = hitCollector.totalHits < MAX_RESULTS_PER_PAGE
                ? hitCollector.totalHits : MAX_RESULTS_PER_PAGE;
        AllResults allResults = new AllResults(hitCollector.hits, numberOfResults, tq);
        Result[] resultMeta = allResults.collect(searcher);
        long end = System.nanoTime();
        setSearchTimeAttribute(req, start, end);
        req.setAttribute("totalhits", hitCollector.totalHits);
        String[] results = new String[numberOfResults];
        for (int i = 0; i < numberOfResults; i++) {
            results[i] = searcher.doc(hitCollector.hits[i]).get("sent").trim();
        }
        req.setAttribute("results", results);
        req.setAttribute("metadata", resultMeta);
        // attributes for pagination
        int[] docNumInts = hitCollector.lastDocOfEachPage(MAX_RESULTS_PER_PAGE);
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < docNumInts.length; i++) {
            sb.append(docNumInts[i] + " ");
        }
        if (sb.length() > 0) {
            sb.deleteCharAt(sb.length() - 1);
        }
        req.setAttribute("pagenum", 1);
        final String docNums = sb.toString();
        req.setAttribute("docnums", docNums);
        // should hash value of `query' and not `queryView'
        req.setAttribute("hash", hashValue(query, corpusParam, docNums, String.valueOf(hitCollector.totalHits)));
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/results.jsp");
        view.forward(req, res);
    } catch (ParseException e) {
        req.setAttribute("error", "Sorry! Cannot parse your query");
        logger.info("Q=\"" + query + "\";C=\"" + corpus + "\";S=\"no\"");
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
    } catch (Exception e) {
        req.setAttribute("error", "Oops! An error has occurred. " + e.getMessage()
                + ". The administrator will be informed.");
        logger.severe("Error searching: " + query);
        logger.severe(e.getMessage());
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
    }
}
From source file:au.org.ala.names.search.ALANameIndexer.java
License:Open Source License
private String getValueFromIndex(IndexSearcher is, String searchField, String value, String retField) {
    TermQuery tq = new TermQuery(new Term(searchField, value));
    try {
        org.apache.lucene.search.TopDocs results = is.search(tq, 1);
        if (results.totalHits > 0)
            return is.doc(results.scoreDocs[0].doc).get(retField);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return value;
}
From source file:au.org.ala.names.search.ALANameIndexer.java
License:Open Source License
/**
 * Determines whether or not the supplied taxon lsid was included in the
 * latest ANBG exports.
 *
 * @param is
 * @param lsid
 * @return
 */
private boolean doesTaxonConceptExist(IndexSearcher is, String lsid) {
    TermQuery query = new TermQuery(new Term("lsid", lsid));
    try {
        org.apache.lucene.search.TopDocs results = is.search(query, 1);
        return results.totalHits > 0;
    } catch (IOException e) {
        return false;
    }
}
From source file:axiom.objectmodel.dom.LuceneManager.java
License:Open Source License
public Key[] getSourceNodeIds(final String id, final int mode, ArrayList protos, BooleanQuery append, Sort sort)
        throws Exception {
    IndexSearcher searcher = null;
    Hits hits = null;
    Key[] keys = null;
    BooleanQuery query = null;
    try {
        searcher = this.getIndexSearcher();
        query = new BooleanQuery();
        final int sizeOfProtos;
        if ((sizeOfProtos = protos.size()) > 0) {
            BooleanQuery proto_query = new BooleanQuery();
            for (int i = 0; i < sizeOfProtos; i++) {
                proto_query.add(new TermQuery(new Term(PROTOTYPE, (String) protos.get(i))),
                        BooleanClause.Occur.SHOULD);
            }
            query.add(proto_query, BooleanClause.Occur.MUST);
        }
        query.add(new TermQuery(new Term(REF_LIST_FIELD, id)), BooleanClause.Occur.MUST);
        if (append != null && append.getClauses().length > 0) {
            query.add(append, BooleanClause.Occur.MUST);
        }
        hits = searcher.search(query, sort);
        /*if (app.debug())
            app.logEvent("LuceneManager.getSourceNodeIds(): id=" + id + ",layer=" + mode
                    + " executed query [" + query + " which resulted in " + hits.length() + " hits");*/
        int size = hits.length();
        ArrayList<Key> list = new ArrayList<Key>();
        for (int i = 0; i < size; i++) {
            Document doc = hits.doc(i);
            if (!isIdInDocumentRefs(doc, id)) {
                continue;
            }
            Field id_field = doc.getField(ID);
            Field proto_field = doc.getField(PROTOTYPE);
            Field layer_field = doc.getField(LAYER_OF_SAVE);
            if (layer_field != null) {
                try {
                    if (mode < Integer.parseInt(layer_field.stringValue())) {
                        continue;
                    }
                } catch (Exception nfe) {
                }
            }
            if (id_field != null && proto_field != null) {
                list.add(new DbKey(this.app.getDbMapping(proto_field.stringValue()),
                        id_field.stringValue(), mode));
            }
        }
        keys = new Key[list.size()];
        list.toArray(keys);
    } catch (Exception ex) {
        app.logError(ErrorReporter.errorMsg(this.getClass(), "getSourceNodeIds")
                + "Could not retrieve document " + id + " from Lucene index with query = "
                + (query != null ? query : "null"), ex);
        throw ex;
    } finally {
        this.releaseIndexSearcher(searcher);
    }
    return keys;
}
From source file:axiom.scripting.rhino.LuceneQueryDispatcher.java
License:Open Source License
private Object luceneHits(ArrayList prototypes, IFilter filter, SortObject sort, int maxResults,
        ArrayList opaths, IndexSearcher searcher, LuceneQueryParams params, int _layer) throws Exception {
    long start = System.currentTimeMillis();
    BooleanQuery primary = new BooleanQuery();
    final String PROTO = LuceneManager.PROTOTYPE;
    final BooleanClause.Occur SHOULD = BooleanClause.Occur.SHOULD;
    final BooleanClause.Occur MUST = BooleanClause.Occur.MUST;
    final TypeManager tmgr = this.app.typemgr;
    final ResourceProperties combined_props = new ResourceProperties();
    Sort lsort = null;
    final int length;
    if (prototypes != null && (length = prototypes.size()) > 0) {
        BooleanQuery proto_query = new BooleanQuery();
        for (int i = 0; i < length; i++) {
            String prototype = (String) prototypes.get(i);
            proto_query.add(new TermQuery(new Term(PROTO, prototype)), SHOULD);
            Prototype proto = tmgr.getPrototype(prototype);
            Stack protos = new Stack();
            while (proto != null) {
                protos.push(proto);
                proto = proto.getParentPrototype();
            }
            final int protoChainSize = protos.size();
            for (int j = 0; j < protoChainSize; j++) {
                proto = (Prototype) protos.pop();
                combined_props.putAll(proto.getTypeProperties());
            }
        }
        primary.add(proto_query, MUST);
    } else {
        ArrayList protoarr = app.getSearchablePrototypes();
        BooleanQuery proto_query = new BooleanQuery();
        for (int i = protoarr.size() - 1; i > -1; i--) {
            String protoName = (String) protoarr.get(i);
            proto_query.add(new TermQuery(new Term(PROTO, protoName)), SHOULD);
            Prototype proto = tmgr.getPrototype(protoName);
            Stack protos = new Stack();
            while (proto != null) {
                protos.push(proto);
                proto = proto.getParentPrototype();
            }
            final int protoChainSize = protos.size();
            for (int j = 0; j < protoChainSize; j++) {
                proto = (Prototype) protos.pop();
                combined_props.putAll(proto.getTypeProperties());
            }
        }
        primary.add(proto_query, MUST);
    }
    parseFilterIntoQuery(filter, primary, combined_props);
    RequestEvaluator reqeval = this.app.getCurrentRequestEvaluator();
    int layer = _layer;
    if (layer == -1) {
        layer = DbKey.LIVE_LAYER;
        if (reqeval != null) {
            layer = reqeval.getLayer();
        }
    }
    BooleanQuery layerQuery = new BooleanQuery();
    for (int i = 0; i <= layer; i++) {
        layerQuery.add(new TermQuery(new Term(LuceneManager.LAYER_OF_SAVE, i + "")),
                BooleanClause.Occur.SHOULD);
    }
    primary.add(layerQuery, BooleanClause.Occur.MUST);
    BooleanClause[] clauses = primary.getClauses();
    if (clauses == null || clauses.length == 0) {
        throw new Exception("QueryBean.executeQuery(): The lucene query doesn't have any clauses!");
    }
    if (filter.isCached()) {
        SimpleQueryFilter sqf = (SimpleQueryFilter) this.cache.get(primary);
        if (sqf == null) {
            sqf = new SimpleQueryFilter(primary);
            this.cache.put(primary, sqf);
        }
    }
    Object ret = null;
    int sizeOfResults = 0;
    try {
        if (app.debug()) {
            app.logEvent("running query " + primary + " with maxResults " + maxResults + " and sort "
                    + (sort == null ? "null" : getLuceneSort(sort)));
        }
        if (sort != null && (lsort = getLuceneSort(sort)) != null) {
            if (maxResults == -1 || opaths.size() > 0) {
                Hits h = searcher.search(primary, lsort);
                sizeOfResults = h.length();
                ret = h;
            } else {
                TopFieldDocs tfd = searcher.search(primary, null, maxResults, lsort);
                sizeOfResults = tfd.totalHits;
                ret = tfd;
            }
        } else {
            if (maxResults == -1 || opaths.size() > 0) {
                Hits h = searcher.search(primary);
                sizeOfResults = h.length();
                ret = h;
            } else {
                TopDocs td = searcher.search(primary, null, maxResults);
                sizeOfResults = td.totalHits;
                ret = td;
            }
        }
    } catch (Exception ex) {
        app.logError(ErrorReporter.errorMsg(this.getClass(), "luceneHits")
                + "Occurred on query = " + primary, ex);
    }
    if (ret == null) {
        ret = (maxResults == -1 || opaths.size() > 0) ? Boolean.TRUE : Boolean.FALSE;
    }
    if (params != null) {
        params.query = primary;
        params.max_results = maxResults;
        params.sort = lsort;
        params.rprops = combined_props;
    }
    if (app.debug()) {
        long time = System.currentTimeMillis() - start;
        app.logEvent("... took " + (time / 1000.0) + " seconds\n ------");
    }
    return ret;
}
From source file:back.Searcher.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search
 * engine presents pages of size n to the user. The user can then go to the
 * next page if interested in the next hits.
 *
 * When the query is executed for the first time, only enough results are
 * collected to fill 5 result pages. If the user wants to page beyond this
 * limit, the query is executed another time and all hits are collected.
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive, List<String[]> resu, int consulta) throws IOException {
    // relevance counts for the mining exercise
    int[] relevantes = new int[3];
    relevantes[consulta - 1] = 0;
    for (int i = 0; i < 200; i++) {
        String[] lista = resu.get(consulta - 1);
        if (Integer.parseInt(lista[i]) == 1) {
            relevantes[consulta - 1]++;
        }
    }
    int recuperados;
    int relevantesRecuperados = 0;
    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;
    recuperados = numTotalHits;
    System.out.println(numTotalHits + " total matching documents");
    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);
    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }
        end = Math.min(hits.length, start + hitsPerPage); // changed from numTotalHits to end
        // main loop
        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }
            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.print((i + 1) + ". " + path);
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + doc.get("title"));
                }
                int docEncontrado = 0;
                if (path != null) {
                    String docNumberString = path.substring(path.indexOf('#') + 1, path.indexOf(".txt"));
                    docEncontrado = Integer.parseInt(docNumberString);
                }
                if (consulta != 0) {
                    String[] lista = resu.get(consulta - 1);
                    if (Integer.parseInt(lista[docEncontrado - 1]) == 1) {
                        System.out.print(" [RELEVANT]");
                        relevantesRecuperados++;
                    }
                }
                System.out.println();
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }
        // precision/recall arithmetic for the mining exercise
        if (consulta != 0) {
            double precision = ((double) relevantesRecuperados) / ((double) recuperados);
            double recall = ((double) relevantesRecuperados) / ((double) relevantes[consulta - 1]);
            double fmeasure = 2 * (precision * recall) / (precision + recall);
            System.out.println("Using information of Relevancy Matrix Row " + consulta);
            System.out.println("Precision: " + precision + " | Recall: " + recall);
            System.out.println("F-Measure: " + fmeasure);
        }
        if (!interactive || end == 0) {
            break;
        }
        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");
                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:bajavista.Buscador.java
public ArrayList<Informacion> buscarContenido(String busqueda) throws IOException, ParseException {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
    File indexDirES = new File(dirIndexES);
    Directory indexES = FSDirectory.open(indexDirES);
    //File indexDirNONES = new File(dirIndexNONES);
    //Directory indexNONES = FSDirectory.open(indexDirNONES);

    // 2. Query
    String querystr = busqueda;
    Query q = new QueryParser(Version.LUCENE_43, "text", analyzer).parse(querystr);
    //Query qNONES = new QueryParser(Version.LUCENE_43, "contenido", analyzer).parse(querystr);

    // 3. Search
    int hitsPage = 1024;
    IndexReader reader = DirectoryReader.open(indexES);
    IndexSearcher searcher = new IndexSearcher(reader);
    //IndexReader readerNONES = DirectoryReader.open(indexNONES);
    //IndexSearcher searcherNONES = new IndexSearcher(readerNONES);
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPage, true);
    //TopScoreDocCollector collectorNONES = TopScoreDocCollector.create(hitsPage, true);
    searcher.search(q, collector);
    //searcherNONES.search(q, collectorNONES);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    //ScoreDoc[] hitsNONES = collectorNONES.topDocs().scoreDocs;

    // 4. Return results
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document data = searcher.doc(docId);
        info = new Informacion(Integer.parseInt(data.get("idUser")), Long.parseLong(data.get("timestamp")),
                data.get("text"), Double.parseDouble(data.get("objective")),
                Double.parseDouble(data.get("subjective")), Double.parseDouble(data.get("positive")),
                Double.parseDouble(data.get("negative")), Integer.parseInt(data.get("need")));
        listaInfo.add(info);
    }
    /*System.out.println("No ES Found " + hitsNONES.length + " hits.");
    for (int i = 0; i < hitsNONES.length; ++i) {
        int docId = hitsNONES[i].doc;
        Document d = searcherNONES.doc(docId);
        System.out.println((i + 1) + ". " + d.get("es") + "\t" + d.get("contenido"));
    }*/
    reader.close();
    //readerNONES.close();
    return listaInfo;
}
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.newsFetch.storm.bolts.NewsItemToTermsBolt.java
License:Apache License
private void updateTermMap(DirectoryReader reader, IndexSearcher searcher, Map<String, Double> termMap,
        String id, String field, double weight) throws IOException {
    Query query = new TermQuery(new Term("id", id));
    TopDocs topdocs = searcher.search(query, 1);
    if (topdocs.totalHits > 0) {
        int docNr = topdocs.scoreDocs[0].doc;
        Terms vector = reader.getTermVector(docNr, field);
        if (vector != null) {
            TermsEnum termsEnum = vector.iterator(TermsEnum.EMPTY);
            BytesRef text;
            while ((text = termsEnum.next()) != null) {
                String term = text.utf8ToString();
                int docFreq = reader.docFreq(new Term(field, text));
                // ignore really rare terms and really common terms
                double minFreq = reader.numDocs() * 0.0001;
                double maxFreq = reader.numDocs() / 3;
                //double minFreq = 0;
                //double maxFreq = Double.MAX_VALUE;
                if (docFreq > minFreq && docFreq < maxFreq) {
                    double tf = 1 + ((double) termsEnum.totalTermFreq()) / reader.getSumTotalTermFreq(field);
                    double idf = Math.log((double) reader.numDocs() / docFreq);
                    if (!Double.isInfinite(idf)) {
                        if (!termMap.containsKey(term)) {
                            termMap.put(term, tf * idf * weight);
                        } else {
                            termMap.put(term, termMap.get(term) + tf * idf * weight);
                        }
                    }
                }
            }
        } else {
            logger.debug("no term vector available for doc=" + docNr + " and field=" + field);
        }
    } else {
        logger.warn("No documents found with id=" + id);
    }
}
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.scorers.DatabaseLuceneScorer.java
License:Apache License
protected Map<String, Double> getTopTerms(String field, String value) throws IOException {
    manager.maybeRefreshBlocking();
    IndexSearcher searcher = manager.acquire();
    try {
        IndexReader reader = searcher.getIndexReader();
        TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
        Query q = new TermQuery(new Term(field, value));
        searcher.search(q, collector);
        if (collector.getTotalHits() > 0) {
            int docNr = collector.topDocs().scoreDocs[0].doc;
            Document doc = reader.document(docNr);
            NewsItem nitem = NewsItemLuceneDocConverter.documentToNewsItem(doc);
            return nitem.getTerms();
        } else {
            logger.warn("Could not find document with " + field + "=" + value);
        }
    } finally {
        // release (not close) the searcher obtained from the SearcherManager;
        // closing its IndexReader would break the manager's reference counting,
        // and release must run even when search throws
        manager.release(searcher);
    }
    return new HashMap<>();
}