Example usage for org.apache.lucene.search IndexSearcher search

Introduction

This page collects example usages of org.apache.lucene.search.IndexSearcher.search drawn from open-source projects.

Prototype

public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager)
        throws IOException 

Document

Lower-level search API.
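
This overload runs the query with one Collector per index slice (possibly in parallel) and then calls the manager's reduce() to merge the per-collector results into a single value. Below is a minimal, self-contained sketch, assuming Lucene 5.1+ (where this overload exists); the index path, field name, and term are placeholders.

import java.io.IOException;
import java.nio.file.Paths;
import java.util.Collection;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.FSDirectory;

public class CollectorManagerExample {
    public static void main(String[] args) throws IOException {
        // "/path/to/index" and the field/term below are placeholders.
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new TermQuery(new Term("text", "lucene"));

            // One collector per slice; reduce() merges the per-slice hit counts.
            CollectorManager<TotalHitCountCollector, Integer> manager =
                    new CollectorManager<TotalHitCountCollector, Integer>() {
                        @Override
                        public TotalHitCountCollector newCollector() {
                            return new TotalHitCountCollector();
                        }

                        @Override
                        public Integer reduce(Collection<TotalHitCountCollector> collectors) {
                            int total = 0;
                            for (TotalHitCountCollector c : collectors) {
                                total += c.getTotalHits();
                            }
                            return total;
                        }
                    };

            int totalHits = searcher.search(query, manager);
            System.out.println("total hits: " + totalHits);
        }
    }
}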

Usage

From source file:au.edu.unimelb.csse.servlet.PagingServlet.java

License:Apache License

@Override
protected void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
    String pageNumParam = req.getParameter("j");
    String hash = req.getParameter("h");
    String prevQuery = req.getParameter("p");
    String prevCorpus = req.getParameter("c");
    String docNumsParam = req.getParameter("d");
    String totalHits = req.getParameter("t");
    if ((pageNumParam == null || hash == null || prevQuery == null || docNumsParam == null || prevCorpus == null
            || totalHits == null)
            || (hashValue(prevQuery, prevCorpus, docNumsParam, totalHits) != Integer.parseInt(hash))) {
        req.setAttribute("error", "Oops! An error has occurred.");
        logger.warning("Error searching: " + prevQuery + ". Incorrect hidden parameters in page.");
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
        return;
    }
    int requestedPage = Integer.parseInt(pageNumParam);

    String[] docStrings = docNumsParam.split(" ");
    int[] docNums = new int[docStrings.length];
    for (int i = 0; i < docStrings.length; i++) {
        docNums[i] = Integer.parseInt(docStrings[i]);
    }

    if (requestedPage - 1 > docNums.length) {
        req.setAttribute("error", "Oops! An error has occurred.");
        logger.warning("Error searching: " + prevQuery + ". Requested page exceeds number of result pages.");
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
        return;
    }

    String corpus = getCorpus(prevCorpus);
    req.setAttribute("corpus", corpus);
    res.setCharacterEncoding("UTF-8");

    IndexSearcher searcher = getSearcher(corpus, req, res);
    if (searcher == null) {
        req.setAttribute("error", "Oops! An error has occurred. Search engine not initialized.");
        logger.warning("Error searching: " + prevQuery + ". Search engine not initialized.");
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
        return;
    }

    String queryView = getReturnQuery(prevQuery);
    req.setAttribute("query-view", queryView);

    try {
        TreebankQuery tq = getTreebankQuery(req, res, corpus, prevQuery, pageNumParam);
        long start = System.nanoTime();
        SimpleHitCollector hitCollector = null;
        if (requestedPage == 1) {
            hitCollector = new SimpleHitCollector(MAX_RESULTS_PER_PAGE);
            searcher.search(tq, hitCollector);
        } else if (requestedPage % 10 < 6 && requestedPage % 10 > 1) {
            hitCollector = new SimpleHitCollector(MAX_RESULTS_PER_PAGE);
            searcher.truncatedSearch(tq, hitCollector, MAX_RESULTS_PER_PAGE, docNums[requestedPage - 2]);
        } else {
            if (requestedPage > docNums.length - 5) {
                int hitsToLoad = (docNums.length - requestedPage + 11) * MAX_RESULTS_PER_PAGE;
                hitCollector = new SimpleHitCollector(hitsToLoad);
                searcher.truncatedSearch(tq, hitCollector, hitsToLoad, docNums[requestedPage - 2]);
                int[] docs = hitCollector.lastDocOfEachPage(MAX_RESULTS_PER_PAGE);
                StringBuilder builder = new StringBuilder(docNumsParam);
                for (int i = docNums.length - requestedPage + 1; i < docs.length; i++) {
                    builder.append(" ");
                    builder.append(docs[i]);
                }
                docNumsParam = builder.toString();
            } else {
                // it has been previously loaded
                hitCollector = new SimpleHitCollector(MAX_RESULTS_PER_PAGE);
                searcher.truncatedSearch(tq, hitCollector, MAX_RESULTS_PER_PAGE, docNums[requestedPage - 2]);
            }
        }
        int numberOfResults = Math.min(hitCollector.totalHits, MAX_RESULTS_PER_PAGE);
        AllResults allResults = new AllResults(hitCollector.hits, numberOfResults, tq);
        Result[] resultMeta = allResults.collect(searcher);
        long end = System.nanoTime();

        setSearchTimeAttribute(req, start, end);
        req.setAttribute("totalhits", Integer.valueOf(totalHits));
        req.setAttribute("pagenum", requestedPage);
        req.setAttribute("docnums", docNumsParam);
        req.setAttribute("hash", hashValue(prevQuery, prevCorpus, docNumsParam, totalHits)); //should hash prevQuery and not queryview
        String[] results = new String[numberOfResults];
        for (int i = 0; i < numberOfResults; i++) {
            results[i] = searcher.doc(hitCollector.hits[i]).get("sent").trim();
        }
        req.setAttribute("results", results);
        req.setAttribute("metadata", resultMeta);
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/results.jsp");
        view.forward(req, res);
    } catch (ParseException e) {
        req.setAttribute("error", "Sorry! Cannot parse your query");
        logger.info("Q=\"" + prevQuery + "\";C=\"" + corpus + "\";S=\"no\"");
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
    } catch (Exception e) {
        req.setAttribute("error",
                "Oops! An error has occurred. " + e.getMessage() + ". The administrator will be informed.");
        logger.warning("Error searching: " + prevQuery);
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
    }

}
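
A caveat when reading this example: SimpleHitCollector and truncatedSearch appear to be extensions defined by this project (its own searcher subclass and collector), not Lucene core API; only the plain searcher.search(tq, hitCollector) call is the standard Collector-based overload.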

From source file:au.edu.unimelb.csse.servlet.QueryServletFull.java

License:Apache License

@Override
protected void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException {
    // req.setCharacterEncoding("utf-16");
    final String corpusParam = req.getParameter("corpus");
    String corpus = getCorpus(corpusParam);
    req.setAttribute("corpus", corpus);
    res.setCharacterEncoding("UTF-8");

    IndexSearcher searcher = getSearcher(corpus, req, res);
    if (searcher == null)
        return;

    String query = getQuery(req, res);
    if (query == null)
        return;

    String queryView = getReturnQuery(query);
    req.setAttribute("query-view", queryView);

    try {
        TreebankQuery tq = getTreebankQuery(req, res, corpus, query, null);
        SimpleHitCollector hitCollector = new SimpleHitCollector(100);
        long start = System.nanoTime();
        searcher.search(tq, hitCollector);
        int numberOfResults = Math.min(hitCollector.totalHits, MAX_RESULTS_PER_PAGE);
        AllResults allResults = new AllResults(hitCollector.hits, numberOfResults, tq);
        Result[] resultMeta = allResults.collect(searcher);
        long end = System.nanoTime();
        setSearchTimeAttribute(req, start, end);

        req.setAttribute("totalhits", hitCollector.totalHits);
        String[] results = new String[numberOfResults];
        for (int i = 0; i < numberOfResults; i++) {
            results[i] = searcher.doc(hitCollector.hits[i]).get("sent").trim();
        }
        req.setAttribute("results", results);
        req.setAttribute("metadata", resultMeta);

        // attributes for pagination
        int[] docNumInts = hitCollector.lastDocOfEachPage(MAX_RESULTS_PER_PAGE);
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < docNumInts.length; i++) {
            sb.append(docNumInts[i] + " ");
        }
        if (sb.length() > 0) {
            sb.deleteCharAt(sb.length() - 1);
        }
        req.setAttribute("pagenum", 1);
        final String docNums = sb.toString();
        req.setAttribute("docnums", docNums);
        req.setAttribute("hash",
                hashValue(query, corpusParam, docNums, String.valueOf(hitCollector.totalHits))); //should hash value of `query' and not `queryview' 
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/results.jsp");
        view.forward(req, res);
    } catch (ParseException e) {
        req.setAttribute("error", "Sorry! Cannot parse your query");
        logger.info("Q=\"" + query + "\";C=\"" + corpus + "\";S=\"no\"");
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
    } catch (Exception e) {
        req.setAttribute("error",
                "Oops! An error has occurred. " + e.getMessage() + ". The administrator will be informed.");
        logger.severe("Error searching: " + query);
        logger.severe(e.getMessage());
        RequestDispatcher view = req.getRequestDispatcher("/WEB-INF/error.jsp");
        view.forward(req, res);
    }
}

From source file:au.org.ala.names.search.ALANameIndexer.java

License:Open Source License

private String getValueFromIndex(IndexSearcher is, String searchField, String value, String retField) {
    TermQuery tq = new TermQuery(new Term(searchField, value));
    try {
        org.apache.lucene.search.TopDocs results = is.search(tq, 1);
        if (results.totalHits > 0)
            return is.doc(results.scoreDocs[0].doc).get(retField);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return value;
}
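
A portability note on the top-n overload used here: is.search(tq, 1) returns TopDocs, and on Lucene 8+ TopDocs.totalHits is a TotalHits object rather than a plain count, so the guard would become results.totalHits.value > 0 on those versions. Note also the deliberate fallback: on an IOException (or when nothing matches) the method simply returns the supplied value unchanged.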

From source file:au.org.ala.names.search.ALANameIndexer.java

License:Open Source License

/**
 * Determines whether or not the supplied taxon lsid was included in the
 * latest ANBG exports.
 *
 * @param is   the searcher over the name index
 * @param lsid the taxon concept LSID to look up
 * @return true if a document with the supplied lsid exists
 */
private boolean doesTaxonConceptExist(IndexSearcher is, String lsid) {
    TermQuery query = new TermQuery(new Term("lsid", lsid));
    try {
        org.apache.lucene.search.TopDocs results = is.search(query, 1);
        return results.totalHits > 0;
    } catch (IOException e) {
        return false;
    }

}

From source file:axiom.objectmodel.dom.LuceneManager.java

License:Open Source License

public Key[] getSourceNodeIds(final String id, final int mode, ArrayList protos, BooleanQuery append, Sort sort)
        throws Exception {
    IndexSearcher searcher = null;
    Hits hits = null;
    Key[] keys = null;
    BooleanQuery query = null;

    try {
        searcher = this.getIndexSearcher();
        query = new BooleanQuery();
        final int sizeOfProtos;
        if ((sizeOfProtos = protos.size()) > 0) {
            BooleanQuery proto_query = new BooleanQuery();
            for (int i = 0; i < sizeOfProtos; i++) {
                proto_query.add(new TermQuery(new Term(PROTOTYPE, (String) protos.get(i))),
                        BooleanClause.Occur.SHOULD);
            }
            query.add(proto_query, BooleanClause.Occur.MUST);
        }

        query.add(new TermQuery(new Term(REF_LIST_FIELD, id)), BooleanClause.Occur.MUST);

        if (append != null && append.getClauses().length > 0) {
            query.add(append, BooleanClause.Occur.MUST);
        }

        hits = searcher.search(query, sort);

        /*if (app.debug())
           app.logEvent("LuceneManager.getSourceNodeIds(): id=" + id + ",layer=" + mode
          + " executed query [" + query + " which resulted in " 
          + hits.length() + " hits");*/

        int size = hits.length();
        ArrayList<Key> list = new ArrayList<Key>();
        for (int i = 0; i < size; i++) {
            Document doc = hits.doc(i);

            if (!isIdInDocumentRefs(doc, id)) {
                continue;
            }

            Field id_field = doc.getField(ID);
            Field proto_field = doc.getField(PROTOTYPE);
            Field layer_field = doc.getField(LAYER_OF_SAVE);
            if (layer_field != null) {
                try {
                    if (mode < Integer.parseInt(layer_field.stringValue())) {
                        continue;
                    }
                } catch (Exception nfe) {
                    // ignore documents with an unparsable layer value
                }
            }
            if (id_field != null && proto_field != null) {
                list.add(new DbKey(this.app.getDbMapping(proto_field.stringValue()), id_field.stringValue(),
                        mode));
            }
        }

        keys = new Key[list.size()];
        list.toArray(keys);
    } catch (Exception ex) {
        app.logError(
                ErrorReporter.errorMsg(this.getClass(), "getSourceNodeIds") + "Could not retrieve document "
                        + id + " from Lucene index with query = " + (query != null ? query : "null"),
                ex);
        throw ex;
    } finally {
        this.releaseIndexSearcher(searcher);
    }

    return keys;
}
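
The Hits class used above was deprecated in the Lucene 2.x line and removed in 3.0. On later versions the same sorted query is typically expressed with the top-n overload; a minimal sketch under that assumption (Lucene 4.x+), with the result cap and per-document processing left hypothetical:

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopFieldDocs;

class SortedSearchSketch {
    // Equivalent of searcher.search(query, sort) without the removed Hits API;
    // maxDocs caps the result set, which Hits used to grow lazily on demand.
    static void collectSorted(IndexSearcher searcher, Query query, Sort sort, int maxDocs)
            throws IOException {
        TopFieldDocs results = searcher.search(query, maxDocs, sort);
        for (ScoreDoc sd : results.scoreDocs) {
            Document doc = searcher.doc(sd.doc); // hits.doc(i) becomes searcher.doc(docId)
            // ...filter on the ID, PROTOTYPE and LAYER_OF_SAVE fields as above...
        }
    }
}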

From source file:axiom.scripting.rhino.LuceneQueryDispatcher.java

License:Open Source License

private Object luceneHits(ArrayList prototypes, IFilter filter, SortObject sort, int maxResults,
        ArrayList opaths, IndexSearcher searcher, LuceneQueryParams params, int _layer) throws Exception {
    long start = System.currentTimeMillis();
    BooleanQuery primary = new BooleanQuery();
    final String PROTO = LuceneManager.PROTOTYPE;
    final BooleanClause.Occur SHOULD = BooleanClause.Occur.SHOULD;
    final BooleanClause.Occur MUST = BooleanClause.Occur.MUST;
    final TypeManager tmgr = this.app.typemgr;
    final ResourceProperties combined_props = new ResourceProperties();
    Sort lsort = null;

    final int length;
    if (prototypes != null && (length = prototypes.size()) > 0) {
        BooleanQuery proto_query = new BooleanQuery();
        for (int i = 0; i < length; i++) {
            String prototype = (String) prototypes.get(i);
            proto_query.add(new TermQuery(new Term(PROTO, prototype)), SHOULD);

            Prototype proto = tmgr.getPrototype(prototype);
            Stack protos = new Stack();
            while (proto != null) {
                protos.push(proto);
                proto = proto.getParentPrototype();
            }
            final int protoChainSize = protos.size();
            for (int j = 0; j < protoChainSize; j++) {
                proto = (Prototype) protos.pop();
                combined_props.putAll(proto.getTypeProperties());
            }
        }
        primary.add(proto_query, MUST);
    } else {
        ArrayList protoarr = app.getSearchablePrototypes();
        BooleanQuery proto_query = new BooleanQuery();
        for (int i = protoarr.size() - 1; i > -1; i--) {
            String protoName = (String) protoarr.get(i);
            proto_query.add(new TermQuery(new Term(PROTO, protoName)), SHOULD);

            Prototype proto = tmgr.getPrototype(protoName);
            Stack protos = new Stack();
            while (proto != null) {
                protos.push(proto);
                proto = proto.getParentPrototype();
            }
            final int protoChainSize = protos.size();
            for (int j = 0; j < protoChainSize; j++) {
                proto = (Prototype) protos.pop();
                combined_props.putAll(proto.getTypeProperties());
            }
        }
        primary.add(proto_query, MUST);
    }

    parseFilterIntoQuery(filter, primary, combined_props);
    RequestEvaluator reqeval = this.app.getCurrentRequestEvaluator();
    int layer = _layer;
    if (layer == -1) {
        layer = DbKey.LIVE_LAYER;
        if (reqeval != null) {
            layer = reqeval.getLayer();
        }
    }
    BooleanQuery layerQuery = new BooleanQuery();
    for (int i = 0; i <= layer; i++) {
        layerQuery.add(new TermQuery(new Term(LuceneManager.LAYER_OF_SAVE, i + "")),
                BooleanClause.Occur.SHOULD);
    }
    primary.add(layerQuery, BooleanClause.Occur.MUST);

    BooleanClause[] clauses = primary.getClauses();
    if (clauses == null || clauses.length == 0) {
        throw new Exception("QueryBean.executeQuery(): The lucene query doesn't have any clauses!");
    }

    if (filter.isCached()) {
        SimpleQueryFilter sqf = (SimpleQueryFilter) this.cache.get(primary);
        if (sqf == null) {
            sqf = new SimpleQueryFilter(primary);
            this.cache.put(primary, sqf);
        }
    }

    Object ret = null;
    int sizeOfResults = 0;

    try {
        if (app.debug()) {
            app.logEvent("running query " + primary + " with maxResults " + maxResults + " and sort "
                    + (sort == null ? "null" : getLuceneSort(sort)));
        }
        if (sort != null && (lsort = getLuceneSort(sort)) != null) {
            if (maxResults == -1 || opaths.size() > 0) {
                Hits h = searcher.search(primary, lsort);
                sizeOfResults = h.length();
                ret = h;
            } else {
                TopFieldDocs tfd = searcher.search(primary, null, maxResults, lsort);
                sizeOfResults = tfd.totalHits;
                ret = tfd;
            }
        } else {
            if (maxResults == -1 || opaths.size() > 0) {
                Hits h = searcher.search(primary);
                sizeOfResults = h.length();
                ret = h;
            } else {
                TopDocs td = searcher.search(primary, null, maxResults);
                sizeOfResults = td.totalHits;
                ret = td;
            }
        }

    } catch (Exception ex) {
        app.logError(ErrorReporter.errorMsg(this.getClass(), "luceneHits") + "Occurred on query = " + primary,
                ex);
    }

    if (ret == null) {
        ret = (maxResults == -1 || opaths.size() > 0) ? Boolean.TRUE : Boolean.FALSE;
    }

    if (params != null) {
        params.query = primary;
        params.max_results = maxResults;
        params.sort = lsort;
        params.rprops = combined_props;
    }
    if (app.debug()) {
        long time = System.currentTimeMillis() - start;
        app.logEvent("... took " + (time / 1000.0) + " seconds\n ------");
    }

    return ret;
}

From source file:back.Searcher.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search
 * engine presents pages of size n to the user. The user can then go to the
 * next page if interested in the next hits.
 *
 * When the query is executed for the first time, then only enough results
 * are collected to fill 5 result pages. If the user wants to page beyond
 * this limit, then the query is executed another time and all hits are
 * collected.
 * 
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive, List<String[]> resu, int consulta) throws IOException {

    // Relevant for the mining exercise
    int[] relevantes = new int[3];
    relevantes[consulta - 1] = 0;
    for (int i = 0; i < 200; i++) {
        String[] lista = resu.get(consulta - 1);
        if (Integer.parseInt(lista[i]) == 1) {
            relevantes[consulta - 1]++;
        }
    }

    int recuperados;
    int relevantesRecuperados = 0;

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    recuperados = numTotalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        // swapped end for numTotalHits
        // MAIN LOOP
        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.print((i + 1) + ". " + path);

                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + doc.get("title"));
                }

                int docEncontrado = 0;
                if (path != null) {
                    String docNumberString = path.substring(path.indexOf('#') + 1, path.indexOf(".txt"));
                    docEncontrado = Integer.parseInt(docNumberString);
                }

                if (consulta != 0) {
                    String[] lista = resu.get(consulta - 1);
                    if (Integer.parseInt(lista[docEncontrado - 1]) == 1) {
                        System.out.print(" [RELEVANT]");
                        relevantesRecuperados++;
                    }
                }
                System.out.println();
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }

        }

        // more math for the mining exercise
        if (consulta != 0) {
            double precision = ((double) relevantesRecuperados) / ((double) recuperados);
            double recall = ((double) relevantesRecuperados) / ((double) relevantes[consulta - 1]);
            double fmeasure = 2 * (precision * recall) / (precision + recall);
            System.out.println("Using information of Relevancy Matrix Row " + consulta);
            System.out.println("Precision: " + precision + "   | Recall: " + recall);
            System.out.println("F-Measure: " + fmeasure);
        }
        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
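
When the user pages past the cached window, the demo above simply re-runs the query and collects numTotalHits documents from the start. Newer Lucene versions (3.5+) also provide IndexSearcher.searchAfter, which resumes collection after the last hit of the previous page instead of re-collecting everything; a minimal sketch under that assumption:

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;

class SearchAfterSketch {
    // Fetch the page following 'last' (the final ScoreDoc of the previous page)
    // without re-collecting earlier pages; pass null to fetch the first page.
    static TopDocs nextPage(IndexSearcher searcher, Query query, ScoreDoc last, int hitsPerPage)
            throws IOException {
        return last == null ? searcher.search(query, hitsPerPage)
                : searcher.searchAfter(last, query, hitsPerPage);
    }
}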

From source file:bajavista.Buscador.java

public ArrayList<Informacion> buscarContenido(String busqueda) throws IOException, ParseException {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);

    File indexDirES = new File(dirIndexES);
    Directory indexES = FSDirectory.open(indexDirES);
    //File indexDirNONES = new File(dirIndexNONES);
    //Directory indexNONES = FSDirectory.open(indexDirNONES);

    // 2. Query
    String querystr = busqueda;

    Query q = new QueryParser(Version.LUCENE_43, "text", analyzer).parse(querystr);
    //Query qNONES = new QueryParser(Version.LUCENE_43, "contenido", analyzer).parse(querystr);

    // 3. Search
    int hitsPage = 1024;
    IndexReader reader = DirectoryReader.open(indexES);
    IndexSearcher searcher = new IndexSearcher(reader);

    //IndexReader readerNONES = DirectoryReader.open(indexNONES);
    //IndexSearcher searcherNONES = new IndexSearcher(readerNONES);
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPage, true);
    //TopScoreDocCollector collectorNONES = TopScoreDocCollector.create(hitsPage, true);

    searcher.search(q, collector);
    //searcherNONES.search(q, collectorNONES);

    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    // ScoreDoc[] hitsNONES = collectorNONES.topDocs().scoreDocs;

    // 4. Return results
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document data = searcher.doc(docId);
        info = new Informacion(Integer.parseInt(data.get("idUser")), Long.parseLong(data.get("timestamp")),
                data.get("text"), Double.parseDouble(data.get("objective")),
                Double.parseDouble(data.get("subjective")), Double.parseDouble(data.get("positive")),
                Double.parseDouble(data.get("negative")), Integer.parseInt(data.get("need")));
        listaInfo.add(info);
    }

    /*System.out.println("No ES Found " + hitsNONES.length + " hits.");
     for(int i=0;i<hitsNONES.length;++i) {
     int docId = hitsNONES[i].doc;
     Document d = searcherNONES.doc(docId);
     System.out.println((i + 1) + ". " + d.get("es") + "\t" + d.get("contenido"));
     }*/
    reader.close();
    //readerNONES.close();

    return listaInfo;
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.newsFetch.storm.bolts.NewsItemToTermsBolt.java

License:Apache License

private void updateTermMap(DirectoryReader reader, IndexSearcher searcher, Map<String, Double> termMap,
        String id, String field, double weight) throws IOException {
    Query query = new TermQuery(new Term("id", id));
    TopDocs topdocs = searcher.search(query, 1);

    if (topdocs.totalHits > 0) {
        int docNr = topdocs.scoreDocs[0].doc;
        Terms vector = reader.getTermVector(docNr, field);
        if (vector != null) {
            TermsEnum termsEnum = vector.iterator(TermsEnum.EMPTY);
            BytesRef text;
            while ((text = termsEnum.next()) != null) {
                String term = text.utf8ToString();
                int docFreq = reader.docFreq(new Term(field, text));
                // ignore really rare terms and really common terms
                double minFreq = reader.numDocs() * 0.0001;
                double maxFreq = reader.numDocs() / 3;
                //double minFreq = 0;
                //double maxFreq = Double.MAX_VALUE;

                if (docFreq > minFreq && docFreq < maxFreq) {
                    double tf = 1 + ((double) termsEnum.totalTermFreq()) / reader.getSumTotalTermFreq(field);
                    double idf = Math.log((double) reader.numDocs() / docFreq);
                    if (!Double.isInfinite(idf)) {
                        if (!termMap.containsKey(term)) {
                            termMap.put(term, tf * idf * weight);
                        } else {
                            termMap.put(term, termMap.get(term) + tf * idf * weight);
                        }
                    }
                }
            }
        } else {
            logger.debug("no term available for doc=" + docNr + " and field=" + field);
        }
    } else {
        logger.warn("No documents found with id=" + id);
    }
}
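
The weight computed above is a tf-idf variant: tf = 1 + totalTermFreq(term) / sumTotalTermFreq(field) and idf = ln(numDocs / docFreq). As a worked example, in a 10,000-document index a term that occurs in 10 documents gets idf = ln(1000) ≈ 6.9, while a term occurring in roughly a third or more of all documents is dropped by the maxFreq bound before it can dilute the term map.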

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.scorers.DatabaseLuceneScorer.java

License:Apache License

protected Map<String, Double> getTopTerms(String field, String value) throws IOException {
    manager.maybeRefreshBlocking();
    IndexSearcher searcher = manager.acquire();
    try {
        // Do not close the searcher's IndexReader here: it is owned and
        // reference-counted by the SearcherManager.
        IndexReader reader = searcher.getIndexReader();
        TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
        Query q = new TermQuery(new Term(field, value));
        searcher.search(q, collector);
        if (collector.getTotalHits() > 0) {
            int docNr = collector.topDocs().scoreDocs[0].doc;
            Document doc = reader.document(docNr);
            NewsItem nitem = NewsItemLuceneDocConverter.documentToNewsItem(doc);
            return nitem.getTerms();
        }
        logger.warn("Could not find document with " + field + "=" + value);
        return new HashMap<>();
    } finally {
        // Release in finally so the searcher is returned even on early return
        // or exception.
        manager.release(searcher);
    }
}
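
For reference, the acquire/try/finally/release cycle above is the canonical way to borrow a searcher from a SearcherManager. A minimal generic sketch; the SearcherCallback interface is a hypothetical stand-in for whatever work the caller performs:

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherManager;

class SearcherManagerPattern {
    interface SearcherCallback<T> {
        T run(IndexSearcher searcher) throws IOException;
    }

    // Borrow a searcher, run the callback, and always return the searcher to
    // the manager; the manager owns (and reference-counts) the underlying reader.
    static <T> T withSearcher(SearcherManager manager, SearcherCallback<T> callback)
            throws IOException {
        IndexSearcher searcher = manager.acquire();
        try {
            return callback.run(searcher);
        } finally {
            manager.release(searcher);
        }
    }
}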