Example usage for org.apache.lucene.search IndexSearcher doc

List of usage examples for org.apache.lucene.search IndexSearcher doc

Introduction

In this page you can find the example usage for org.apache.lucene.search IndexSearcher doc.

Prototype

public Document doc(int docID) throws IOException 

Source Link

Document

Sugar for .getIndexReader().document(docID)

Usage

From source file:irlucene.MEDRetrieval.java

/**
 * Computes precision and recall for a query's ranked result list.
 * Precision = relevant answers / total answers; recall = relevant answers /
 * number of ground-truth relevant documents.
 *
 * @param query query carrying the ground-truth relevant document ids
 * @param hits  ranked documents returned by the searcher
 * @return two-element array: [0] = precision, [1] = recall; both 0 when there
 *         are no answers, no relevant documents, or an I/O error occurs
 */
public double[] precisionRecal(QueryData query, ScoreDoc[] hits) {
    double precisionRecall[] = { 0, 0 };
    // try-with-resources: the original leaked the IndexReader on every call
    try (IndexReader indexReader = DirectoryReader.open(index)) {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        int relevantAnswers = 0;
        int answers = hits.length;
        int relevants = query.getNumberRelevantDocuments();
        for (ScoreDoc hit : hits) {
            Document doc = indexSearcher.doc(hit.doc);
            // parse the stored id once per hit, not once per relevant-id comparison
            int id = Integer.parseInt(doc.get("id").trim());
            for (int d : query.getRelevantDocuments()) {
                if (id == d) {
                    relevantAnswers++;
                }
            }
        }
        if (answers != 0 && relevants != 0) {
            precisionRecall[0] = (double) relevantAnswers / answers;
            precisionRecall[1] = (double) relevantAnswers / relevants;
        }
    } catch (IOException ex) {
        // NOTE(review): logger is registered under CFCRetrieval although this method
        // lives in MEDRetrieval — kept as-is, but looks like a copy/paste slip; verify.
        Logger.getLogger(CFCRetrieval.class.getName()).log(Level.SEVERE, null, ex);
    }
    return precisionRecall;
}

From source file:irlucene.MEDRetrieval.java

/**
 * Computes precision at rank n (P@N) as a percentage: the share of the top-n
 * results that are relevant to the query.
 *
 * @param query query carrying the ground-truth relevant document ids
 * @param hits  ranked documents returned by the searcher
 * @param n     cutoff rank; when n exceeds hits.length only the available
 *              hits are inspected, but the denominator stays n
 * @return P@N in [0, 100]; 0 when n <= 0 or on I/O error
 */
public double pAtN(QueryData query, ScoreDoc[] hits, int n) {
    double pAtN = 0;
    // try-with-resources: the original leaked the IndexReader on every call
    try (IndexReader indexReader = DirectoryReader.open(index)) {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        int relevantAnswers = 0;
        int limit = Math.min(n, hits.length);
        for (int i = 0; i < limit; ++i) {
            Document doc = indexSearcher.doc(hits[i].doc);
            int id = Integer.parseInt(doc.get("id").trim());
            for (int d : query.getRelevantDocuments()) {
                if (d == id) {
                    relevantAnswers++;
                }
            }
        }
        if (n > 0) {
            // BUG FIX: original used integer division (100 * relevantAnswers / n),
            // truncating the result to a multiple of 100/n — usually 0.
            pAtN = 100.0 * relevantAnswers / n;
        }
    } catch (IOException ex) {
        // NOTE(review): logger registered under CFCRetrieval, not MEDRetrieval — verify.
        Logger.getLogger(CFCRetrieval.class.getName()).log(Level.SEVERE, null, ex);
    }
    return pAtN;
}

From source file:irlucene.MEDRetrieval.java

/**
 * Prints the rank and stored "id" field of each hit to stdout,
 * one line per document ("rank id").
 *
 * @param hits ranked documents returned by the searcher
 */
public void printHits(ScoreDoc[] hits) {
    // try-with-resources: the original leaked the IndexReader on every call
    try (IndexReader indexReader = DirectoryReader.open(index)) {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        for (int i = 0; i < hits.length; ++i) {
            Document d = indexSearcher.doc(hits[i].doc);
            System.out.println((i + 1) + " " + d.get("id"));
        }
    } catch (IOException ex) {
        // NOTE(review): logger registered under CFCRetrieval, not MEDRetrieval — verify.
        Logger.getLogger(CFCRetrieval.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:it.cnr.ilc.lc.clavius.search.Tester.java

/**
 * Searches the "content" field of the Clavius plain-text index for the given
 * term and logs highlighted annotation fragments for each matching document.
 *
 * @param term query string, parsed with the classic QueryParser
 * @throws IOException                  on index access failure
 * @throws ParseException               when the term cannot be parsed
 * @throws InvalidTokenOffsetsException when the highlighter hits bad offsets
 */
private static void searchWithHighlighter(String term)
        throws IOException, ParseException, InvalidTokenOffsetsException {

    logger.info("searchWithContext2 (" + term + ")");
    // try-with-resources: the original never closed the directory or the reader
    try (Directory indexDirectory = FSDirectory
            .open(Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText"));
            DirectoryReader ireader = DirectoryReader.open(indexDirectory)) {

        IndexSearcher searcher = new IndexSearcher(ireader);
        QueryParser parser = new QueryParser("content", new StandardAnalyzer());
        Query query = parser.parse(term);

        // only the top 10 docs are collected, so iterate scoreDocs, not totalHits
        TopDocs hits = searcher.search(query, 10);

        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        ClaviusHighlighter highlighter = new ClaviusHighlighter(htmlFormatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(9));
        // BUG FIX: original looped i < hits.totalHits, which overruns scoreDocs
        // (length <= 10) whenever more than 10 documents match.
        for (int i = 0; i < hits.scoreDocs.length; i++) {
            int id = hits.scoreDocs[i].doc;
            Document doc = searcher.doc(id);
            String idDoc = doc.get("idDoc");
            TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content",
                    new StandardAnalyzer());
            List<Annotation> frag = highlighter.getBestTextClaviusFragments(tokenStream, idDoc, false, 10);
            for (int j = 0; j < frag.size(); j++) {
                logger.info("idDoc: " + idDoc + ", Annotation[" + j + "] " + frag.get(j).toString());
            }
        }
    }
}

From source file:it.cnr.ilc.lc.claviusweb.ClaviusSearch.java

/**
 * Full-text wildcard search over the "content" field of the Clavius index;
 * collects highlighted annotation fragments for every matching document.
 *
 * @param term wildcard term matched verbatim against the "content" field
 * @return all annotations found (empty on error — exceptions are logged,
 *         not propagated, despite the throws clause)
 * @throws IOException                  declared but handled internally
 * @throws ParseException               declared for API compatibility
 * @throws InvalidTokenOffsetsException declared but handled internally
 */
private static List<Annotation> fullTextSearch(String term)
        throws IOException, ParseException, InvalidTokenOffsetsException {

    log.info("fullTextSearch (" + term + ")");
    List<Annotation> result = new ArrayList<>();

    // try-with-resources: the original never closed the directory or the reader
    try (Directory indexDirectory = FSDirectory
            .open(Paths.get("/var/lucene/clavius-1.0.5/indexes/it.cnr.ilc.lc.claviusweb.entity.PlainText"));
            DirectoryReader ireader = DirectoryReader.open(indexDirectory)) {

        IndexSearcher searcher = new IndexSearcher(ireader);

        // pad punctuation with spaces so the whitespace tokenizer splits on it
        Analyzer fullTextAnalyzer = CustomAnalyzer.builder()
                .addCharFilter("patternReplace", "pattern", "([\\-\\(\\)\\[\\],\\.;:])", "replacement", " $1 ")
                .withTokenizer("whitespace").build();

        Query query = new WildcardQuery(new Term("content", term));
        TopDocs hits = searcher.search(query, MAX_SEARCH_HITS);

        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        ClaviusHighlighter highlighter = new ClaviusHighlighter(htmlFormatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter());

        log.info("hits.totalHits=(" + hits.totalHits + ")");
        // BUG FIX: original looped i < hits.totalHits, which overruns scoreDocs
        // (length <= MAX_SEARCH_HITS) whenever more documents match than were collected.
        for (int i = 0; i < hits.scoreDocs.length; i++) {
            int id = hits.scoreDocs[i].doc;
            Document doc = searcher.doc(id);
            String idDoc = doc.get("idDoc");

            TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content",
                    fullTextAnalyzer);

            List<Annotation> frag = highlighter.getBestTextClaviusFragments(tokenStream, doc, false, 10);
            for (int j = 0; j < frag.size(); j++) {
                log.debug("idDoc: " + idDoc + ", Annotation[" + j + "] " + frag.get(j).toString());
            }
            result.addAll(frag);
        }
    } catch (InvalidTokenOffsetsException | IOException e) {
        log.error(e);
    }
    log.info("Full Text Search found " + result.size() + " result(s) for term " + term);
    return result;
}

From source file:it.eng.spagobi.analiticalmodel.documentsbrowser.service.SearchContentAction.java

License:Mozilla Public License

/**
 * Handles a document-browser search request: builds the list of index fields
 * to search from the "ATTRIBUTES" request parameter, runs an exact or fuzzy
 * Lucene search, filters the hits by the user's visibility rights, and writes
 * the matching documents (with summaries, view names, and optional metadata
 * actions) back to the client as JSON. All failures are logged; none propagate.
 */
@Override
public void doService() {

    List objects; // BIObjects visible to the current user, serialized below

    logger.debug("IN");

    try {
        UserProfile profile = (UserProfile) getUserProfile();

        Vector<String> fieldsToSearch = new Vector<String>();
        String valueFilter = getAttributeAsString(SpagoBIConstants.VALUE_FILTER);

        String attributes = getAttributeAsString(ATTRIBUTES);
        String metaDataToSearch = null;
        if (attributes != null) {
            if (attributes.equalsIgnoreCase("ALL")) {//SEARCH IN ALL FIELDS
                fieldsToSearch.add(IndexingConstants.BIOBJ_LABEL);
                fieldsToSearch.add(IndexingConstants.BIOBJ_NAME);
                fieldsToSearch.add(IndexingConstants.BIOBJ_DESCR);
                fieldsToSearch.add(IndexingConstants.METADATA);
                //search metadata binary content
                fieldsToSearch.add(IndexingConstants.CONTENTS);
                //search subobject fields
                fieldsToSearch.add(IndexingConstants.SUBOBJ_DESCR);
                fieldsToSearch.add(IndexingConstants.SUBOBJ_NAME);
            } else if (attributes.equalsIgnoreCase("LABEL")) {//SEARCH IN LABEL DOC
                fieldsToSearch.add(IndexingConstants.BIOBJ_LABEL);
            } else if (attributes.equalsIgnoreCase("NAME")) {//SEARCH IN NAME DOC
                fieldsToSearch.add(IndexingConstants.BIOBJ_NAME);
            } else if (attributes.equalsIgnoreCase("DESCRIPTION")) {//SEARCH IN DESCRIPTION DOC
                fieldsToSearch.add(IndexingConstants.BIOBJ_DESCR);
            } else {//SEARCH IN CATEGORIES DOC
                    //get categories name
                // any other value is treated as a metadata category name to filter on
                metaDataToSearch = attributes;
                //fieldsToSearch.add(IndexingConstants.METADATA);
                fieldsToSearch.add(IndexingConstants.CONTENTS);
            }

        }

        boolean similar = getAttributeAsBoolean(SIMILAR);

        logger.debug("Parameter [" + SpagoBIConstants.VALUE_FILTER + "] is equal to [" + valueFilter + "]");
        // index lives under <resource-path>/idx, resource path resolved via JNDI
        String indexBasePath = "";
        String jndiBean = SingletonConfig.getInstance().getConfigValue("SPAGOBI.RESOURCE_PATH_JNDI_NAME");
        if (jndiBean != null) {
            indexBasePath = SpagoBIUtilities.readJndiResource(jndiBean);
        }
        String index = indexBasePath + "/idx";
        IndexReader reader;
        HashMap returned = null;
        try {
            reader = IndexReader.open(FSDirectory.open(new File(index)), true);
            // read-only=true
            IndexSearcher searcher = new IndexSearcher(reader);

            String[] fields = new String[fieldsToSearch.size()];
            fieldsToSearch.toArray(fields);

            //getting  documents

            // fuzzy vs exact search chosen by the SIMILAR request flag
            if (similar) {
                returned = LuceneSearcher.searchIndexFuzzy(searcher, valueFilter, index, fields,
                        metaDataToSearch);
            } else {
                returned = LuceneSearcher.searchIndex(searcher, valueFilter, index, fields, metaDataToSearch);
            }
            ScoreDoc[] hits = (ScoreDoc[]) returned.get("hits");

            objects = new ArrayList();
            if (hits != null) {
                for (int i = 0; i < hits.length; i++) {
                    ScoreDoc hit = hits[i];
                    Document doc = searcher.doc(hit.doc);
                    String biobjId = doc.get(IndexingConstants.BIOBJ_ID);

                    BIObject obj = DAOFactory.getBIObjectDAO().loadBIObjectForDetail(Integer.valueOf(biobjId));
                    // keep only documents the current profile is allowed to see
                    if (obj != null) {
                        boolean canSee = ObjectsAccessVerifier.canSee(obj, profile);
                        if (canSee) {
                            objects.add(obj);
                        }
                    }
                }
            }
            // NOTE(review): searcher is closed only on the success path; if any of
            // the catches below fires, the reader/searcher leak — consider a finally.
            searcher.close();
        } catch (CorruptIndexException e) {
            logger.error(e.getMessage(), e);
            throw new SpagoBIException("Index corrupted", e);

        } catch (IOException e) {
            logger.error(e.getMessage(), e);
            throw new SpagoBIException("Unable to read index", e);

        } // only searching, so
        catch (ParseException e) {
            logger.error(e.getMessage(), e);
            throw new SpagoBIException("Wrong query syntax", e);

        }

        // enrich each serialized document with its search summary and subobject views
        JSONArray documentsJSON = (JSONArray) SerializerFactory.getSerializer("application/json")
                .serialize(objects, null);
        for (int i = 0; i < documentsJSON.length(); i++) {
            JSONObject jsonobj = documentsJSON.getJSONObject(i);
            String biobjid = jsonobj.getString("id");
            String summary = (String) returned.get(biobjid);
            jsonobj.put("summary", summary);
            String views = (String) returned.get(biobjid + "-views");
            jsonobj.put("views", views);
        }
        Collection func = profile.getFunctionalities();

        // users with the metadata functionality get a "showmetadata" action per document
        if (func.contains("SeeMetadataFunctionality")) {
            JSONObject showmetadataAction = new JSONObject();
            showmetadataAction.put("name", "showmetadata");
            showmetadataAction.put("description", "Show Metadata");
            for (int i = 0; i < documentsJSON.length(); i++) {
                JSONObject documentJSON = documentsJSON.getJSONObject(i);
                documentJSON.getJSONArray("actions").put(showmetadataAction);
            }
        }
        JSONObject documentsResponseJSON = createJSONResponseDocuments(documentsJSON);

        try {
            writeBackToClient(new JSONSuccess(createJSONResponse(documentsResponseJSON)));
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
            throw new SpagoBIException("Impossible to write back the responce to the client", e);
        }

    } catch (Exception e) {
        logger.error("Excepiton", e);
    } finally {
        logger.debug("OUT");
    }
}

From source file:it.eng.spagobi.commons.utilities.indexing.LuceneSearcher.java

License:Mozilla Public License

/**
 * Runs an exact (parsed) multi-field search, AND-ed with an optional metadata
 * filter and the current tenant, and returns a map containing:
 * "hits" -> ScoreDoc[], plus per-document entries biobjId -> highlighted
 * summary (truncated to ~100 chars) and biobjId + "-views" -> subobject names.
 *
 * @param queryString      user query, parsed over all given fields
 * @param index            index path (unused here; kept for API compatibility)
 * @param metaDataToSearch optional metadata name to restrict matches to
 */
public static HashMap<String, Object> searchIndex(IndexSearcher searcher, String queryString, String index,
        String[] fields, String metaDataToSearch) throws IOException, ParseException {
    logger.debug("IN");
    HashMap<String, Object> objectsToReturn = new HashMap<String, Object>();

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    BooleanQuery andQuery = new BooleanQuery();
    if (metaDataToSearch != null) {
        // restrict to documents whose metadata name equals metaDataToSearch
        Query queryMetadata = new TermQuery(new Term(IndexingConstants.METADATA, metaDataToSearch));
        andQuery.add(queryMetadata, BooleanClause.Occur.MUST);
    }
    Query query = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, analyzer).parse(queryString);
    andQuery.add(query, BooleanClause.Occur.MUST);
    // always scope results to the current tenant
    Query tenantQuery = new TermQuery(new Term(IndexingConstants.TENANT, getTenant()));
    andQuery.add(tenantQuery, BooleanClause.Occur.MUST);
    logger.debug("Searching for: " + andQuery.toString());
    int hitsPerPage = 50;

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
    searcher.search(andQuery, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    objectsToReturn.put("hits", hits);

    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(andQuery));
    if (hits != null) {
        logger.debug("hits size: " + hits.length);
        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            String biobjId = doc.get(IndexingConstants.BIOBJ_ID);

            // expose the names of this document's subobjects as a space-joined string
            String[] subobjNames = doc.getValues(IndexingConstants.SUBOBJ_NAME);
            if (subobjNames != null && subobjNames.length != 0) {
                StringBuilder views = new StringBuilder();
                for (int k = 0; k < subobjNames.length; k++) {
                    views.append(subobjNames[k]).append(" ");
                }
                objectsToReturn.put(biobjId + "-views", views.toString());
            }
            // removed always-true "highlighter != null" guard — it was just constructed
            try {
                Integer idobj = Integer.valueOf(biobjId);

                String contentToSearchOn = fillSummaryText(idobj);

                String[] summaries = highlighter.getBestFragments(
                        new StandardAnalyzer(Version.LUCENE_CURRENT), IndexingConstants.CONTENTS,
                        contentToSearchOn, 3);
                StringBuilder summaryBuffer = new StringBuilder();
                if (summaries.length > 0) {
                    summaryBuffer.append(summaries[0]);
                }
                for (int j = 1; j < summaries.length; j++) {
                    summaryBuffer.append(" ... ");
                    summaryBuffer.append(summaries[j]);
                }
                String summary = summaryBuffer.toString();
                // keep only the first ~100 characters of the summary
                if (summary.length() > 101) {
                    summary = summary.substring(0, 100);
                    summary += "...";
                }
                objectsToReturn.put(biobjId, summary);
            } catch (Exception e) {
                // the original handled InvalidTokenOffsetsException, NumberFormatException,
                // and Exception identically — a single catch preserves that behavior
                logger.error(e.getMessage(), e);
            }
        }
    }
    int numTotalHits = collector.getTotalHits();
    logger.info(numTotalHits + " total matching documents");

    logger.debug("OUT");
    return objectsToReturn;

}

From source file:it.eng.spagobi.commons.utilities.indexing.LuceneSearcher.java

License:Mozilla Public License

/**
 * Runs a fuzzy search: the query string is OR-ed as a FuzzyQuery over every
 * field, then AND-ed with an optional metadata filter and the current tenant.
 * Returns the same map shape as searchIndex: "hits" -> ScoreDoc[], plus
 * biobjId -> highlighted summary entries.
 *
 * @param queryString      term matched fuzzily against each field
 * @param index            index path (unused here; kept for API compatibility)
 * @param metaDataToSearch optional metadata name to restrict matches to
 */
public static HashMap<String, Object> searchIndexFuzzy(IndexSearcher searcher, String queryString, String index,
        String[] fields, String metaDataToSearch) throws IOException, ParseException {
    logger.debug("IN");
    HashMap<String, Object> objectsToReturn = new HashMap<String, Object>();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    BooleanQuery orQuery = new BooleanQuery();
    BooleanQuery andQuery = new BooleanQuery();
    // any field may match the fuzzy term (SHOULD within the OR clause)
    for (int i = 0; i < fields.length; i++) {
        Query query = new FuzzyQuery(new Term(fields[i], queryString));
        // rewrite expands the fuzzy query so the highlighter can score its terms
        query = query.rewrite(searcher.getIndexReader());
        orQuery.add(query, BooleanClause.Occur.SHOULD);
    }
    andQuery.add(orQuery, BooleanClause.Occur.MUST);
    if (metaDataToSearch != null) {
        // restrict to documents whose metadata name equals metaDataToSearch
        Query queryMetadata = new TermQuery(new Term(IndexingConstants.METADATA, metaDataToSearch));
        andQuery.add(queryMetadata, BooleanClause.Occur.MUST);
    }

    // always scope results to the current tenant
    Query tenantQuery = new TermQuery(new Term(IndexingConstants.TENANT, getTenant()));
    andQuery.add(tenantQuery, BooleanClause.Occur.MUST);

    logger.debug("Searching for: " + andQuery.toString());
    int hitsPerPage = 50;

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
    searcher.search(andQuery, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    objectsToReturn.put("hits", hits);

    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(andQuery));

    if (hits != null) {
        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            String biobjId = doc.get(IndexingConstants.BIOBJ_ID);
            String summary = " ";
            // removed always-true "highlighter != null" guard — it was just constructed
            try {
                Integer idobj = Integer.valueOf(biobjId);

                String contentToSearchOn = fillSummaryText(idobj);
                String[] summaries = highlighter.getBestFragments(
                        new StandardAnalyzer(Version.LUCENE_CURRENT), IndexingConstants.CONTENTS,
                        contentToSearchOn, 3);

                StringBuilder summaryBuffer = new StringBuilder();
                if (summaries.length > 0) {
                    summaryBuffer.append(summaries[0]);
                }
                for (int j = 1; j < summaries.length; j++) {
                    summaryBuffer.append(" ... ");
                    summaryBuffer.append(summaries[j]);
                }
                summary = summaryBuffer.toString();
                // keep only the first ~100 characters of the summary
                if (summary.length() > 101) {
                    summary = summary.substring(0, 100);
                    summary += "...";
                }
                objectsToReturn.put(biobjId, summary);
            } catch (Exception e) {
                // original caught InvalidTokenOffsetsException and Exception with
                // identical handling — a single catch preserves that behavior
                logger.error(e.getMessage(), e);
            }
        }
    }

    int numTotalHits = collector.getTotalHits();
    logger.info(numTotalHits + " total matching documents");

    logger.debug("OUT");
    return objectsToReturn;

}

From source file:it.unipd.dei.ims.falcon.ranking.QueryMethods.java

License:Apache License

/**
 * Given a set of documents with multiple scores, retain the max score for
 * each document/*w  ww. j av  a 2s. c  om*/
 *
 * @param topdocs
 *            Lucene TopDocs; documents, namely segments, are ranked by
 *            score. The score of a segment is the sum of the score of its
 *            constituting hashes, specifically obtained by
 *            {@link it.unipd.dei.ims.falcon.ranking.SegmentQuery}
 * @param searcher
 *            Lucene {@link org.apache.lucene.search.IndexSearcher}
 * @return
 * @throws IOException
 */
/**
 * Given a set of segment documents with multiple scores, retain the maximum
 * score for each song (identified by the stored "TITLE" field).
 *
 * <p>Doc-id to song-id lookups are memoized in {@code docId2songidCache}.
 * The original relied on the hits being score-sorted and kept the first score
 * seen per song; this version takes an explicit max, so ordering no longer
 * matters.
 *
 * @param topdocs  Lucene TopDocs whose scoreDocs are segment hits
 * @param searcher searcher used to resolve a doc id to its song title
 * @return song title -> maximum segment score, sorted by title (TreeMap)
 * @throws IOException on index access failure
 */
private static Map<String, Double> reduceMaxScoreForEachSong(TopDocs topdocs, IndexSearcher searcher)
        throws IOException {
    if (docId2songidCache == null) {
        // TODO although this should work, it has not been checked for concurrency issues
        docId2songidCache = new ConcurrentHashMap<Integer, String>();
    }
    Map<String, Double> songid2maxscore = new TreeMap<String, Double>();
    for (ScoreDoc sd : topdocs.scoreDocs) {
        String songId = docId2songidCache.get(sd.doc);
        if (songId == null) {
            songId = searcher.doc(sd.doc).getField("TITLE").stringValue();
            docId2songidCache.put(sd.doc, songId);
        }
        // keep the true maximum per song (original kept the first score seen,
        // which was the max only because hits arrived score-sorted); also drops
        // the deprecated new Double(...) boxing and an unused rank counter
        Double previous = songid2maxscore.get(songId);
        double score = sd.score;
        if (previous == null || score > previous) {
            songid2maxscore.put(songId, score);
        }
    }
    return songid2maxscore;
}

From source file:javatools.webapi.LuceneIndexFiles.java

License:Apache License

/**
 * Runs the query, collecting up to 10 * hitsPerPage documents, and returns
 * {path, contents} pairs for every hit that has a stored "path" field.
 * Prints the total match count to stdout (preserved from the original).
 *
 * @param searcher    searcher over the target index
 * @param query       the parsed query to execute
 * @param hitsPerPage page size; ten pages' worth of docs are collected
 * @return list of {path, contents} arrays; contents may be null if unstored
 * @throws IOException on index access failure
 */
public static List<String[]> doPagingSearch2(IndexSearcher searcher, Query query, int hitsPerPage)
        throws IOException {
    List<String[]> searchresult = new ArrayList<String[]>();
    // Collect enough docs for 10 pages in a single search call
    TopDocs results = searcher.search(query, 10 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    System.out.println(results.totalHits + " total matching documents");

    // removed unused paging locals (start/end), dead commented-out prints, and
    // an empty null-check body from the original
    for (ScoreDoc hit : hits) {
        Document doc = searcher.doc(hit.doc);
        String path = doc.get("path");
        if (path != null) {
            searchresult.add(new String[] { path, doc.get("contents") });
        }
        // hits without a stored "path" are skipped, as before
    }
    return searchresult;
}