Example usage for org.apache.lucene.search IndexSearcher doc

Introduction

This page collects usage examples for the org.apache.lucene.search IndexSearcher doc method.

Prototype

public Document doc(int docID) throws IOException 

Document

Sugar for .getIndexReader().document(docID)
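
Quick example

IndexSearcher.doc(int) resolves a hit's internal document ID (from ScoreDoc.doc) to its stored fields. Before the full examples below, here is a minimal sketch of the typical search-then-load pattern; the Directory parameter and the "filename" stored field are illustrative assumptions only.

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

public static void printStoredFileNames(Directory directory) throws IOException {
    try (IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        // Any query works here; MatchAllDocsQuery simply returns every document
        TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 10);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // doc(int) loads the stored fields for the hit's internal document ID
            Document stored = searcher.doc(scoreDoc.doc);
            System.out.println(stored.get("filename")); // hypothetical stored field name
        }
    }
}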

Usage

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

public QueryResult performQuery(String aQueryString, String aBacklink, String aBasePath,
        Configuration aConfiguration, Map<String, String> aDrilldownFields) throws IOException {

    searcherManager.maybeRefreshBlocking();
    IndexSearcher theSearcher = searcherManager.acquire();
    SortedSetDocValuesReaderState theSortedSetState = new DefaultSortedSetDocValuesReaderState(
            theSearcher.getIndexReader());

    List<QueryResultDocument> theResultDocuments = new ArrayList<>();

    long theStartTime = System.currentTimeMillis();

    LOGGER.info("Querying for " + aQueryString);

    DateFormat theDateFormat = new SimpleDateFormat("dd.MMMM.yyyy", Locale.ENGLISH);

    try {

        List<FacetDimension> theDimensions = new ArrayList<>();

        // Search only if a search query is given
        if (!StringUtils.isEmpty(aQueryString)) {

            Query theQuery = computeBooleanQueryFor(aQueryString);

            LOGGER.info(" query is " + theQuery);

            theQuery = theQuery.rewrite(theSearcher.getIndexReader());

            LOGGER.info(" rewritten query is " + theQuery);

            DrillDownQuery theDrilldownQuery = new DrillDownQuery(facetsConfig, theQuery);
            aDrilldownFields.entrySet().stream().forEach(aEntry -> {
                LOGGER.info(" with Drilldown " + aEntry.getKey() + " for " + aEntry.getValue());
                theDrilldownQuery.add(aEntry.getKey(), aEntry.getValue());
            });

            FacetsCollector theFacetCollector = new FacetsCollector();

            TopDocs theDocs = FacetsCollector.search(theSearcher, theDrilldownQuery, null,
                    aConfiguration.getNumberOfSearchResults(), theFacetCollector);
            SortedSetDocValuesFacetCounts theFacetCounts = new SortedSetDocValuesFacetCounts(theSortedSetState,
                    theFacetCollector);

            List<Facet> theAuthorFacets = new ArrayList<>();
            List<Facet> theFileTypesFacets = new ArrayList<>();
            List<Facet> theLastModifiedYearFacet = new ArrayList<>();
            List<Facet> theLanguageFacet = new ArrayList<>();

            LOGGER.info("Found " + theDocs.scoreDocs.length + " documents");

            // We need this cache to detect duplicate documents while searching for similarities
            Set<Integer> theUniqueDocumentsFound = new HashSet<>();

            Map<String, QueryResultDocument> theDocumentsByHash = new HashMap<>();

            for (int i = 0; i < theDocs.scoreDocs.length; i++) {
                int theDocumentID = theDocs.scoreDocs[i].doc;
                theUniqueDocumentsFound.add(theDocumentID);
                Document theDocument = theSearcher.doc(theDocumentID);

                String theUniqueID = theDocument.getField(IndexFields.UNIQUEID).stringValue();
                String theFoundFileName = theDocument.getField(IndexFields.FILENAME).stringValue();
                String theHash = theDocument.getField(IndexFields.CONTENTMD5).stringValue();
                QueryResultDocument theExistingDocument = theDocumentsByHash.get(theHash);
                if (theExistingDocument != null) {
                    theExistingDocument.addFileName(theFoundFileName);
                } else {
                    Date theLastModified = new Date(
                            theDocument.getField(IndexFields.LASTMODIFIED).numericValue().longValue());
                    SupportedLanguage theLanguage = SupportedLanguage
                            .valueOf(theDocument.getField(IndexFields.LANGUAGESTORED).stringValue());
                    String theFieldName;
                    if (analyzerCache.supportsLanguage(theLanguage)) {
                        theFieldName = analyzerCache.getFieldNameFor(theLanguage);
                    } else {
                        theFieldName = IndexFields.CONTENT;
                    }

                    String theOriginalContent = theDocument.getField(theFieldName).stringValue();

                    final Query theFinalQuery = theQuery;

                    ForkJoinTask<String> theHighligherResult = executorPool.submit(() -> {
                        StringBuilder theResult = new StringBuilder(theDateFormat.format(theLastModified));
                        theResult.append("&nbsp;-&nbsp;");
                        Highlighter theHighlighter = new Highlighter(new SimpleHTMLFormatter(),
                                new QueryScorer(theFinalQuery));
                        for (String theFragment : theHighlighter.getBestFragments(analyzer, theFieldName,
                                theOriginalContent, NUMBER_OF_FRAGMENTS)) {
                            if (theResult.length() > 0) {
                                theResult = theResult.append("...");
                            }
                            theResult = theResult.append(theFragment);
                        }
                        return theResult.toString();
                    });

                    int theNormalizedScore = (int) (theDocs.scoreDocs[i].score / theDocs.getMaxScore() * 5);

                    File theFileOnDisk = new File(theFoundFileName);
                    if (theFileOnDisk.exists()) {

                        boolean thePreviewAvailable = previewProcessor.previewAvailableFor(theFileOnDisk);

                        theExistingDocument = new QueryResultDocument(theDocumentID, theFoundFileName,
                                theHighligherResult,
                                Long.parseLong(theDocument.getField(IndexFields.LASTMODIFIED).stringValue()),
                                theNormalizedScore, theUniqueID, thePreviewAvailable);
                        theDocumentsByHash.put(theHash, theExistingDocument);
                        theResultDocuments.add(theExistingDocument);
                    }
                }
            }

            if (aConfiguration.isShowSimilarDocuments()) {

                MoreLikeThis theMoreLikeThis = new MoreLikeThis(theSearcher.getIndexReader());
                theMoreLikeThis.setAnalyzer(analyzer);
                theMoreLikeThis.setMinTermFreq(1);
                theMoreLikeThis.setMinDocFreq(1);
                theMoreLikeThis.setFieldNames(analyzerCache.getAllFieldNames());

                for (QueryResultDocument theDocument : theResultDocuments) {
                    Query theMoreLikeThisQuery = theMoreLikeThis.like(theDocument.getDocumentID());
                    TopDocs theMoreLikeThisTopDocs = theSearcher.search(theMoreLikeThisQuery, 5);
                    for (ScoreDoc theMoreLikeThisScoreDoc : theMoreLikeThisTopDocs.scoreDocs) {
                        int theSimilarDocument = theMoreLikeThisScoreDoc.doc;
                        if (theUniqueDocumentsFound.add(theSimilarDocument)) {
                            Document theMoreLikeThisDocument = theSearcher.doc(theSimilarDocument);
                            String theFilename = theMoreLikeThisDocument.getField(IndexFields.FILENAME)
                                    .stringValue();
                            theDocument.addSimilarFile(theFilename);
                        }
                    }
                }
            }

            LOGGER.info("Got Dimensions");
            for (FacetResult theResult : theFacetCounts.getAllDims(20000)) {
                String theDimension = theResult.dim;
                if ("author".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theAuthorFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("extension".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theFileTypesFacets.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if ("last-modified-year".equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            theLastModifiedYearFacet.add(new Facet(theLabelAndValue.label,
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }
                if (IndexFields.LANGUAGEFACET.equals(theDimension)) {
                    for (LabelAndValue theLabelAndValue : theResult.labelValues) {
                        if (!StringUtils.isEmpty(theLabelAndValue.label)) {
                            Locale theLocale = new Locale(theLabelAndValue.label);
                            theLanguageFacet.add(new Facet(theLocale.getDisplayLanguage(Locale.ENGLISH),
                                    theLabelAndValue.value.intValue(), aBasePath + "/" + encode(
                                            FacetSearchUtils.encode(theDimension, theLabelAndValue.label))));
                        }
                    }
                }

                LOGGER.info(" " + theDimension);
            }

            if (!theAuthorFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("Author", theAuthorFacets));
            }
            if (!theLastModifiedYearFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Last modified", theLastModifiedYearFacet));
            }
            if (!theFileTypesFacets.isEmpty()) {
                theDimensions.add(new FacetDimension("File types", theFileTypesFacets));
            }
            if (!theLanguageFacet.isEmpty()) {
                theDimensions.add(new FacetDimension("Language", theLanguageFacet));
            }

            // Wait for all Tasks to complete for the search result highlighter
            ForkJoinTask.helpQuiesce();
        }

        long theDuration = System.currentTimeMillis() - theStartTime;

        LOGGER.info("Total amount of time : " + theDuration + "ms");

        return new QueryResult(System.currentTimeMillis() - theStartTime, theResultDocuments, theDimensions,
                theSearcher.getIndexReader().numDocs(), aBacklink);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        searcherManager.release(theSearcher);
    }
}

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

public File getFileOnDiskForDocument(String aUniqueID) throws IOException {
    searcherManager.maybeRefreshBlocking();
    IndexSearcher theSearcher = searcherManager.acquire();

    try {
        TermQuery theTermQuery = new TermQuery(new Term(IndexFields.UNIQUEID, aUniqueID));
        TopDocs theTopDocs = theSearcher.search(theTermQuery, null, 1);
        if (theTopDocs.totalHits == 1) {
            Document theDocument = theSearcher.doc(theTopDocs.scoreDocs[0].doc);
            if (theDocument != null) {
                return new File(theDocument.get(IndexFields.FILENAME));
            }
        }
        return null;
    } finally {
        searcherManager.release(theSearcher);
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.io.fangorn.FangornWriterTest.java

License:Apache License

@Test
public void test() throws Exception {
    File outputFile = new File("target/test-output");

    JCas jcas = JCasFactory.createJCas();

    jcas.setDocumentLanguage("en");
    jcas.setDocumentText("This is a test. I may work. Or it may not work.");

    DocumentMetaData meta = DocumentMetaData.create(jcas);
    meta.setCollectionId("dummyCollection");
    meta.setDocumentId("dummyId");

    AnalysisEngineDescription segmenter = createEngineDescription(OpenNlpSegmenter.class);

    AnalysisEngineDescription parser = createEngineDescription(OpenNlpParser.class,
            OpenNlpParser.PARAM_WRITE_PENN_TREE, true);

    AnalysisEngineDescription writer = createEngineDescription(FangornWriter.class,
            FangornWriter.PARAM_TARGET_LOCATION, outputFile);

    SimplePipeline.runPipeline(jcas, segmenter, parser, writer);

    IndexSearcher searcher = new IndexSearcher(FSDirectory.getDirectory(outputFile));
    QueryBuilder builder = new QueryBuilder("//NP");
    TreebankQuery tq = builder.parse(TermJoinType.SIMPLE_WITH_FC, false);
    SimpleHitCollector hitCollector = new SimpleHitCollector(100);
    searcher.search(tq, hitCollector);
    AllResults allResults = new AllResults(hitCollector.hits, hitCollector.totalHits, tq);

    Result[] resultMeta = allResults.collect(searcher);

    String[] results = new String[hitCollector.totalHits];
    for (int i = 0; i < hitCollector.totalHits; i++) {
        results[i] = searcher.doc(hitCollector.hits[i]).get("sent").trim();
    }

    List<String> actual = new ArrayList<String>();

    for (int i = 0; i < hitCollector.totalHits; i++) {
        Document doc = searcher.doc(hitCollector.hits[i]);
        actual.add(String.format("%s %s %s %s %s", doc.get(FangornWriter.FIELD_COLLECTION_ID),
                doc.get(FangornWriter.FIELD_DOCUMENT_ID), doc.get(FangornWriter.FIELD_BEGIN),
                doc.get(FangornWriter.FIELD_END), resultMeta[i].asJSONString().replace('"', '\'')));
    }

    List<String> expected = asList(
            "dummyCollection dummyId 0 15 {'num':'2','ms':[{'m':[{'s':'','e':'1_0_2_8','o':'0','t':'0'}]},{'m':[{'s':'','e':'4_2_3_6','o':'0','t':'0'}]}]}",
            "dummyCollection dummyId 16 27 {'num':'1','ms':[{'m':[{'s':'','e':'1_0_2_7','o':'0','t':'0'}]}]}",
            "dummyCollection dummyId 28 47 {'num':'1','ms':[{'m':[{'s':'','e':'2_1_2_9','o':'0','t':'0'}]}]}");

    assertEquals(StringUtils.join(expected, "\n"), StringUtils.join(actual, "\n"));
}

From source file:de.tudarmstadt.ukp.experiments.argumentation.clustering.debatefiltering.LuceneSearcher.java

License:Apache License

public List<String> retrieveTopNDocs(String textQuery, int topN) throws Exception {
    // Now search the index:
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);

    Directory directory = FSDirectory.open(luceneIndexDir);
    IndexReader reader = DirectoryReader.open(directory);

    IndexSearcher indexSearcher = new IndexSearcher(reader);

    // Parse a simple query
    QueryParser parser = new QueryParser(Version.LUCENE_44, LuceneIndexer.FIELD_TEXT_CONTENT, analyzer);
    Query query = parser.parse(textQuery);

    ScoreDoc[] hits = indexSearcher.search(query, null, topN).scoreDocs;

    List<String> result = new ArrayList<>();

    // Iterate through the results:
    for (int i = 0; i < hits.length; i++) {
        Document hitDoc = indexSearcher.doc(hits[i].doc);
        result.add(hitDoc.getField(LuceneIndexer.FIELD_FILE).stringValue());
        //            System.out.println(hitDoc.toString());
        //                assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
    }
    reader.close();
    directory.close();

    return result;
}

From source file:de.twitterlivesearch.analysis.Searcher.java

License:Apache License

/**
 * This is the same as
 * {@link de.twitterlivesearch.analysis.Searcher#searchForTweets(String)
 * searchForTweets(String)}, but the search is limited to the tweet with the
 * given id. This can for example be used to analyze the latest incoming
 * tweet.
 *
 * @param id the id of the tweet to restrict the search to, or null to search all tweets
 * @param queryString the query string to parse and search for
 * @return the list of matching documents
 */
public List<Document> searchForTweets(Integer id, String queryString) {
    if (queryString.isEmpty()) {
        return Collections.emptyList();
    }

    AbstractConfiguration config = ConfigurationHolder.getConfiguration();
    try {
        if (!DirectoryReader.indexExists(directory)) {
            return null;
        }
    } catch (IOException e) {
        log.fatal("Error when trying to check if directory exists!", e);
        return new ArrayList<>();
    }
    DirectoryReader ireader;
    try {
        ireader = DirectoryReader.open(directory);
    } catch (IOException e) {
        log.fatal("Error when trying to open directory!", e);
        return null;
    }

    IndexSearcher isearcher = new IndexSearcher(ireader);
    Query textQuery = null;
    QueryParser parser = new QueryParser(FieldNames.TEXT.getField(),
            AnalyzerMapping.getInstance().ANALYZER_FOR_DELIMITER);
    parser.setDefaultOperator(config.getDefaultOperator());
    BooleanQuery query = new BooleanQuery();
    try {
        textQuery = parser.parse(queryString);
    } catch (ParseException e) {
        log.fatal("Error while parsing query: " + queryString, e);
    }

    // if id does not equal null only the query with the given id will be
    // searched
    // this can be used to search the latest element only
    if (id != null) {
        Query idQuery = NumericRangeQuery.newIntRange(FieldNames.ID.getField(), id.intValue(), id.intValue(),
                true, true);
        query.add(idQuery, Occur.MUST);
    }
    query.add(textQuery, Occur.MUST);
    ScoreDoc[] hits = null;
    try {
        hits = isearcher.search(query, 1000).scoreDocs;
    } catch (IOException e) {
        log.fatal("Error while trying to search!", e);
    }
    List<Document> result = new ArrayList<>();
    for (int i = 0; i < hits.length; i++) {
        try {
            result.add(isearcher.doc(hits[i].doc));
            log.info("Found result for query \"" + queryString + "\".");
        } catch (IOException e) {
            log.fatal("Error when getting document!", e);
        }
    }
    return result;
}
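
The id restriction in searchForTweets above is plain Lucene: an exact numeric range on the id field is combined with the parsed text query under Occur.MUST, and each hit is then loaded with IndexSearcher.doc. A condensed sketch of that pattern, assuming an already-open IndexSearcher and the hypothetical field names "id" and "text" (pre-Lucene-6 BooleanQuery/NumericRangeQuery API, as used in the example):

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

public static Document findSingleTweet(IndexSearcher searcher, int id, String term) throws IOException {
    // Exact match on the numeric id field, combined with a term on the text field
    Query idQuery = NumericRangeQuery.newIntRange("id", id, id, true, true);
    Query textQuery = new TermQuery(new Term("text", term));

    BooleanQuery query = new BooleanQuery();
    query.add(idQuery, Occur.MUST);   // must be exactly this id
    query.add(textQuery, Occur.MUST); // and must match the text term

    TopDocs hits = searcher.search(query, 1);
    // Resolve the hit (if any) to its stored fields
    return hits.scoreDocs.length > 0 ? searcher.doc(hits.scoreDocs[0].doc) : null;
}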

From source file:de.unidue.inf.is.ezdl.dlservices.search.handlers.ranking.LuceneRanker.java

License:Open Source License

private void calculateRSVs(Directory directory, ResultDocumentList documentList, DocumentQuery documentQuery)
        throws ParseException, CorruptIndexException, IOException {
    Query query = new QueryParser(Version.LUCENE_31,
            de.unidue.inf.is.ezdl.dlcore.data.fields.Field.TEXT.toString(),
            new SimpleAnalyzer(Version.LUCENE_31)).parse(queryConverter.convert(documentQuery.getQuery()));
    IndexSearcher searcher = new IndexSearcher(directory, true);

    TopDocs topDocs = searcher.search(query, 1000);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        String oid = searcher.doc(scoreDoc.doc).get("oid");
        float score = scoreDoc.score;
        ResultDocument document = getDocumentByOid(documentList, oid);
        if (document != null) {
            document.setUnnormalizedRsv(score);
        }
    }
}

From source file:de.uni_koeln.spinfo.maalr.lucene.core.Dictionary.java

License:Apache License

private QueryResult toQueryResult(TopDocs docs, int startIndex, int pageSize)
        throws NoIndexAvailableException, BrokenIndexException, IOException, InvalidTokenOffsetsException {
    final ArrayList<LemmaVersion> results = new ArrayList<LemmaVersion>(pageSize);
    final ScoreDoc[] scoreDocs = docs.scoreDocs;
    IndexSearcher searcher = indexProvider.getSearcher();
    for (int i = startIndex; i < scoreDocs.length && i < startIndex + pageSize; i++) {
        Document doc = searcher.doc(scoreDocs[i].doc);
        LemmaVersion e = indexManager.getLemmaVersion(doc);
        results.add(e);
    }
    return new QueryResult(results, docs.totalHits, pageSize);
}

From source file:Demo1.MyServlet.java

private void gotoSearch(PrintWriter out, HttpServletRequest request, HttpServletResponse response) {
    try {
        //   Text to search
        String querystr = request.getParameter("keyword");

        log.addHistory(querystr);

        //   The \"title\" arg specifies the default field to use when no field is explicitly specified in the query
        Query q = new QueryParser("Searching", analyzer).parse(querystr);

        // Searching code
        int hitsPerPage = 10;
        IndexReader reader = DirectoryReader.open(index);
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        //   Code to display the results of search
        //out.println("Found " + hits.length + " Classes Matching your Requirement");
        courseList = new ArrayList();
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            Land course = new Land(d.get("name"), d.get("price"), d.get("area"), d.get("purpose"));
            //out.println((i + 1) + ". " +  d.get("Number")+ d.get("Classes") );
            courseList.add(course);
        }
        request.setAttribute("Lands", courseList);
        RequestDispatcher de = request.getRequestDispatcher("/table.jsp");
        de.forward(request, response);

        // reader can only be closed when there is no need to access the documents any more
        reader.close();
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}

From source file:Demo2.MyServlet.java

private void gotoSearch(PrintWriter out, HttpServletRequest request, HttpServletResponse response) {
    try {
        //   Text to search
        String querystr = request.getParameter("keyword");

        log.addHistory(querystr);

        //   The \"title\" arg specifies the default field to use when no field is explicitly specified in the query
        Query q = new QueryParser("Classes", analyzer).parse(querystr);

        // Searching code
        int hitsPerPage = 10;
        IndexReader reader = DirectoryReader.open(index);
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        //   Code to display the results of search
        //out.println("Found " + hits.length + " Classes Matching your Requirement");
        courseList = new ArrayList();
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            Child course = new Child(d.get("Number"), d.get("Classes"), d.get("Time"), d.get("Department"));
            //out.println((i + 1) + ". " +  d.get("Number")+ d.get("Classes") );
            courseList.add(course);
        }
        request.setAttribute("course", courseList);
        RequestDispatcher de = request.getRequestDispatcher("/table.jsp");
        de.forward(request, response);

        // reader can only be closed when there is no need to access the documents any more
        reader.close();
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}

From source file:di.uniba.it.tri.shell.Command.java

License:Open Source License

private void search(String cmd) throws Exception {
    String[] split = cmd.split("\\s+");
    if (split.length > 2) {
        if (reader == null) {
            throw new Exception("no index in memory");
        } else {
            if (!split[1].matches("[0-9]+")) {
                throw new Exception("no valid number of results");
            }
            StringBuilder qs = new StringBuilder();
            for (int i = 2; i < split.length; i++) {
                qs.append(split[i]).append(" ");
            }
            //String q = QueryParser.escape(qs.toString().trim());
            Query query = parser.parse(qs.toString().trim());
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topDocs = searcher.search(query, Integer.parseInt(split[1]));
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                TriShell.print(searcher.doc(scoreDoc.doc).get("word"));
                TriShell.print("\t");
                TriShell.println(String.valueOf(scoreDoc.score));
            }
        }
    } else {
        throw new Exception("search syntax error");
    }
}