Example usage for org.apache.lucene.index.IndexReader.document()

Introduction

On this page you can find example usages of the document method of org.apache.lucene.index.IndexReader.

Prototype




public final Document document(int docID) throws IOException

Source Link

Document

Returns the stored fields of the n-th Document in this index.

Usage

From source file:it.polito.tellmefirst.lucene.KBIndexSearcher.java

License:Open Source License

/**
 * Gets the DBpedia concepts related to a specific URI from the Lucene index. These DBpedia concepts appear
 * as a wikilink once in the Wikipedia page identified by the URI.
 *
 * <p>In previous versions of TellMeFirst, the getResidualBagOfConcepts method took as input the
 * URI of a DBpedia resource (String) and the language parameter (String). The API was
 * modified in order to separate this module from the core of TellMeFirst.
 *
 * <p>Any exception raised while reading the index is logged and not rethrown; in that case the
 * concepts collected so far (possibly none) are returned.
 *
 * @param uri Input URI.
 * @return the space-separated tokens of the "KB" field of the best hit for the term
 *         {@code URI:uri}; an empty list when there is no hit or an error occurred.
 *
 * @since 3.0.0.0.
 */
public ArrayList<String> getResidualBagOfConcepts(String uri) {
    LOG.debug("[getResidualBagOfConcepts] - BEGIN");
    ArrayList<String> result = new ArrayList<String>();
    try {
        MMapDirectory directory = new MMapDirectory(new File(residualKb));
        IndexReader reader = IndexReader.open(directory, true);
        try {
            // Search through the same reader that later loads the stored fields, so the doc id
            // returned by the search is guaranteed to refer to the same index snapshot.
            IndexSearcher is = new IndexSearcher(reader);
            Query q = new TermQuery(new Term("URI", uri));
            TopDocs hits = is.search(q, 1);
            // The searcher wraps an externally supplied reader; the reader itself is released
            // in the finally block below.
            is.close();
            if (hits.totalHits != 0) {
                int docId = hits.scoreDocs[0].doc;
                org.apache.lucene.document.Document doc = reader.document(docId);
                String wikilinksMerged = doc.getField("KB").stringValue();
                String[] wikiSplits = wikilinksMerged.split(" ");
                //no prod
                LOG.debug("Residual bag of concepts for the resource " + uri + ": ");
                for (String s : wikiSplits) {
                    result.add(s);
                    //no prod
                    LOG.debug("* " + s);
                }
            }
        } finally {
            // Always release the reader, including when the search or field access fails.
            reader.close();
        }
    } catch (Exception e) {
        LOG.error("[getResidualBagOfConcepts] - EXCEPTION: ", e);
    }
    LOG.debug("[getResidualBagOfConcepts] - END");
    return result;
}

From source file:it.unibz.instasearch.indexing.Searcher.java

License:Open Source License

/**
 * Executes {@code query} and wraps every hit in a {@link SearchResultDoc}.
 *
 * @param searchQuery   the user's query object; supplies the result limit and the cancel flag
 * @param indexSearcher searcher used to run the query
 * @param reader        reader used to size the collector and to load the stored documents
 * @param query         the Lucene query to execute
 * @return the collected results, or {@code null} if the search was canceled before it ran
 *         or produced no hits
 * @throws IOException if searching or loading a document fails
 */
private SearchResult collectSearchResults(SearchQuery searchQuery, IndexSearcher indexSearcher,
        IndexReader reader, Query query) throws IOException {
    // Default to "all documents"; a limited query overrides this with its own cap.
    int hitLimit = reader.numDocs();
    if (searchQuery.isLimited()) {
        hitLimit = searchQuery.getMaxResults();
    }

    Map<String, Float> searchTerms = extractTerms(query);
    TopDocCollector topDocs = new TopDocCollector(hitLimit);

    if (searchQuery.isCanceled()) {
        return null;
    }

    // Run the actual search.
    indexSearcher.search(query, topDocs);
    if (topDocs.getTotalHits() == 0) {
        return null;
    }

    ScoreDoc[] scored = topDocs.topDocs().scoreDocs;
    ArrayList<SearchResultDoc> collected = new ArrayList<SearchResultDoc>(scored.length);

    for (ScoreDoc hit : scored) {
        // Stop collecting as soon as the user cancels; what was gathered so far is returned.
        if (searchQuery.isCanceled()) {
            break;
        }

        Document stored = reader.document(hit.doc);
        SearchResultDoc entry = new SearchResultDoc(getIndexDir(), stored, hit.doc, hit.score);

        if (showMatchCounts) {
            entry.computeMatchCount(reader, searchTerms.keySet());
        }

        collected.add(entry);
    }

    return new SearchResult(searchQuery, collected, searchTerms);
}

From source file:it.unipd.dei.ims.falcon.indexing.Indexing.java

License:Apache License

/**
 * Prints information on the songs stored in the index in the specified path.
 * The specific information printed is that specified by the "option".
 * Available options are:
 * <ul>
 *  <li> "show_doc_ids": prints the internal index identifier of all the
 *       segments in the index together with the title of the song which
 *       the segment belongs to;
 *  <li> "show_seg_ids": prints the internal index identifier of all the
 *       segments in the index together with the segment identifier;
 *  <li> "show_full_index": prints all the distinct hashes in the index
 *       and the posting list associated to each hash.
 * </ul>
 * Any other option prints nothing. The index reader and the term enumerations
 * opened here are always released before the method returns.
 *
 * @param indexPath
 *                  full path to the folder where the index is stored
 * @param option
 *                  option which specifies the requested information
 *
 * @throws IndexingException
 *                  if the index is corrupt or cannot be read
 */
public static void indexUtils(String indexPath, String option) throws IndexingException {
    IndexReader reader = null;
    try {
        reader = IndexReader.open(new SimpleFSDirectory(new File(indexPath), null));

        if (option.equals("show_doc_ids")) {
            //  prints all the internal segment identifiers together with
            //  the title of the song of the considered segment.
            //  For instance, "[6] song2" denotes that the segment with
            //  internal identifier "6" belongs to the song with title "song2"
            printStoredField(reader, "TITLE");
        } else if (option.equals("show_seg_ids")) {
            //  prints all the internal segment identifiers together with
            //  the identifier of the segment.
            //  For instance, "[8] song2_3" denotes that the third segment
            //  of "song2" has internal identifier "8"
            printStoredField(reader, "ID");
        } else if (option.equals("show_full_index")) {
            // print the full index, that is each hash with the associated
            // posting list
            TermEnum terms = reader.terms();
            try {
                while (terms.next()) {
                    System.out.print(terms.term() + " [SF: " + terms.docFreq() + "] <");

                    TermPositions poss = reader.termPositions(terms.term());
                    try {
                        while (poss.next()) {
                            System.out.print(" " + reader.document(poss.doc()).getField("ID").stringValue()
                                    + " (" + poss.freq() + "), ");
                        }
                    } finally {
                        poss.close();
                    }
                    System.out.println(">");
                }
            } finally {
                terms.close();
            }
        }
    } catch (CorruptIndexException ex) {
        // IndexingException's constructors are not visible from here, so the underlying
        // message is appended to keep the root cause diagnosable.
        throw new IndexingException(
                "CorruptIndexException when accessing index for printing information: " + ex.getMessage());
    } catch (IOException ex) {
        throw new IndexingException(
                "IOException when accessing index for printing information: " + ex.getMessage());
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException ignored) {
                // Best-effort close: the reader was only used for reading and all output
                // has already been produced, so there is nothing to recover.
            }
        }
    }
}

/**
 * Prints "[docId] value" for the given stored field of every live document.
 * Iterates up to maxDoc() and skips deleted ids, because document ids are not
 * contiguous once the index contains deletions.
 */
private static void printStoredField(IndexReader reader, String fieldName) throws IOException {
    int maxDoc = reader.maxDoc();
    for (int d = 0; d < maxDoc; d++) {
        if (reader.isDeleted(d)) {
            continue;
        }
        System.out.println("[" + d + "] " + reader.document(d).getField(fieldName).stringValue());
    }
}

From source file:lia.chapter5.CategorizerTest.java

License:Apache License

/**
 * Walks every document id of the book index and folds the term vector of each
 * document's "subject" field into the per-category map held in {@code categoryMap}.
 * Prints the document count first and each document's subject as it goes.
 *
 * @throws IOException if a document or term vector cannot be read
 */
private void buildCategoryVectors() throws IOException {
    IndexReader reader = Utils.getBookIndexSearcher().getIndexReader();

    final int docCount = reader.maxDoc();
    System.out.println(docCount);

    for (int docId = 0; docId < docCount; docId++) {
        Document book = reader.document(docId);
        String category = book.get("category");
        System.out.println("\n" + book.get("subject") + "\n");

        // Lazily create the term-frequency map the first time a category is seen.
        Map termFreqs = (Map) categoryMap.get(category);
        if (termFreqs == null) {
            termFreqs = new TreeMap();
            categoryMap.put(category, termFreqs);
        }

        Terms subjectVector = reader.getTermVector(docId, "subject");
        addTermFreqToMap(termFreqs, subjectVector);
    }
}

From source file:liredemo.flickr.TestParallelIndexer.java

License:Open Source License

/**
 * Runs the same visual-words query (document 0 of the index at {@code indexPath})
 * ten times and prints the wall-clock time of each run in seconds.
 *
 * @throws IOException if the index cannot be opened or read
 */
public void testSearchTime() throws IOException {
    ImageSearcher searcher = new VisualWordsImageSearcher(100,
            DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW);
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
    try {
        System.out.println("reader.maxDoc() = " + reader.maxDoc());
        for (int i = 0; i < 10; i++) {
            long ms = System.currentTimeMillis();
            // Loading the query document is deliberately kept inside the timed section,
            // as in the original measurement.
            searcher.search(reader.document(0), reader);
            System.out.println("s = " + (double) (System.currentTimeMillis() - ms) / 1000d);
        }
    } finally {
        // Release the index files even when a search throws.
        reader.close();
    }
}

From source file:liredemo.flickr.TestParallelIndexer.java

License:Open Source License

/**
 * For each of the first 1000 document ids of the "./index-mirflickr" index, runs a CEDD
 * image search with that document as the query, builds a histogram of the tags found in
 * the result documents, and checks how many of the ten most frequent tags (as chosen by
 * {@code getMaxItem}) occur in the query document's own tag string.
 *
 * <p>Prints the average precision (matches out of ten, averaged over all queries) and the
 * fraction of queries for which none of the ten tags matched.
 *
 * @throws IOException if the index cannot be opened or read
 */
public void testMirFlickrSearch() throws IOException {
    float avgPrecision = 0f;
    float nullHits = 0f;
    int numDocsAll = 1000;
    // Open the index once for the whole run; opening a reader per query leaked
    // one set of index file handles per iteration.
    IndexReader reader = IndexReader.open(FSDirectory.open(new File("./index-mirflickr")));
    try {
        for (int docId = 0; docId < numDocsAll; docId++) {
            ImageSearcher imageSearcher = ImageSearcherFactory.createCEDDImageSearcher(10);
            // Load the query document once and reuse it for the search and the tag check.
            Document queryDoc = reader.document(docId);
            ImageSearchHits hits = imageSearcher.search(queryDoc, reader);

            // Tag -> number of occurrences across all result documents.
            HashMap<String, Float> hist = new HashMap<String, Float>(250);
            for (int i = 0; i < hits.length(); i++) {
                String[] hitTags = hits.doc(i).getValues("tags");
                if (hitTags == null || hitTags.length == 0) {
                    continue; // result document without tags contributes nothing
                }
                String[] t = hitTags[0].split("\\s");
                // The first token is skipped and single-character tokens are ignored,
                // exactly as before (j > 0 && length > 1).
                for (int j = 1; j < t.length; j++) {
                    if (t[j].length() > 1) {
                        Float seen = hist.get(t[j]);
                        hist.put(t[j], seen == null ? 1f : seen + 1f);
                    }
                }
            }

            String[] queryTagValues = queryDoc.getValues("tags");
            String queryTags = (queryTagValues != null && queryTagValues.length > 0) ? queryTagValues[0] : null;

            int countHits = 0;
            for (int c = 0; c < 10; c++) {
                String t = getMaxItem(hist);
                // Substring match against the query document's raw tag string.
                if (t != null && queryTags != null && queryTags.indexOf(t) > -1) {
                    countHits++;
                }
                hist.remove(t);
            }
            avgPrecision += (float) countHits / 10f;
            if (countHits < 1) {
                nullHits++;
            }
        }
    } finally {
        reader.close();
    }
    System.out.println("avgPrecision = " + avgPrecision / (float) numDocsAll);
    System.out.println("nullHits = " + nullHits / (float) numDocsAll);
}

From source file:liredemo.LireDemoFrame.java

License:Open Source License

/**
 * Handles mouse clicks on the results table. A right click (BUTTON3) opens the image file
 * behind the clicked cell with the desktop's default application; a double click starts a
 * new search using the selected cell's document as the query.
 *
 * <p>The cell-to-hit mapping is {@code row * 3 + column}.
 */
private void resultsTableMouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_resultsTableMouseClicked
    if (evt.getButton() == MouseEvent.BUTTON3) {
        int imageID = resultsTable.rowAtPoint(evt.getPoint()) * 3
                + resultsTable.columnAtPoint(evt.getPoint());
        if (imageID >= 0 && imageID < tableModel.getHits().length()) {
            // Open the index only when a file name actually has to be looked up, and
            // release it right after; previously a reader was opened on every click
            // and never closed.
            try (IndexReader reader = DirectoryReader
                    .open(FSDirectory.open(Paths.get(textfieldIndexName.getText())))) {
                String file = reader.document(tableModel.getHits().documentID(imageID))
                        .getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue();
                Desktop.getDesktop().open(new File(file));
            } catch (IOException ex) {
                Logger.getLogger(LireDemoFrame.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
    if (evt.getClickCount() == 2) {
        searchForDocument(resultsTable.getSelectedRow() * 3 + resultsTable.getSelectedColumn());
    }
}

From source file:liredemo.LireDemoFrame.java

License:Open Source License

/**
 * Starts a search that uses the hit at position {@code tableRow} of the current result
 * list as the query document. The document is loaded from the index named in
 * {@code textfieldIndexName}; failures to open or read the index are logged.
 *
 * @param tableRow position of the hit in {@code tableModel.getHits()}
 */
private void searchForDocument(int tableRow) {
    // try-with-resources releases the reader once the query document has been handed over;
    // the reader used to be left open on every call.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(textfieldIndexName.getText())))) {
        searchForDocument(reader.document(tableModel.getHits().documentID(tableRow)));
    } catch (IOException e) {
        Logger.getLogger(getClass().getName()).log(Level.SEVERE, null, e);
    }
}

From source file:liredemo.LireDemoFrame.java

License:Open Source License

/**
 * Re-ranks the current hits with the feature chosen in {@code selectboxRerankFeature},
 * using the first hit as the reference document, and pushes the re-ranked list back
 * into the table model.
 */
private void rerankFeatureActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_rerankFeatureActionPerformed
    // ColorLayout is the fallback for index 0 and for any index without its own case.
    RerankFilter filter = new RerankFilter(ColorLayout.class, DocumentBuilder.FIELD_NAME_COLORLAYOUT);
    final int choice = selectboxRerankFeature.getSelectedIndex();
    switch (choice) {
    case 1: // ScalableColor
        filter = new RerankFilter(ScalableColor.class, DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
        break;
    case 2: // EdgeHistogram
        filter = new RerankFilter(EdgeHistogram.class, DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
        break;
    case 3: // ACC
        filter = new RerankFilter(AutoColorCorrelogram.class, DocumentBuilder.FIELD_NAME_AUTOCOLORCORRELOGRAM);
        break;
    case 4: // CEDD
        filter = new RerankFilter(CEDD.class, DocumentBuilder.FIELD_NAME_CEDD);
        break;
    case 5: // FCTH
        filter = new RerankFilter(FCTH.class, DocumentBuilder.FIELD_NAME_FCTH);
        break;
    case 6: // JCD
        filter = new RerankFilter(JCD.class, DocumentBuilder.FIELD_NAME_JCD);
        break;
    case 7: // SimpleColorHistogram
        filter = new RerankFilter(SimpleColorHistogram.class, DocumentBuilder.FIELD_NAME_COLORHISTOGRAM);
        break;
    case 8: // Tamura
        filter = new RerankFilter(Tamura.class, DocumentBuilder.FIELD_NAME_TAMURA);
        break;
    case 9: // Gabor
        filter = new RerankFilter(Gabor.class, DocumentBuilder.FIELD_NAME_GABOR);
        break;
    case 10: // JPEG Coeffs
        filter = new RerankFilter(JpegCoefficientHistogram.class, DocumentBuilder.FIELD_NAME_JPEGCOEFFS);
        break;
    case 11: // Joint Histogram
        filter = new RerankFilter(JointHistogram.class, DocumentBuilder.FIELD_NAME_JOINT_HISTOGRAM);
        break;
    case 12: // OpponentHistogram
        filter = new RerankFilter(OpponentHistogram.class, DocumentBuilder.FIELD_NAME_OPPONENT_HISTOGRAM);
        break;
    case 13: // LuminanceLayout
        filter = new RerankFilter(LuminanceLayout.class, DocumentBuilder.FIELD_NAME_LUMINANCE_LAYOUT);
        break;
    default:
        if (choice >= 14) { // PHOG
            filter = new RerankFilter(PHOG.class, DocumentBuilder.FIELD_NAME_PHOG);
        }
        break;
    }
    try {
        // NOTE(review): the reader is passed on to the table model and is not closed here;
        // confirm that the model (or whoever replaces its reader) takes care of closing it.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(textfieldIndexName.getText())));
        tableModel.setHits(
                filter.filter(tableModel.hits, reader, reader.document(tableModel.hits.documentID(0))), null,
                reader);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:liredemo.LireDemoFrame.java

License:Open Source License

/**
 * Filters the current hits with an {@link LsaFilter} for the feature chosen in
 * {@code selectboxDocumentBuilder}, using the first hit as the reference document, and
 * pushes the filtered list back into the table model.
 */
private void rerankLsaActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_rerankLsaActionPerformed
    // ColorLayout is the fallback for index 0 and for any index without its own case.
    // NOTE(review): index 11 has no case and therefore also falls back to ColorLayout —
    // confirm that this matches the entry at position 11 of the combo box.
    LsaFilter filter = new LsaFilter(ColorLayout.class, DocumentBuilder.FIELD_NAME_COLORLAYOUT);
    final int choice = selectboxDocumentBuilder.getSelectedIndex();
    switch (choice) {
    case 1: // ScalableColor
        filter = new LsaFilter(ScalableColor.class, DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
        break;
    case 2: // EdgeHistogram
        filter = new LsaFilter(EdgeHistogram.class, DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
        break;
    case 3: // ACC
        filter = new LsaFilter(AutoColorCorrelogram.class, DocumentBuilder.FIELD_NAME_AUTOCOLORCORRELOGRAM);
        break;
    case 4: // CEDD
        filter = new LsaFilter(CEDD.class, DocumentBuilder.FIELD_NAME_CEDD);
        break;
    case 5: // FCTH
        filter = new LsaFilter(FCTH.class, DocumentBuilder.FIELD_NAME_FCTH);
        break;
    case 6: // JCD
        filter = new LsaFilter(JCD.class, DocumentBuilder.FIELD_NAME_JCD);
        break;
    case 7: // SimpleColorHistogram
        filter = new LsaFilter(SimpleColorHistogram.class, DocumentBuilder.FIELD_NAME_COLORHISTOGRAM);
        break;
    case 8: // Tamura
        filter = new LsaFilter(Tamura.class, DocumentBuilder.FIELD_NAME_TAMURA);
        break;
    case 9: // Gabor
        filter = new LsaFilter(Gabor.class, DocumentBuilder.FIELD_NAME_GABOR);
        break;
    case 10: // JPEG Coeffs
        filter = new LsaFilter(JpegCoefficientHistogram.class, DocumentBuilder.FIELD_NAME_JPEGCOEFFS);
        break;
    case 12: // Joint Histogram
        filter = new LsaFilter(JointHistogram.class, DocumentBuilder.FIELD_NAME_JOINT_HISTOGRAM);
        break;
    case 13: // OpponentHistogram
        filter = new LsaFilter(OpponentHistogram.class, DocumentBuilder.FIELD_NAME_OPPONENT_HISTOGRAM);
        break;
    case 14: // LuminanceLayout
        filter = new LsaFilter(LuminanceLayout.class, DocumentBuilder.FIELD_NAME_LUMINANCE_LAYOUT);
        break;
    default:
        if (choice >= 15) { // PHOG
            filter = new LsaFilter(PHOG.class, DocumentBuilder.FIELD_NAME_PHOG);
        }
        break;
    }
    try {
        // NOTE(review): the reader is passed on to the table model and is not closed here;
        // confirm that the model (or whoever replaces its reader) takes care of closing it.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(textfieldIndexName.getText())));
        tableModel.setHits(
                filter.filter(tableModel.hits, reader, reader.document(tableModel.hits.documentID(0))), null,
                reader);
    } catch (IOException e) {
        e.printStackTrace();
    }
}