Example usage for org.apache.lucene.document DocumentStoredFieldVisitor getDocument

List of usage examples for org.apache.lucene.document DocumentStoredFieldVisitor getDocument

Introduction

On this page you can find usage examples for the getDocument() method of org.apache.lucene.document.DocumentStoredFieldVisitor.

Prototype

public Document getDocument() 

Source Link

Document

Retrieve the visited document.

Usage

From source file:com.browseengine.bobo.api.BoboSegmentReader.java

License:Open Source License

/**
 * Reads the stored values of a single field for one document.
 *
 * <p>A {@link DocumentStoredFieldVisitor} restricted to {@code fieldname} is
 * handed to {@code super.document}, and the values are then taken from the
 * document the visitor collected.
 *
 * @param docid     document number passed on to {@code super.document}
 * @param fieldname name of the stored field to read
 * @return whatever {@code Document.getValues(fieldname)} yields for the visited document
 * @throws IOException if {@code super.document} fails
 */
public String[] getStoredFieldValue(int docid, final String fieldname) throws IOException {
    final DocumentStoredFieldVisitor singleFieldVisitor = new DocumentStoredFieldVisitor(fieldname);
    super.document(docid, singleFieldVisitor);
    return singleFieldVisitor.getDocument().getValues(fieldname);
}

From source file:com.core.nlp.index.IndexReader.java

License:Apache License

/**
 * Returns the stored fields of the <code>n</code><sup>th</sup>
 * <code>Document</code> in this index.  This is just
 * sugar for using {@link DocumentStoredFieldVisitor}.
 * <p>/*www  .ja v a 2s .co m*/
 * <b>NOTE:</b> for performance reasons, this method does not check if the
 * requested document is deleted, and therefore asking for a deleted document
 * may yield unspecified results. Usually this is not required, however you
 * can test if the doc is deleted by checking the {@link
 * Bits} returned from {@link MultiFields#getLiveDocs}.
 *
 * <b>NOTE:</b> only the content of a field is returned,
 * if that field was stored during indexing.  Metadata
 * like boost, omitNorm, IndexOptions, tokenized, etc.,
 * are not preserved.
 * 
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if there is a low-level IO error
 */
// TODO: we need a separate StoredField, so that the
// Document returned here contains that class not
// IndexableField
public final Document document(int docID) throws IOException {
    final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
    document(docID, visitor);
    return visitor.getDocument();
}

From source file:com.core.nlp.index.IndexReader.java

License:Apache License

/**
 * Variant of {@link #document(int)} that restricts loading to the given
 * fields.  It is only a convenience wrapper around {@link
 * DocumentStoredFieldVisitor#DocumentStoredFieldVisitor(Set)}.
 *
 * @param docID        number of the document to load
 * @param fieldsToLoad names of the stored fields handed to the visitor
 * @return the document collected by the visitor
 * @throws IOException if the underlying {@code document(int, visitor)} call fails
 */
public final Document document(int docID, Set<String> fieldsToLoad) throws IOException {
    final DocumentStoredFieldVisitor selectiveVisitor = new DocumentStoredFieldVisitor(fieldsToLoad);
    document(docID, selectiveVisitor);
    return selectiveVisitor.getDocument();
}

From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java

License:Apache License

/**
 * Builds a secondary ("sidecar") index that is written document-by-document in
 * the same order as the documents of {@code main}, and returns the reader
 * produced by {@code createSidecarIndexReader} for the pair.
 *
 * <p>For every document number of {@code main} exactly one document is written:
 * the document returned by {@code lookup} when one is found, otherwise a
 * placeholder from {@code addDummy} (deleted document, missing
 * {@code docIdField} value, or no data found in {@code source}).
 *
 * <p>Any exception raised while building is caught and logged as a warning;
 * the method then falls back to a {@code SidecarIndexReader} created without a
 * parallel reader, and if that fails as well it returns {@code main} itself.
 *
 * <p>NOTE(review): on the exception path the {@code IndexWriter} opened below
 * is never closed; presumably this leaves the write lock on the secondary
 * directory held - confirm and consider a try/finally.
 *
 * @param main    reader of the main index whose documents are mirrored
 * @param source  searcher passed to {@code lookup}; a {@code null} value
 *                triggers the fallback path
 * @param rebuild when {@code true}, {@code safeDelete(sidecarIndex)} is called
 *                before the new index is written
 * @return the sidecar reader, or one of the fallbacks described above
 */
DirectoryReader buildParallelReader(DirectoryReader main, SolrIndexSearcher source, boolean rebuild) {
    try {
        if (source == null) {
            throw new Exception("Source collection is missing.");
        }
        // create as a sibling path of the main index
        Directory d = main.directory();
        File primaryDir = null;
        if (d instanceof FSDirectory) {
            String path = ((FSDirectory) d).getDirectory().getPath();
            primaryDir = new File(path);
            sidecarIndex = new File(primaryDir.getParentFile(), sidecarIndexLocation);
        } else {
            // main index is not file-system based: use a timestamped location
            // under java.io.tmpdir instead
            String secondaryPath = System.getProperty("java.io.tmpdir") + File.separator + sidecarIndexLocation
                    + "-" + System.currentTimeMillis();
            sidecarIndex = new File(secondaryPath);
        }
        // create a new tmp dir for the secondary indexes
        File secondaryIndex = new File(sidecarIndex, System.currentTimeMillis() + "-index");
        if (rebuild) {
            safeDelete(sidecarIndex);
        }
        // every field name of the source except "id" is collected into parallelFields
        parallelFields.addAll(source.getFieldNames());
        parallelFields.remove("id");
        LOG.debug("building a new index");
        Directory dir = FSDirectory.open(secondaryIndex);
        if (IndexWriter.isLocked(dir)) {
            // try forcing unlock
            try {
                IndexWriter.unlock(dir);
            } catch (Exception e) {
                // best effort: a failed unlock is only logged
                LOG.warn("Failed to unlock " + secondaryIndex);
            }
        }
        // mergeTargets holds maxDoc() of each sub-reader of main (or of main
        // itself when there are no sub-readers); it is handed to SidecarMergePolicy
        // and also drives the commit points in the loop below
        int[] mergeTargets;
        AtomicReader[] subReaders = SidecarIndexReader.getSequentialSubReaders(main);
        if (subReaders == null || subReaders.length == 0) {
            mergeTargets = new int[] { main.maxDoc() };
        } else {
            mergeTargets = new int[subReaders.length];
            for (int i = 0; i < subReaders.length; i++) {
                mergeTargets[i] = subReaders[i].maxDoc();
            }
        }
        Version ver = currentCore.getLatestSchema().getDefaultLuceneMatchVersion();
        IndexWriterConfig cfg = new IndexWriterConfig(ver, currentCore.getLatestSchema().getAnalyzer());
        //cfg.setInfoStream(System.err);
        cfg.setMergeScheduler(new SerialMergeScheduler());
        cfg.setMergePolicy(new SidecarMergePolicy(mergeTargets, false));
        IndexWriter iw = new IndexWriter(dir, cfg);
        LOG.info("processing " + main.maxDoc() + " docs / " + main.numDeletedDocs() + " dels in main index");
        int boostedDocs = 0;
        Bits live = MultiFields.getLiveDocs(main);

        // nextTarget is the running sum of mergeTargets, i.e. the doc number at
        // which the next commit happens
        int targetPos = 0;
        int nextTarget = mergeTargets[targetPos];
        BytesRef idRef = new BytesRef();
        for (int i = 0; i < main.maxDoc(); i++) {
            if (i == nextTarget) {
                // boundary reached: commit what was written so far and advance
                // to the next boundary (presumably so the sidecar segments line
                // up with those of main - confirm against SidecarMergePolicy)
                iw.commit();
                nextTarget = nextTarget + mergeTargets[++targetPos];
            }
            if (live != null && !live.get(i)) {
                addDummy(iw); // this is required to preserve doc numbers.
                continue;
            } else {
                // load only the docIdField of the main document
                DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(docIdField);
                main.document(i, visitor);
                Document doc = visitor.getDocument();
                // get docId
                String id = doc.get(docIdField);
                if (id == null) {
                    LOG.debug("missing id, docNo=" + i);
                    addDummy(iw);
                    continue;
                } else {
                    // find the data, if any
                    doc = lookup(source, id, idRef, parallelFields);
                    if (doc == null) {
                        LOG.debug("missing boost data, docId=" + id);
                        addDummy(iw);
                        continue;
                    } else {
                        LOG.debug("adding boost data, docId=" + id + ", b=" + doc);
                        iw.addDocument(doc);
                        boostedDocs++;
                    }
                }
            }
        }
        iw.close();
        DirectoryReader other = DirectoryReader.open(dir);
        LOG.info("SidecarIndexReader with " + boostedDocs + " boosted documents.");
        SidecarIndexReader pr = createSidecarIndexReader(main, other, sourceCollection, secondaryIndex);
        return pr;
    } catch (Exception e) {
        LOG.warn("Unable to build parallel index: " + e.toString(), e);
        LOG.warn("Proceeding with single main index.");
        try {
            // fallback: a SidecarIndexReader constructed with null in place of
            // the parallel reader and of the secondary index location
            return new SidecarIndexReader(this, main, null, SidecarIndexReader.getSequentialSubReaders(main),
                    sourceCollection, null);
        } catch (Exception e1) {
            LOG.warn("Unexpected exception, returning single main index", e1);
            return main;
        }
    }
}

From source file:demo.jaxrs.search.server.Catalog.java

License:Apache License

/**
 * Returns a JSON array holding the string value of
 * {@code LuceneDocumentMetadata.SOURCE_FIELD} for each document matched by a
 * {@code MatchAllDocsQuery}, limited to the first 1000 hits.
 *
 * <p>The index reader opened for the request is closed before returning,
 * whether or not the search succeeds.
 *
 * @return the array built from the matched documents
 * @throws IOException if opening, searching, reading or closing the index fails
 */
@GET
@Produces(MediaType.APPLICATION_JSON)
public JsonArray getBooks() throws IOException {
    final IndexReader reader = DirectoryReader.open(directory);
    final IndexSearcher searcher = new IndexSearcher(reader);
    final JsonArrayBuilder builder = Json.createArrayBuilder();

    try {
        final ScoreDoc[] hits = searcher.search(new MatchAllDocsQuery(), 1000).scoreDocs;

        for (int i = 0; i < hits.length; i++) {
            // Only the source field is loaded for each hit.
            final DocumentStoredFieldVisitor sourceOnly = new DocumentStoredFieldVisitor(
                    LuceneDocumentMetadata.SOURCE_FIELD);
            reader.document(hits[i].doc, sourceOnly);

            final String source = sourceOnly.getDocument().getField(LuceneDocumentMetadata.SOURCE_FIELD)
                    .stringValue();
            builder.add(source);
        }

        return builder.build();
    } finally {
        reader.close();
    }
}

From source file:fi.semantum.strategia.Lucene.java

License:Open Source License

/**
 * Runs a query against the index of the given database and collects the
 * {@code "uuid"} stored field of every hit.
 *
 * <p>The query string is parsed against the {@code "text"} field with leading
 * wildcards allowed. The reader is closed before the method returns.
 *
 * @param databaseId identifier handed to {@code getDirectory}
 * @param search     query string to parse
 * @return the {@code "uuid"} value of each hit, in hit order
 * @throws IOException on index access failure; a {@code ParseException} or
 *                     {@code CorruptIndexException} is wrapped in an {@code IOException}
 */
public static synchronized List<String> search(String databaseId, String search) throws IOException {
    ArrayList<String> uuids = new ArrayList<String>();
    IndexReader indexReader = null;

    try {
        indexReader = DirectoryReader.open(getDirectory(databaseId));
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        QueryParser queryParser = new QueryParser(Version.LUCENE_4_9, "text", getAnalyzer());
        queryParser.setAllowLeadingWildcard(true);

        ScoreDoc[] hits = indexSearcher.search(queryParser.parse(search), Integer.MAX_VALUE).scoreDocs;
        for (ScoreDoc hit : hits) {
            try {
                DocumentStoredFieldVisitor storedFields = new DocumentStoredFieldVisitor();
                indexReader.document(hit.doc, storedFields);
                uuids.add(storedFields.getDocument().get("uuid"));
            } catch (CorruptIndexException e) {
                throw new IOException(e);
            }
        }
    } catch (ParseException e) {
        throw new IOException(e);
    } finally {
        if (indexReader != null) {
            indexReader.close();
        }
    }

    return uuids;
}

From source file:gov.nist.basekb.FreebaseSearcher.java

License:LGPL

/**
 * Returns the value of predicate {@code predName} on {@code subjectURI}.
 * If there are multiple values, the first one indexed is returned; if there
 * are none, {@code null} is returned.
 *
 * <p>This is specialized to only retrieve the {@code predName} field of the
 * subject document. If the full document has already been retrieved, use the
 * Document accessor instead.
 *
 * @param subjectURI URI resolved to a document number via {@code getSubjectDocID}
 * @param predName   name of the predicate field to load
 * @return the predicate value, or {@code null} when the resolved document number is negative
 * @throws IOException if reading the stored field fails
 */
public String getSubjectPredicateValue(String subjectURI, String predName) throws IOException {
    final int docNumber = getSubjectDocID(subjectURI);
    if (docNumber < 0) {
        return null;
    }
    DocumentStoredFieldVisitor predicateOnly = new DocumentStoredFieldVisitor(predName);
    getIndexReader().document(docNumber, predicateOnly);
    return getSubjectPredicateValue(predicateOnly.getDocument(), predName);
}

From source file:gov.nist.basekb.FreebaseSearcher.java

License:LGPL

/**
 * Returns the values of predicate {@code predName} on {@code subjectURI}.
 * If there are none, an empty array is returned.
 *
 * <p>This is specialized to only retrieve the {@code predName} field of the
 * subject document. If the full document has already been retrieved, use the
 * Document accessor instead.
 *
 * @param subjectURI URI resolved to a document number via {@code getSubjectDocID}
 * @param predName   name of the predicate field to load
 * @return the predicate values, or {@code emptyValues} when the resolved document number is negative
 * @throws IOException if reading the stored field fails
 */
public String[] getSubjectPredicateValues(String subjectURI, String predName) throws IOException {
    final int docNumber = getSubjectDocID(subjectURI);
    if (docNumber < 0) {
        return emptyValues;
    }
    DocumentStoredFieldVisitor predicateOnly = new DocumentStoredFieldVisitor(predName);
    getIndexReader().document(docNumber, predicateOnly);
    return getSubjectPredicateValues(predicateOnly.getDocument(), predName);
}

From source file:lux.CachingDocReader.java

License:Mozilla Public License

/**
 * Returns the node for {@code docID}, consulting the cache first.
 *
 * <p>On a cache hit the hit counter is incremented and the cached node is
 * returned. Otherwise the stored fields of {@code luceneDocID} are read from
 * {@code reader} and passed to {@code getXdmNode}.
 *
 * @param docID       key used for the cache lookup and passed to {@code getXdmNode}
 * @param luceneDocID document number used to read from {@code reader}
 * @param reader      reader the stored fields are loaded from on a cache miss
 * @return the cached node, or the result of {@code getXdmNode}
 * @throws IOException if reading the stored fields fails
 */
private XdmNode get(int docID, int luceneDocID, IndexReader reader) throws IOException {
    final XdmNode cached = cache.get(docID);
    if (cached == null) {
        DocumentStoredFieldVisitor allFields = new DocumentStoredFieldVisitor();
        reader.document(luceneDocID, allFields);
        return getXdmNode(docID, allFields.getDocument());
    }
    cacheHits++;
    return cached;
}

From source file:org.apache.blur.mapreduce.lib.GenericRecordReader.java

License:Apache License

/**
 * Reads the stored fields of the document at {@code _docId} and turns them
 * into the current record.
 *
 * <p>The visited document is handed to {@code RowDocumentUtil.readRecord},
 * which fills a new {@code BlurRecord} and returns the row id. The row id is
 * then set on the record, and {@code _rowId} and {@code _tableBlurRecord} are
 * updated.
 *
 * @throws IOException if visiting the document fails
 */
private void fetchBlurRecord() throws IOException {
    final DocumentStoredFieldVisitor storedFields = new DocumentStoredFieldVisitor();
    _fieldsReader.visitDocument(_docId, storedFields);

    final BlurRecord parsed = new BlurRecord();
    final String id = RowDocumentUtil.readRecord(storedFields.getDocument(), parsed);
    parsed.setRowId(id);

    _rowId = new Text(id);
    _tableBlurRecord = new TableBlurRecord(_table, parsed);
}