Example usage for org.apache.lucene.document DocumentStoredFieldVisitor DocumentStoredFieldVisitor

List of usage examples for org.apache.lucene.document DocumentStoredFieldVisitor DocumentStoredFieldVisitor

Introduction

In this page you can find the example usage for org.apache.lucene.document DocumentStoredFieldVisitor DocumentStoredFieldVisitor.

Prototype

public DocumentStoredFieldVisitor(String... fields) 

Source Link

Document

Load only the fields whose names are given in the fields argument.

Usage

From source file:com.browseengine.bobo.api.BoboSegmentReader.java

License:Open Source License

/**
 * Fetches the stored values of a single field from the given document.
 *
 * @param docid     the segment-local document number
 * @param fieldname name of the stored field to load
 * @return all stored values of {@code fieldname} on the document
 * @throws IOException if the underlying reader fails
 */
public String[] getStoredFieldValue(int docid, final String fieldname) throws IOException {
    final DocumentStoredFieldVisitor fieldVisitor = new DocumentStoredFieldVisitor(fieldname);
    super.document(docid, fieldVisitor);
    return fieldVisitor.getDocument().getValues(fieldname);
}

From source file:com.core.nlp.index.IndexReader.java

License:Apache License

/**
 * Like {@link #document(int)} but loads only the requested stored fields.
 * Simply sugar for {@link DocumentStoredFieldVisitor#DocumentStoredFieldVisitor(Set)}.
 *
 * @param docID        number of the document to load
 * @param fieldsToLoad names of the stored fields to populate
 * @return a {@code Document} containing only the requested stored fields
 * @throws IOException on index access failure
 */
public final Document document(int docID, Set<String> fieldsToLoad) throws IOException {
    final DocumentStoredFieldVisitor fieldVisitor = new DocumentStoredFieldVisitor(fieldsToLoad);
    document(docID, fieldVisitor);
    return fieldVisitor.getDocument();
}

From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java

License:Apache License

// Builds a "sidecar" parallel index next to the main index: for every live document in
// `main` that carries a value in `docIdField`, the matching boost data looked up in
// `source` is written at the SAME doc number (dummy docs pad deletions/misses so the
// numbering stays aligned, and commits at segment boundaries keep segment geometry
// matching `mergeTargets`). On any failure it falls back to the main index alone.
// NOTE(review): `sidecarIndex`, `sidecarIndexLocation`, `parallelFields`, `currentCore`,
// `sourceCollection`, `docIdField`, `addDummy`, `lookup` and `createSidecarIndexReader`
// are declared elsewhere in this class — only their use is visible here.
DirectoryReader buildParallelReader(DirectoryReader main, SolrIndexSearcher source, boolean rebuild) {
    try {
        if (source == null) {
            throw new Exception("Source collection is missing.");
        }
        // create as a sibling path of the main index
        Directory d = main.directory();
        File primaryDir = null;
        if (d instanceof FSDirectory) {
            String path = ((FSDirectory) d).getDirectory().getPath();
            primaryDir = new File(path);
            sidecarIndex = new File(primaryDir.getParentFile(), sidecarIndexLocation);
        } else {
            // main index is not on the filesystem: fall back to a timestamped temp dir
            String secondaryPath = System.getProperty("java.io.tmpdir") + File.separator + sidecarIndexLocation
                    + "-" + System.currentTimeMillis();
            sidecarIndex = new File(secondaryPath);
        }
        // create a new tmp dir for the secondary indexes
        File secondaryIndex = new File(sidecarIndex, System.currentTimeMillis() + "-index");
        if (rebuild) {
            safeDelete(sidecarIndex);
        }
        // boost every source field except the join key
        parallelFields.addAll(source.getFieldNames());
        parallelFields.remove("id");
        LOG.debug("building a new index");
        Directory dir = FSDirectory.open(secondaryIndex);
        if (IndexWriter.isLocked(dir)) {
            // try forcing unlock
            try {
                IndexWriter.unlock(dir);
            } catch (Exception e) {
                LOG.warn("Failed to unlock " + secondaryIndex);
            }
        }
        // per-segment doc counts of the main index; the sidecar writer commits at these
        // boundaries so both indexes end up with matching segment sizes
        int[] mergeTargets;
        AtomicReader[] subReaders = SidecarIndexReader.getSequentialSubReaders(main);
        if (subReaders == null || subReaders.length == 0) {
            mergeTargets = new int[] { main.maxDoc() };
        } else {
            mergeTargets = new int[subReaders.length];
            for (int i = 0; i < subReaders.length; i++) {
                mergeTargets[i] = subReaders[i].maxDoc();
            }
        }
        Version ver = currentCore.getLatestSchema().getDefaultLuceneMatchVersion();
        IndexWriterConfig cfg = new IndexWriterConfig(ver, currentCore.getLatestSchema().getAnalyzer());
        //cfg.setInfoStream(System.err);
        // serial merging + custom policy keep the carefully aligned doc numbers intact
        cfg.setMergeScheduler(new SerialMergeScheduler());
        cfg.setMergePolicy(new SidecarMergePolicy(mergeTargets, false));
        IndexWriter iw = new IndexWriter(dir, cfg);
        LOG.info("processing " + main.maxDoc() + " docs / " + main.numDeletedDocs() + " dels in main index");
        int boostedDocs = 0;
        Bits live = MultiFields.getLiveDocs(main);

        int targetPos = 0;
        int nextTarget = mergeTargets[targetPos];
        BytesRef idRef = new BytesRef();
        for (int i = 0; i < main.maxDoc(); i++) {
            if (i == nextTarget) {
                // segment boundary of the main index reached — flush a matching segment
                iw.commit();
                nextTarget = nextTarget + mergeTargets[++targetPos];
            }
            if (live != null && !live.get(i)) {
                addDummy(iw); // this is required to preserve doc numbers.
                continue;
            } else {
                // load only the id field of the main document
                DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(docIdField);
                main.document(i, visitor);
                Document doc = visitor.getDocument();
                // get docId
                String id = doc.get(docIdField);
                if (id == null) {
                    LOG.debug("missing id, docNo=" + i);
                    addDummy(iw);
                    continue;
                } else {
                    // find the data, if any
                    doc = lookup(source, id, idRef, parallelFields);
                    if (doc == null) {
                        LOG.debug("missing boost data, docId=" + id);
                        addDummy(iw);
                        continue;
                    } else {
                        LOG.debug("adding boost data, docId=" + id + ", b=" + doc);
                        iw.addDocument(doc);
                        boostedDocs++;
                    }
                }
            }
        }
        iw.close();
        DirectoryReader other = DirectoryReader.open(dir);
        LOG.info("SidecarIndexReader with " + boostedDocs + " boosted documents.");
        SidecarIndexReader pr = createSidecarIndexReader(main, other, sourceCollection, secondaryIndex);
        return pr;
    } catch (Exception e) {
        // best-effort: any failure degrades to serving the main index without boosts
        LOG.warn("Unable to build parallel index: " + e.toString(), e);
        LOG.warn("Proceeding with single main index.");
        try {
            return new SidecarIndexReader(this, main, null, SidecarIndexReader.getSequentialSubReaders(main),
                    sourceCollection, null);
        } catch (Exception e1) {
            LOG.warn("Unexpected exception, returning single main index", e1);
            return main;
        }
    }
}

From source file:com.o19s.solr.swan.highlight.TermVectorFun.java

License:Apache License

// Exploratory test: indexes the DOCS fixtures with term vectors enabled, runs a
// SpanTermQuery for "fleece" on the "content" field, and prints the matching span
// positions together with the terms of each matching document's term vector.
// NOTE(review): this test prints but asserts nothing — it only verifies nothing throws.
@Test
public void testBlah() throws IOException {
    RAMDirectory ramDir = new RAMDirectory();
    // Index some made up content
    IndexWriterConfig iwf = new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47));
    IndexWriter writer = new IndexWriter(ramDir, iwf);
    // field type storing content plus full term-vector info (positions + offsets)
    FieldType ft = new FieldType();
    ft.setIndexed(true);
    ft.setTokenized(true);
    ft.setStored(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.freeze();
    for (int i = 0; i < DOCS.length; i++) {
        Document doc = new Document();
        StringField id = new StringField("id", "doc_" + i, StringField.Store.YES);
        doc.add(id);
        // Store both position and offset information
        Field text = new Field("content", DOCS[i], ft);
        //               Field.Index.ANALYZED,
        //               Field.TermVector.WITH_POSITIONS_OFFSETS);
        doc.add(text);
        writer.addDocument(doc);
    }
    //writer.close();
    // Get a searcher over a near-real-time reader (writer intentionally left open)
    AtomicReader dr = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(writer, true));
    IndexSearcher searcher = new IndexSearcher(dr);
    // Do a search using SpanQuery
    SpanTermQuery fleeceQ = new SpanTermQuery(new Term("content", "fleece"));
    TopDocs results = searcher.search(fleeceQ, 10);
    for (int i = 0; i < results.scoreDocs.length; i++) {
        ScoreDoc scoreDoc = results.scoreDocs[i];
        System.out.println("Score Doc: " + scoreDoc);
    }
    IndexReader reader = searcher.getIndexReader();
    Bits acceptDocs = null;
    Map<Term, TermContext> termContexts = new HashMap<Term, TermContext>();
    Spans spans = fleeceQ.getSpans(dr.getContext(), acceptDocs, termContexts);

    while (spans.next()) {
        System.out.println("Doc: " + spans.doc() + " Start: " + spans.start() + " End: " + spans.end());
        // load only the "content" stored field of the matching doc
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("content");
        reader.document(spans.doc(), visitor);
        Terms terms = reader.getTermVector(spans.doc(), "content");
        TermsEnum tenum = terms.iterator(null);
        //         AttributeSource as = tenum.attributes();

        while (tenum.next() != null) {
            System.out.println(tenum.term().utf8ToString());
        }
        // NOTE(review): loop body is entirely commented out — this currently spins
        // spans.end() times doing nothing; looks like an unfinished window experiment.
        for (long pos = 0L; pos < spans.end(); pos++) {
            //            tenum.next();
            //            if (tenum.ord()<pos) continue;
            //            System.out.println(tenum.term());
            //            
        }

        // NOTE(review): second document() call appears redundant — visitor already
        // holds the document from the first call above.
        reader.document(spans.doc(), visitor);
        //         String[] values = visitor.getDocument().getValues("content");
        //         List<String> a = new ArrayList<String>();
        //         // build up the window
        //         tvm.start = spans.start() - window;
        //         tvm.end = spans.end() + window;
        //         reader.getTermFreqVector(spans.doc(), "content", tvm);
        //         for (WindowEntry entry : tvm.entries.values()) {
        //            System.out.println("Entry: " + entry);
        //         }
        //         // clear out the entries for the next round
        //         tvm.entries.clear();
    }
}

From source file:demo.jaxrs.search.server.Catalog.java

License:Apache License

/**
 * Lists the source identifiers of every document in the catalog index.
 *
 * @return a JSON array with one source-field entry per indexed document
 *         (capped at 1000 hits)
 * @throws IOException on index access failure
 */
@GET
@Produces(MediaType.APPLICATION_JSON)
public JsonArray getBooks() throws IOException {
    final IndexReader indexReader = DirectoryReader.open(directory);
    final IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    final JsonArrayBuilder books = Json.createArrayBuilder();

    try {
        final ScoreDoc[] hits = indexSearcher.search(new MatchAllDocsQuery(), 1000).scoreDocs;

        for (final ScoreDoc hit : hits) {
            // load only the source field of each matching document
            final DocumentStoredFieldVisitor fieldVisitor = new DocumentStoredFieldVisitor(
                    LuceneDocumentMetadata.SOURCE_FIELD);

            indexReader.document(hit.doc, fieldVisitor);
            books.add(fieldVisitor.getDocument().getField(LuceneDocumentMetadata.SOURCE_FIELD).stringValue());
        }

        return books.build();
    } finally {
        indexReader.close();
    }
}

From source file:gov.nist.basekb.FreebaseSearcher.java

License:LGPL

/**
 * Returns the value of predicate {@code predName} on {@code subjectURI}.  If there
 * are multiple values the first one indexed is returned; if there are none, null.
 * Only the {@code predName} stored field of the subject document is retrieved —
 * use the Document accessor instead when the full document is already loaded.
 */
public String getSubjectPredicateValue(String subjectURI, String predName) throws IOException {
    final int subjectId = getSubjectDocID(subjectURI);
    if (subjectId < 0) {
        return null;
    }
    final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(predName);
    getIndexReader().document(subjectId, visitor);
    return getSubjectPredicateValue(visitor.getDocument(), predName);
}

From source file:gov.nist.basekb.FreebaseSearcher.java

License:LGPL

/**
 * Returns all values of predicate {@code predName} on {@code subjectURI}, or an
 * empty array when the subject is unknown or has no values.  Only the
 * {@code predName} stored field of the subject document is retrieved — use the
 * Document accessor instead when the full document is already loaded.
 */
public String[] getSubjectPredicateValues(String subjectURI, String predName) throws IOException {
    final int subjectId = getSubjectDocID(subjectURI);
    if (subjectId < 0) {
        return emptyValues;
    }
    final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(predName);
    getIndexReader().document(subjectId, visitor);
    return getSubjectPredicateValues(visitor.getDocument(), predName);
}

From source file:org.apache.mahout.text.LuceneStorageConfiguration.java

License:Apache License

/**
 * Builds a stored-field visitor restricted to the id field plus the configured
 * document fields.
 *
 * @return a visitor that loads only {@code idField} and {@code fields}
 */
public DocumentStoredFieldVisitor getStoredFieldVisitor() {
    final Set<String> fieldsToLoad = Sets.newHashSet(idField);
    fieldsToLoad.addAll(fields);
    return new DocumentStoredFieldVisitor(fieldsToLoad);
}

From source file:org.apache.solr.search.grouping.distributed.shardresultserializer.TopGroupsResultTransformer.java

License:Apache License

/**
 * Loads only the unique-key stored field of the given document.
 *
 * @param uniqueField the schema's unique-key field
 * @param doc         the document number to load
 * @return a {@code Document} holding just the unique-key field
 * @throws IOException on index access failure
 */
private Document retrieveDocument(final SchemaField uniqueField, int doc) throws IOException {
    final DocumentStoredFieldVisitor fieldVisitor = new DocumentStoredFieldVisitor(uniqueField.getName());
    rb.req.getSearcher().doc(doc, fieldVisitor);
    return fieldVisitor.getDocument();
}

From source file:org.fao.geonet.component.csw.GetDomain.java

License:Open Source License

/**
 * Builds a CSW GetDomain response: for each requested property, collects the distinct
 * values present in the Lucene index (or, when {@code freq} is true, values ordered by
 * frequency), honoring the caller's group restrictions and any CSW-service-specific
 * constraint.
 *
 * @param catalogConfig CSW catalog configuration (field mappings, range fields)
 * @param propertyNames property names to resolve, one DomainValues element each
 * @param context       service context (handler beans, language, user groups)
 * @param freq          if true, return values sorted by frequency instead
 * @param maxRecords    maximum number of index hits to scan per property
 * @param cswServiceSpecificConstraint optional extra Lucene constraint
 * @param luceneConfig  Lucene configuration used for query parsing and search
 * @return one DomainValues element per requested property
 * @throws Exception on search or index access failure
 */
public static List<Element> handlePropertyName(CatalogConfiguration catalogConfig, String[] propertyNames,
        ServiceContext context, boolean freq, int maxRecords, String cswServiceSpecificConstraint,
        LuceneConfig luceneConfig) throws Exception {

    List<Element> domainValuesList = new ArrayList<Element>();

    if (Log.isDebugEnabled(Geonet.CSW))
        Log.debug(Geonet.CSW, "Handling property names '" + Arrays.toString(propertyNames)
                + "' with max records of " + maxRecords);

    for (int i = 0; i < propertyNames.length; i++) {

        // Initialize list of values element.
        Element listOfValues = null;

        // Generate DomainValues element
        Element domainValues = new Element("DomainValues", Csw.NAMESPACE_CSW);

        // FIXME what should be the type ???
        domainValues.setAttribute("type", "csw:Record");

        String property = propertyNames[i].trim();

        // Set propertyName in any case.
        Element pn = new Element("PropertyName", Csw.NAMESPACE_CSW);
        domainValues.addContent(pn.setText(property));

        GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
        SearchManager sm = gc.getBean(SearchManager.class);

        IndexAndTaxonomy indexAndTaxonomy = sm.getNewIndexReader(null);
        try {
            GeonetworkMultiReader reader = indexAndTaxonomy.indexReader;
            BooleanQuery groupsQuery = (BooleanQuery) CatalogSearcher.getGroupsQuery(context);
            BooleanQuery query = null;

            // Apply CSW service specific constraint
            if (StringUtils.isNotEmpty(cswServiceSpecificConstraint)) {
                Query constraintQuery = parseLuceneQuery(cswServiceSpecificConstraint, luceneConfig);

                query = new BooleanQuery();

                BooleanClause.Occur occur = LuceneUtils.convertRequiredAndProhibitedToOccur(true, false);

                query.add(groupsQuery, occur);
                query.add(constraintQuery, occur);

            } else {
                query = groupsQuery;
            }

            List<Pair<String, Boolean>> sortFields = Collections
                    .singletonList(Pair.read(Geonet.SearchResult.SortBy.RELEVANCE, true));
            Sort sort = LuceneSearcher.makeSort(sortFields, context.getLanguage(), false);
            CachingWrapperFilter filter = null;

            Pair<TopDocs, Element> searchResults = LuceneSearcher.doSearchAndMakeSummary(maxRecords, 0,
                    maxRecords, context.getLanguage(), null, luceneConfig, reader, query, filter, sort, null,
                    false);
            TopDocs hits = searchResults.one();

            try {
                // Get mapped lucene field in CSW configuration
                String indexField = catalogConfig.getFieldMapping().get(property.toLowerCase());
                if (indexField != null)
                    property = indexField;

                // check if params asked is in the index using getFieldNames ?
                @SuppressWarnings("resource")
                FieldInfos fi = SlowCompositeReaderWrapper.wrap(reader).getFieldInfos();
                if (fi.fieldInfo(property) == null)
                    continue;

                boolean isRange = false;
                if (catalogConfig.getGetRecordsRangeFields().contains(property))
                    isRange = true;

                if (isRange)
                    listOfValues = new Element("RangeOfValues", Csw.NAMESPACE_CSW);
                else
                    listOfValues = new Element("ListOfValues", Csw.NAMESPACE_CSW);

                // load only the requested property and the template flag per hit
                Set<String> fields = new HashSet<String>();
                fields.add(property);
                fields.add("_isTemplate");

                // parse each document in the index
                String[] fieldValues;
                Collator stringCollator = Collator.getInstance();
                stringCollator.setStrength(Collator.PRIMARY);
                SortedSet<String> sortedValues = new TreeSet<String>(stringCollator);
                ObjectKeyIntOpenHashMap duplicateValues = new ObjectKeyIntOpenHashMap();
                for (int j = 0; j < hits.scoreDocs.length; j++) {
                    DocumentStoredFieldVisitor selector = new DocumentStoredFieldVisitor(fields);
                    reader.document(hits.scoreDocs[j].doc, selector);
                    Document doc = selector.getDocument();

                    // Skip templates and subTemplates.
                    // BUG FIX: getValues() returns an EMPTY array (not a null element)
                    // when the field is absent, so guard the length before indexing —
                    // the previous isTemplate[0] access could throw
                    // ArrayIndexOutOfBoundsException for docs without _isTemplate.
                    String[] isTemplate = doc.getValues("_isTemplate");
                    if (isTemplate.length > 0 && isTemplate[0] != null && !isTemplate[0].equals("n"))
                        continue;

                    // Get doc values for specified property
                    fieldValues = doc.getValues(property);
                    if (fieldValues == null)
                        continue;

                    addtoSortedSet(sortedValues, fieldValues, duplicateValues);
                }

                SummaryComparator valuesComparator = new SummaryComparator(SortOption.FREQUENCY, Type.STRING,
                        context.getLanguage(), null);
                TreeSet<SummaryComparator.SummaryElement> sortedValuesFrequency = new TreeSet<SummaryComparator.SummaryElement>(
                        valuesComparator);
                ObjectKeyIntMapIterator entries = duplicateValues.entries();

                while (entries.hasNext()) {
                    entries.next();
                    sortedValuesFrequency.add(new SummaryComparator.SummaryElement(entries));
                }

                if (freq)
                    return createValuesByFrequency(sortedValuesFrequency);
                else
                    listOfValues.addContent(createValuesElement(sortedValues, isRange));

            } finally {
                // any children means that the catalog was unable to determine
                // anything about the specified parameter
                if (listOfValues != null && listOfValues.getChildren().size() != 0)
                    domainValues.addContent(listOfValues);

                // Add current DomainValues to the list
                domainValuesList.add(domainValues);
            }
        } finally {
            sm.releaseIndexReader(indexAndTaxonomy);
        }
    }
    return domainValuesList;

}