Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

This page collects example usages of the org.apache.lucene.index IndexReader document method.

Prototype

public final Document document(int docID, Set<String> fieldsToLoad) throws IOException 

Source Link

Document

Like #document(int) but only loads the specified fields.
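
A minimal sketch of calling this overload directly (the "title" field name and the helper class are assumptions for illustration, not taken from the sources below):

import java.io.IOException;
import java.util.Collections;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;

public class StoredFieldExample {
    /**
     * Loads only the "title" stored field of the given document,
     * leaving all other stored fields unread.
     */
    public static String loadTitle(IndexReader reader, int docID) throws IOException {
        Set<String> fieldsToLoad = Collections.singleton("title");
        Document doc = reader.document(docID, fieldsToLoad);
        return doc.get("title"); // null if the document stores no "title" field
    }
}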

Usage

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexMBeanImpl.java

License:Apache License

private static String getPath(IndexReader reader, ScoreDoc doc) throws IOException {
    PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
    reader.document(doc.doc, visitor);
    return visitor.getPath();
}
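
Note that this example uses the related document(int, StoredFieldVisitor) overload: a custom visitor collects just the path field as the stored fields are streamed, instead of materializing a full Document.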

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java

License:Apache License

@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    final Sort sort = getSort(plan);
    final PlanResult pr = getPlanResult(plan);
    QueryEngineSettings settings = filter.getQueryEngineSettings();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
        private final Set<String> seenPaths = Sets.newHashSet();
        private ScoreDoc lastDoc;
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
        private boolean noDocs = false;
        private long lastSearchIndexerVersion;

        @Override
        protected LuceneResultRow computeNext() {
            while (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt,
                Facets facets, String explanation) throws IOException {
            IndexReader reader = searcher.getIndexReader();
            //TODO Look into using the field cache for retrieving the path
            //instead of reading via the reader if the number of docs in the index is limited
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (pr.isPathTransformed()) {
                    String originalPath = path;
                    path = pr.transformPath(path);

                    if (path == null) {
                        LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
                        return null;
                    }

                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath);
                        return null;
                    }
                    seenPaths.add(path);
                }

                LOG.trace("Matched path {}", path);
                return new LuceneResultRow(path, doc.score, excerpt, facets, explanation);
            }
            return null;
        }

        /**
         * Loads the lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {

            if (noDocs) {
                return false;
            }

            ScoreDoc lastDocToRecord = null;

            final IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory,
                        searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();

                    CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query);

                    if (customScoreQuery != null) {
                        query = customScoreQuery;
                    }

                    checkForIndexVersionChange(searcher);

                    TopDocs docs;
                    long start = PERF_LOGGER.start();
                    while (true) {
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                            } else {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort);
                            }
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.search(query, nextBatchSize);
                            } else {
                                docs = searcher.search(query, nextBatchSize, sort);
                            }
                        }
                        PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);

                        long f = PERF_LOGGER.start();
                        Facets facets = FacetHelper.getFacets(searcher, query, docs, plan,
                                indexNode.getDefinition().isSecureFacets());
                        PERF_LOGGER.end(f, -1, "facets retrieved");

                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();

                        restriction = filter.getPropertyRestriction(QueryImpl.OAK_SCORE_EXPLANATION);
                        boolean addExplain = restriction != null && restriction.isNotNullRestriction();

                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();

                        FieldInfos mergedFieldInfos = null;
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                            mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader());
                        }

                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(query, analyzer, searcher, doc, mergedFieldInfos);
                            }

                            String explanation = null;
                            if (addExplain) {
                                explanation = searcher.explain(query, doc.doc).toString();
                            }

                            LuceneResultRow row = convertToRow(doc, searcher, excerpt, facets, explanation);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }

                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            //queue is still empty but more results can be fetched
                            //from Lucene so still continue
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT
                            : FieldNames.SPELLCHECK;
                    noDocs = true;
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade
                            .getLuceneRequest();
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);

                    // ACL filter spellchecks
                    QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField,
                            indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(aclCheckField,
                                QueryParserBase.escape(suggestion.string));

                        query = addDescendantClauseIfRequired(query, plan);

                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.string));
                                    break;
                                }
                            }
                        }
                    }

                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade
                            .getLuceneRequest();
                    noDocs = true;

                    List<Lookup.LookupResult> lookupResults = SuggestHelper
                            .getSuggestions(indexNode.getLookup(), suggestQuery);

                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                            indexNode.getDefinition().isSuggestAnalyzed()
                                    ? indexNode.getDefinition().getAnalyzer()
                                    : SuggestHelper.getAnalyzer());

                    // ACL filter suggestions
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\"");

                        query = addDescendantClauseIfRequired(query, plan);

                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value));
                                    break;
                                }
                            }
                        }
                    }
                }
            } catch (Exception e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }

            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }

            return !queue.isEmpty();
        }

        private void checkForIndexVersionChange(IndexSearcher searcher) {
            long currentVersion = getVersion(searcher);
            if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
                lastDoc = null;
                LOG.debug("Change in index version detected {} => {}. Query would be performed without "
                        + "offset", currentVersion, lastSearchIndexerVersion);
            }
            this.lastSearchIndexerVersion = currentVersion;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {
        @Override
        public long getSize() {
            IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory,
                        searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    return new LucenePathCursor(itr, plan, settings, sizeEstimator);
}

From source file:org.apache.mahout.utils.vectors.lucene.ClusterLabels.java

License:Apache License

private static OpenBitSet getClusterDocBitset(IndexReader reader, Collection<String> idSet, String idField)
        throws IOException {
    int numDocs = reader.numDocs();

    OpenBitSet bitset = new OpenBitSet(numDocs);

    Set<String> idFieldSelector = null;
    if (idField != null) {
        idFieldSelector = new TreeSet<String>();
        idFieldSelector.add(idField);
    }

    for (int i = 0; i < numDocs; i++) {
        String id;
        // Use Lucene's internal ID if idField is not specified. Otherwise, get it from the document.
        if (idField == null) {
            id = Integer.toString(i);
        } else {
            id = reader.document(i, idFieldSelector).get(idField);
        }
        if (idSet.contains(id)) {
            bitset.set(i);
        }
    }
    log.info("Created bitset for in-cluster documents : {}", bitset.cardinality());
    return bitset;
}

From source file:org.apache.nifi.provenance.index.lucene.QueryTask.java

License:Apache License

private Tuple<List<ProvenanceEventRecord>, Integer> readDocuments(final TopDocs topDocs,
        final IndexReader indexReader) {
    // If no topDocs is supplied, just provide a Tuple that has no records and a hit count of 0.
    if (topDocs == null || topDocs.totalHits == 0) {
        return new Tuple<>(Collections.<ProvenanceEventRecord>emptyList(), 0);
    }

    final long start = System.nanoTime();
    final List<Long> eventIds = Arrays.stream(topDocs.scoreDocs).mapToInt(scoreDoc -> scoreDoc.doc)
            .mapToObj(docId -> {
                try {
                    return indexReader.document(docId, LUCENE_FIELDS_TO_LOAD);
                } catch (final Exception e) {
                    throw new SearchFailedException("Failed to read Provenance Events from Event File", e);
                }
            }).map(doc -> doc.getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue()
                    .longValue())
            .collect(Collectors.toList());

    final long endConvert = System.nanoTime();
    final long ms = TimeUnit.NANOSECONDS.toMillis(endConvert - start);
    logger.debug("Converting documents took {} ms", ms);

    List<ProvenanceEventRecord> events;
    try {
        events = eventStore.getEvents(eventIds, authorizer, transformer);
    } catch (IOException e) {
        throw new SearchFailedException("Unable to retrieve events from the Provenance Store", e);
    }

    final long fetchEventNanos = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - endConvert);
    logger.debug("Fetching {} events from Event Store took {} ms ({} events actually fetched)", eventIds.size(),
            fetchEventNanos, events.size());

    final int totalHits = topDocs.totalHits;
    return new Tuple<>(events, totalHits);
}

From source file:org.apache.solr.handler.component.TermVectorComponent.java

License:Apache License

@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
        return;
    }

    NamedList<Object> termVectors = new NamedList<Object>();
    rb.rsp.add(TERM_VECTORS, termVectors);

    IndexSchema schema = rb.req.getSchema();
    SchemaField keyField = schema.getUniqueKeyField();
    String uniqFieldName = null;
    if (keyField != null) {
        uniqFieldName = keyField.getName();
        termVectors.add("uniqueKeyFieldName", uniqFieldName);
    }

    FieldOptions allFields = new FieldOptions();
    //figure out what options we have, and try to get the appropriate vector
    allFields.termFreq = params.getBool(TermVectorParams.TF, false);
    allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
    allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
    allFields.docFreq = params.getBool(TermVectorParams.DF, false);
    allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
    //boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
    //shortcut to all values.
    if (params.getBool(TermVectorParams.ALL, false)) {
        allFields.termFreq = true;
        allFields.positions = true;
        allFields.offsets = true;
        allFields.docFreq = true;
        allFields.tfIdf = true;
    }

    //Build up our per field mapping
    Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
    NamedList<List<String>> warnings = new NamedList<List<String>>();
    List<String> noTV = new ArrayList<String>();
    List<String> noPos = new ArrayList<String>();
    List<String> noOff = new ArrayList<String>();

    Set<String> fields = getFields(rb);
    if (null != fields) {
        //we have specific fields to retrieve, or no fields
        for (String field : fields) {

            // workaround for SOLR-3523
            if (null == field || "score".equals(field))
                continue;

            // we don't want to issue warnings about the uniqueKey field
            // since it can cause lots of confusion in distributed requests
            // where the uniqueKey field is injected into the fl for merging
            final boolean fieldIsUniqueKey = field.equals(uniqFieldName);

            SchemaField sf = schema.getFieldOrNull(field);
            if (sf != null) {
                if (sf.storeTermVector()) {
                    FieldOptions option = fieldOptions.get(field);
                    if (option == null) {
                        option = new FieldOptions();
                        option.fieldName = field;
                        fieldOptions.put(field, option);
                    }
                    //get the per field mappings
                    option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
                    option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
                    option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
                    //Validate these are even an option
                    option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS,
                            allFields.positions);
                    if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
                        noPos.add(field);
                    }
                    option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
                    if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
                        noOff.add(field);
                    }
                } else {//field doesn't have term vectors
                    if (!fieldIsUniqueKey)
                        noTV.add(field);
                }
            } else {
                //field doesn't exist
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
            }
        }
    } //else, deal with all fields

    // NOTE: currently all types of warnings are schema driven, and guaranteed
    // to be consistent across all shards - if additional types of warnings
    // are added that might be different between shards, finishStage() needs
    // to be changed to account for that.
    boolean hasWarnings = false;
    if (!noTV.isEmpty()) {
        warnings.add("noTermVectors", noTV);
        hasWarnings = true;
    }
    if (!noPos.isEmpty()) {
        warnings.add("noPositions", noPos);
        hasWarnings = true;
    }
    if (!noOff.isEmpty()) {
        warnings.add("noOffsets", noOff);
        hasWarnings = true;
    }
    if (hasWarnings) {
        termVectors.add("warnings", warnings);
    }

    DocListAndSet listAndSet = rb.getResults();
    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
    Iterator<Integer> iter;
    if (docIds != null && !docIds.isEmpty()) {
        iter = docIds.iterator();
    } else {
        DocList list = listAndSet.docList;
        iter = list.iterator();
    }
    SolrIndexSearcher searcher = rb.req.getSearcher();

    IndexReader reader = searcher.getIndexReader();
    //the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors

    //Only load the uniqueKey field to read its stored value

    final String finalUniqFieldName = uniqFieldName;

    final List<String> uniqValues = new ArrayList<String>();

    // TODO: is this required to be single-valued? if so, we should STOP
    // once we find it...
    final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {
        @Override
        public void stringField(FieldInfo fieldInfo, String value) {
            uniqValues.add(value);
        }

        @Override
        public void intField(FieldInfo fieldInfo, int value) {
            uniqValues.add(Integer.toString(value));
        }

        @Override
        public void longField(FieldInfo fieldInfo, long value) {
            uniqValues.add(Long.toString(value));
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) {
            return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
        }
    };

    TermsEnum termsEnum = null;

    while (iter.hasNext()) {
        Integer docId = iter.next();
        NamedList<Object> docNL = new NamedList<Object>();

        if (keyField != null) {
            reader.document(docId, getUniqValue);
            String uniqVal = null;
            if (uniqValues.size() != 0) {
                uniqVal = uniqValues.get(0);
                uniqValues.clear();
                docNL.add("uniqueKey", uniqVal);
                termVectors.add(uniqVal, docNL);
            }
        } else {
            // support for schemas w/o a unique key,
            termVectors.add("doc-" + docId, docNL);
        }

        if (null != fields) {
            for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
                final String field = entry.getKey();
                final Terms vector = reader.getTermVector(docId, field);
                if (vector != null) {
                    termsEnum = vector.iterator(termsEnum);
                    mapOneVector(docNL, entry.getValue(), reader, docId, termsEnum, field);
                }
            }
        } else {
            // extract all fields
            final Fields vectors = reader.getTermVectors(docId);
            for (String field : vectors) {
                Terms terms = vectors.terms(field);
                if (terms != null) {
                    termsEnum = terms.iterator(termsEnum);
                    mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
                }
            }
        }
    }
}

From source file:org.archive.tnh.FieldCacheNoCache.java

License:Apache License

/**
 * Loads only the configured field of the given document and returns its stored value.
 */
public String getValue(IndexReader reader, int docBase, int docId) throws IOException {
    Document doc = reader.document(docId, FIELD_ONLY);

    String value = doc.get(this.fieldName);

    return value;
}

From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndex.java

License:Open Source License

private static Document readDocument(IndexReader reader, int docId, Set<String> fieldsToLoad)
        throws IOException {
    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad);
    reader.document(docId, visitor);
    return visitor.getDocument();
}
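
This helper mirrors what the two-argument overload does internally: in Lucene 4.x, IndexReader.document(int, Set) wraps the requested fields in a DocumentStoredFieldVisitor, delegates to document(int, StoredFieldVisitor), and returns visitor.getDocument().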

From source file:org.exoplatform.services.jcr.impl.core.query.lucene.SearchIndex.java

License:Apache License

/**
 * {@inheritDoc}
 */
public void checkIndex(ItemDataConsumer itemStateManager, boolean isSystem, final InspectionReport report)
        throws RepositoryException, IOException {

    // The visitor that enumerates items and checks whether all nodes present in
    // the persistent layer are indexed. It also collects the set of all indexed
    // nodes to optimize the backward check, when the index is traversed to find
    // references to already-deleted nodes.
    class ItemDataIndexConsistencyVisitor extends ItemDataTraversingVisitor {
        private final IndexReader indexReader;

        private final Set<String> indexedNodes = new HashSet<String>();

        /**
         * @param dataManager
         */
        public ItemDataIndexConsistencyVisitor(ItemDataConsumer dataManager, IndexReader indexReader) {
            super(dataManager);
            this.indexReader = indexReader;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        protected void entering(PropertyData property, int level) throws RepositoryException {
            // ignore properties
        }

        /**
         * {@inheritDoc}
         */
        @Override
        protected void entering(NodeData node, int level) throws RepositoryException {
            // process node uuids one-by-one
            try {
                String uuid = node.getIdentifier();
                TermDocs docs = indexReader.termDocs(new Term(FieldNames.UUID, uuid));

                if (docs.next()) {
                    indexedNodes.add(uuid);
                    docs.doc();
                    if (docs.next()) {
                        //multiple entries
                        report.logComment("Multiple entires.");
                        report.logBrokenObjectAndSetInconsistency("ID=" + uuid);
                    }
                } else {
                    report.logComment("Not indexed.");
                    report.logBrokenObjectAndSetInconsistency("ID=" + uuid);
                }
            } catch (IOException e) {
                throw new RepositoryException(e.getMessage(), e);
            }
        }

        @Override
        protected void leaving(PropertyData property, int level) throws RepositoryException {
            // ignore properties
        }

        @Override
        protected void leaving(NodeData node, int level) throws RepositoryException {
            // do nothing
        }

        @Override
        protected void visitChildProperties(NodeData node) throws RepositoryException {
            //do nothing
        }

        public Set<String> getIndexedNodes() {
            return indexedNodes;
        }
    }

    // check relation Persistent Layer -> Index
    // If the current workspace is the system workspace, the reader corresponding to the system index must be used
    ensureFlushed();
    if (isSystem) {
        if (getContext().getParentHandler() != null) {
            ((SearchIndex) getContext().getParentHandler()).ensureFlushed();
        }
    }
    IndexReader indexReader = getIndexReader(isSystem);
    try {
        ItemData root = itemStateManager.getItemData(Constants.ROOT_UUID);
        ItemDataIndexConsistencyVisitor visitor = new ItemDataIndexConsistencyVisitor(itemStateManager,
                indexReader);
        root.accept(visitor);

        Set<String> documentUUIDs = visitor.getIndexedNodes();

        // check relation Index -> Persistent Layer
        // find documents that do not correspond to real nodes
        // iterate on documents one-by-one
        for (int i = 0; i < indexReader.maxDoc(); i++) {
            if (indexReader.isDeleted(i)) {
                continue;
            }
            final int currentIndex = i;
            Document d = indexReader.document(currentIndex, FieldSelectors.UUID);
            String uuid = d.get(FieldNames.UUID);
            if (!documentUUIDs.contains(uuid)) {
                report.logComment("Document corresponds to removed node.");
                report.logBrokenObjectAndSetInconsistency("ID=" + uuid);
            }
        }
    } finally {
        Util.closeOrRelease(indexReader);
    }
}

From source file:org.fao.geonet.kernel.csw.services.GetDomain.java

License:Open Source License

public static List<Element> handlePropertyName(String[] propertyNames, ServiceContext context, boolean freq,
        int maxRecords, String cswServiceSpecificConstraint) throws Exception {

    List<Element> domainValuesList = null;

    if (Log.isDebugEnabled(Geonet.CSW))
        Log.debug(Geonet.CSW, "Handling property names '" + Arrays.toString(propertyNames)
                + "' with max records of " + maxRecords);

    for (int i = 0; i < propertyNames.length; i++) {

        if (i == 0)
            domainValuesList = new ArrayList<Element>();

        // Initialize list of values element.
        Element listOfValues = null;

        // Generate DomainValues element
        Element domainValues = new Element("DomainValues", Csw.NAMESPACE_CSW);

        // FIXME what should be the type ???
        domainValues.setAttribute("type", "csw:Record");

        String property = propertyNames[i].trim();

        // Set propertyName in any case.
        Element pn = new Element("PropertyName", Csw.NAMESPACE_CSW);
        domainValues.addContent(pn.setText(property));

        GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
        SearchManager sm = gc.getSearchmanager();

        IndexReader reader = sm.getIndexReader(context.getLanguage());
        try {
            BooleanQuery groupsQuery = (BooleanQuery) CatalogSearcher.getGroupsQuery(context);
            BooleanQuery query = null;

            // Apply CSW service specific constraint
            if (StringUtils.isNotEmpty(cswServiceSpecificConstraint)) {
                Query constraintQuery = CatalogSearcher
                        .getCswServiceSpecificConstraintQuery(cswServiceSpecificConstraint);

                query = new BooleanQuery();

                BooleanClause.Occur occur = LuceneUtils.convertRequiredAndProhibitedToOccur(true, false);

                query.add(groupsQuery, occur);
                query.add(constraintQuery, occur);

            } else {
                query = groupsQuery;
            }

            List<Pair<String, Boolean>> sortFields = Collections
                    .singletonList(Pair.read(Geonet.SearchResult.SortBy.RELEVANCE, true));
            Sort sort = LuceneSearcher.makeSort(sortFields, context.getLanguage(), false);
            CachingWrapperFilter filter = null;

            Pair<TopDocs, Element> searchResults = LuceneSearcher.doSearchAndMakeSummary(maxRecords, 0,
                    maxRecords, Integer.MAX_VALUE, context.getLanguage(), "results", new Element("summary"),
                    reader, query, filter, sort, false, false, false, false // Scoring is useless for GetDomain operation
            );
            TopDocs hits = searchResults.one();

            try {
                // Get mapped lucene field in CSW configuration
                String indexField = CatalogConfiguration.getFieldMapping().get(property.toLowerCase());
                if (indexField != null)
                    property = indexField;

                // check if the requested property exists in the index using getFieldNames
                if (!reader.getFieldNames(IndexReader.FieldOption.ALL).contains(property))
                    continue;

                boolean isRange = false;
                if (CatalogConfiguration.getGetRecordsRangeFields().contains(property))
                    isRange = true;

                if (isRange)
                    listOfValues = new Element("RangeOfValues", Csw.NAMESPACE_CSW);
                else
                    listOfValues = new Element("ListOfValues", Csw.NAMESPACE_CSW);

                //List<String> fields = new ArrayList<String>();
                //fields.add(property);
                //fields.add("_isTemplate");
                String[] fields = new String[] { property, "_isTemplate" };
                MapFieldSelector selector = new MapFieldSelector(fields);

                // parse each document in the index
                String[] fieldValues;
                SortedSet<String> sortedValues = new TreeSet<String>();
                HashMap<String, Integer> duplicateValues = new HashMap<String, Integer>();
                for (int j = 0; j < hits.scoreDocs.length; j++) {
                    Document doc = reader.document(hits.scoreDocs[j].doc, selector);

                    // Skip templates and subTemplates
                    String[] isTemplate = doc.getValues("_isTemplate");
                    if (isTemplate != null && isTemplate.length > 0 && !isTemplate[0].equals("n"))
                        continue;

                    // Get doc values for specified property
                    fieldValues = doc.getValues(property);
                    if (fieldValues == null)
                        continue;

                    addtoSortedSet(sortedValues, fieldValues, duplicateValues);
                }

                SummaryComparator valuesComparator = new SummaryComparator(SortOption.FREQUENCY, Type.STRING,
                        context.getLanguage(), null);
                TreeSet<Map.Entry<String, Integer>> sortedValuesFrequency = new TreeSet<Map.Entry<String, Integer>>(
                        valuesComparator);
                sortedValuesFrequency.addAll(duplicateValues.entrySet());

                if (freq)
                    return createValuesByFrequency(sortedValuesFrequency);
                else
                    listOfValues.addContent(createValuesElement(sortedValues, isRange));

            } finally {
                // no children means that the catalog was unable to determine
                // anything about the specified parameter, so only add values when present
                if (listOfValues != null && listOfValues.getChildren().size() != 0)
                    domainValues.addContent(listOfValues);

                // Add current DomainValues to the list
                domainValuesList.add(domainValues);
            }
        } finally {
            sm.releaseIndexReader(reader);
        }
    }
    return domainValuesList;

}

From source file:org.fao.geonet.kernel.search.DuplicateDocFilter.java

License:Open Source License

@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final BitSet bits = new BitSet(reader.maxDoc());

    new IndexSearcher(reader).search(_query, new Collector() {

        private int docBase;
        private IndexReader reader;

        @Override
        public void setScorer(Scorer scorer) throws IOException {
        }

        @Override
        public void collect(int doc) throws IOException {
            if (hits.size() <= _maxResults) {
                Document document;
                try {
                    document = reader.document(docBase + doc, _fieldSelector);
                    String id = document.get("_id");

                    if (!hits.contains(id)) {
                        bits.set(docBase + doc);
                        hits.add(id);
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        }

        @Override
        public void setNextReader(IndexReader reader, int docBase) throws IOException {
            this.docBase = docBase;
            this.reader = reader;
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            return false;
        }
    });

    return new DocIdBitSet(bits);
}