Example usage for org.apache.lucene.index IndexReader document

List of usage examples for org.apache.lucene.index IndexReader document

Introduction

This page collects example usages of the org.apache.lucene.index IndexReader document method.

Prototype

public final Document document(int docID, Set<String> fieldsToLoad) throws IOException 

Source Link

Document

Like #document(int) but only loads the specified fields.
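
A minimal sketch of calling this overload directly (the "title" field name and the helper class are assumptions for illustration, not taken from the sources below):

import java.io.IOException;
import java.util.Collections;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;

public class StoredFieldExample {
    /**
     * Loads only the "title" stored field of the given document,
     * leaving all other stored fields unread.
     */
    public static String loadTitle(IndexReader reader, int docID) throws IOException {
        Set<String> fieldsToLoad = Collections.singleton("title");
        Document doc = reader.document(docID, fieldsToLoad);
        return doc.get("title"); // null if the document stores no "title" field
    }
}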

Usage

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexMBeanImpl.java

License:Apache License

private static String getPath(IndexReader reader, ScoreDoc doc) throws IOException {
    PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
    reader.document(doc.doc, visitor);
    return visitor.getPath();
}
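
Note that this example uses the related document(int, StoredFieldVisitor) overload: a custom visitor collects just the path field as the stored fields are streamed, instead of materializing a full Document.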

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java

License:Apache License

@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    final Sort sort = getSort(plan);
    final PlanResult pr = getPlanResult(plan);
    QueryEngineSettings settings = filter.getQueryEngineSettings();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
        private final Set<String> seenPaths = Sets.newHashSet();
        private ScoreDoc lastDoc;
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
        private boolean noDocs = false;
        private long lastSearchIndexerVersion;

        @Override
        protected LuceneResultRow computeNext() {
            while (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt,
                Facets facets, String explanation) throws IOException {
            IndexReader reader = searcher.getIndexReader();
            //TODO Look into using the field cache for retrieving the path
            //instead of reading via the reader if the number of docs in the index is limited
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (pr.isPathTransformed()) {
                    String originalPath = path;
                    path = pr.transformPath(path);

                    if (path == null) {
                        LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
                        return null;
                    }

                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath);
                        return null;
                    }
                    seenPaths.add(path);
                }

                LOG.trace("Matched path {}", path);
                return new LuceneResultRow(path, doc.score, excerpt, facets, explanation);
            }
            return null;
        }

        /**
         * Loads the lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {

            if (noDocs) {
                return false;
            }

            ScoreDoc lastDocToRecord = null;

            final IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory,
                        searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();

                    CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query);

                    if (customScoreQuery != null) {
                        query = customScoreQuery;
                    }

                    checkForIndexVersionChange(searcher);

                    TopDocs docs;
                    long start = PERF_LOGGER.start();
                    while (true) {
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                            } else {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort);
                            }
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.search(query, nextBatchSize);
                            } else {
                                docs = searcher.search(query, nextBatchSize, sort);
                            }
                        }
                        PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);

                        long f = PERF_LOGGER.start();
                        Facets facets = FacetHelper.getFacets(searcher, query, docs, plan,
                                indexNode.getDefinition().isSecureFacets());
                        PERF_LOGGER.end(f, -1, "facets retrieved");

                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();

                        restriction = filter.getPropertyRestriction(QueryImpl.OAK_SCORE_EXPLANATION);
                        boolean addExplain = restriction != null && restriction.isNotNullRestriction();

                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();

                        FieldInfos mergedFieldInfos = null;
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                            mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader());
                        }

                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(query, analyzer, searcher, doc, mergedFieldInfos);
                            }

                            String explanation = null;
                            if (addExplain) {
                                explanation = searcher.explain(query, doc.doc).toString();
                            }

                            LuceneResultRow row = convertToRow(doc, searcher, excerpt, facets, explanation);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }

                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            //queue is still empty but more results can be fetched
                            //from Lucene so still continue
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT
                            : FieldNames.SPELLCHECK;
                    noDocs = true;
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade
                            .getLuceneRequest();
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);

                    // ACL filter spellchecks
                    QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField,
                            indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(aclCheckField,
                                QueryParserBase.escape(suggestion.string));

                        query = addDescendantClauseIfRequired(query, plan);

                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.string));
                                    break;
                                }
                            }
                        }
                    }

                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade
                            .getLuceneRequest();
                    noDocs = true;

                    List<Lookup.LookupResult> lookupResults = SuggestHelper
                            .getSuggestions(indexNode.getLookup(), suggestQuery);

                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                            indexNode.getDefinition().isSuggestAnalyzed()
                                    ? indexNode.getDefinition().getAnalyzer()
                                    : SuggestHelper.getAnalyzer());

                    // ACL filter suggestions
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\"");

                        query = addDescendantClauseIfRequired(query, plan);

                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value));
                                    break;
                                }
                            }
                        }
                    }
                }
            } catch (Exception e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }

            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }

            return !queue.isEmpty();
        }

        private void checkForIndexVersionChange(IndexSearcher searcher) {
            long currentVersion = getVersion(searcher);
            if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
                lastDoc = null;
                LOG.debug("Change in index version detected {} => {}. Query would be performed without "
                        + "offset", currentVersion, lastSearchIndexerVersion);
            }
            this.lastSearchIndexerVersion = currentVersion;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {
        @Override
        public long getSize() {
            IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory,
                        searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    return new LucenePathCursor(itr, plan, settings, sizeEstimator);
}

From source file:org.apache.mahout.utils.vectors.lucene.ClusterLabels.java

License:Apache License

private static OpenBitSet getClusterDocBitset(IndexReader reader, Collection<String> idSet, String idField)
        throws IOException {
    int numDocs = reader.numDocs();

    OpenBitSet bitset = new OpenBitSet(numDocs);

    Set<String> idFieldSelector = null;
    if (idField != null) {
        idFieldSelector = new TreeSet<String>();
        idFieldSelector.add(idField);
    }

    for (int i = 0; i < numDocs; i++) {
        String id;
        // Use Lucene's internal ID if idField is not specified. Otherwise, get it from the document.
        if (idField == null) {
            id = Integer.toString(i);
        } else {
            id = reader.document(i, idFieldSelector).get(idField);
        }
        if (idSet.contains(id)) {
            bitset.set(i);
        }
    }
    log.info("Created bitset for in-cluster documents : {}", bitset.cardinality());
    return bitset;
}

From source file:org.apache.nifi.provenance.index.lucene.QueryTask.java

License:Apache License

private Tuple<List<ProvenanceEventRecord>, Integer> readDocuments(final TopDocs topDocs,
        final IndexReader indexReader) {
    // If no topDocs is supplied, just provide a Tuple that has no records and a hit count of 0.
    if (topDocs == null || topDocs.totalHits == 0) {
        return new Tuple<>(Collections.<ProvenanceEventRecord>emptyList(), 0);
    }

    final long start = System.nanoTime();
    final List<Long> eventIds = Arrays.stream(topDocs.scoreDocs).mapToInt(scoreDoc -> scoreDoc.doc)
            .mapToObj(docId -> {
                try {
                    return indexReader.document(docId, LUCENE_FIELDS_TO_LOAD);
                } catch (final Exception e) {
                    throw new SearchFailedException("Failed to read Provenance Events from Event File", e);
                }
            }).map(doc -> doc.getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue()
                    .longValue())
            .collect(Collectors.toList());

    final long endConvert = System.nanoTime();
    final long ms = TimeUnit.NANOSECONDS.toMillis(endConvert - start);
    logger.debug("Converting documents took {} ms", ms);

    List<ProvenanceEventRecord> events;
    try {
        events = eventStore.getEvents(eventIds, authorizer, transformer);
    } catch (IOException e) {
        throw new SearchFailedException("Unable to retrieve events from the Provenance Store", e);
    }

    final long fetchEventNanos = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - endConvert);
    logger.debug("Fetching {} events from Event Store took {} ms ({} events actually fetched)", eventIds.size(),
            fetchEventNanos, events.size());

    final int totalHits = topDocs.totalHits;
    return new Tuple<>(events, totalHits);
}

From source file:org.apache.solr.handler.component.TermVectorComponent.java

License:Apache License

@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
        return;
    }

    NamedList<Object> termVectors = new NamedList<Object>();
    rb.rsp.add(TERM_VECTORS, termVectors);

    IndexSchema schema = rb.req.getSchema();
    SchemaField keyField = schema.getUniqueKeyField();
    String uniqFieldName = null;
    if (keyField != null) {
        uniqFieldName = keyField.getName();
        termVectors.add("uniqueKeyFieldName", uniqFieldName);
    }

    FieldOptions allFields = new FieldOptions();
    //figure out what options we have, and try to get the appropriate vector
    allFields.termFreq = params.getBool(TermVectorParams.TF, false);
    allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
    allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
    allFields.docFreq = params.getBool(TermVectorParams.DF, false);
    allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
    //boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
    //shortcut to all values.
    if (params.getBool(TermVectorParams.ALL, false)) {
        allFields.termFreq = true;
        allFields.positions = true;
        allFields.offsets = true;
        allFields.docFreq = true;
        allFields.tfIdf = true;
    }

    //Build up our per field mapping
    Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
    NamedList<List<String>> warnings = new NamedList<List<String>>();
    List<String> noTV = new ArrayList<String>();
    List<String> noPos = new ArrayList<String>();
    List<String> noOff = new ArrayList<String>();

    Set<String> fields = getFields(rb);
    if (null != fields) {
        //we have specific fields to retrieve, or no fields
        for (String field : fields) {

            // workaround for SOLR-3523
            if (null == field || "score".equals(field))
                continue;

            // we don't want to issue warnings about the uniqueKey field
            // since it can cause lots of confusion in distributed requests
            // where the uniqueKey field is injected into the fl for merging
            final boolean fieldIsUniqueKey = field.equals(uniqFieldName);

            SchemaField sf = schema.getFieldOrNull(field);
            if (sf != null) {
                if (sf.storeTermVector()) {
                    FieldOptions option = fieldOptions.get(field);
                    if (option == null) {
                        option = new FieldOptions();
                        option.fieldName = field;
                        fieldOptions.put(field, option);
                    }
                    //get the per field mappings
                    option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
                    option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
                    option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
                    //Validate these are even an option
                    option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS,
                            allFields.positions);
                    if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
                        noPos.add(field);
                    }
                    option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
                    if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
                        noOff.add(field);
                    }
                } else {//field doesn't have term vectors
                    if (!fieldIsUniqueKey)
                        noTV.add(field);
                }
            } else {
                //field doesn't exist
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
            }
        }
    } //else, deal with all fields

    // NOTE: currently all types of warnings are schema driven, and guaranteed
    // to be consistent across all shards - if additional types of warnings
    // are added that might be different between shards, finishStage() needs
    // to be changed to account for that.
    boolean hasWarnings = false;
    if (!noTV.isEmpty()) {
        warnings.add("noTermVectors", noTV);
        hasWarnings = true;
    }
    if (!noPos.isEmpty()) {
        warnings.add("noPositions", noPos);
        hasWarnings = true;
    }
    if (!noOff.isEmpty()) {
        warnings.add("noOffsets", noOff);
        hasWarnings = true;
    }
    if (hasWarnings) {
        termVectors.add("warnings", warnings);
    }

    DocListAndSet listAndSet = rb.getResults();
    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
    Iterator<Integer> iter;
    if (docIds != null && !docIds.isEmpty()) {
        iter = docIds.iterator();
    } else {
        DocList list = listAndSet.docList;
        iter = list.iterator();
    }
    SolrIndexSearcher searcher = rb.req.getSearcher();

    IndexReader reader = searcher.getIndexReader();
    //the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors

    //Only load the uniqueKey field to read its stored value

    final String finalUniqFieldName = uniqFieldName;

    final List<String> uniqValues = new ArrayList<String>();

    // TODO: is this required to be single-valued? if so, we should STOP
    // once we find it...
    final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {
        @Override
        public void stringField(FieldInfo fieldInfo, String value) {
            uniqValues.add(value);
        }

        @Override
        public void intField(FieldInfo fieldInfo, int value) {
            uniqValues.add(Integer.toString(value));
        }

        @Override
        public void longField(FieldInfo fieldInfo, long value) {
            uniqValues.add(Long.toString(value));
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) {
            return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
        }
    };

    TermsEnum termsEnum = null;

    while (iter.hasNext()) {
        Integer docId = iter.next();
        NamedList<Object> docNL = new NamedList<Object>();

        if (keyField != null) {
            reader.document(docId, getUniqValue);
            String uniqVal = null;
            if (uniqValues.size() != 0) {
                uniqVal = uniqValues.get(0);
                uniqValues.clear();
                docNL.add("uniqueKey", uniqVal);
                termVectors.add(uniqVal, docNL);
            }
        } else {
            // support for schemas w/o a unique key,
            termVectors.add("doc-" + docId, docNL);
        }

        if (null != fields) {
            for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
                final String field = entry.getKey();
                final Terms vector = reader.getTermVector(docId, field);
                if (vector != null) {
                    termsEnum = vector.iterator(termsEnum);
                    mapOneVector(docNL, entry.getValue(), reader, docId, termsEnum, field);
                }
            }
        } else {
            // extract all fields
            final Fields vectors = reader.getTermVectors(docId);
            for (String field : vectors) {
                Terms terms = vectors.terms(field);
                if (terms != null) {
                    termsEnum = terms.iterator(termsEnum);
                    mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
                }
            }
        }
    }
}

From source file:org.archive.tnh.FieldCacheNoCache.java

License:Apache License

/**
 * Loads only the configured field of the given document and returns its stored value.
 */
public String getValue(IndexReader reader, int docBase, int docId) throws IOException {
    Document doc = reader.document(docId, FIELD_ONLY);

    String value = doc.get(this.fieldName);

    return value;
}

From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndex.java

License:Open Source License

private static Document readDocument(IndexReader reader, int docId, Set<String> fieldsToLoad)
        throws IOException {
    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad);
    reader.document(docId, visitor);
    return visitor.getDocument();
}
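
This helper mirrors what the two-argument overload does internally: in Lucene 4.x, IndexReader.document(int, Set) wraps the requested fields in a DocumentStoredFieldVisitor, delegates to document(int, StoredFieldVisitor), and returns visitor.getDocument().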

From source file:org.exoplatform.services.jcr.impl.core.query.lucene.SearchIndex.java

License:Apache License

/**
 * {@inheritDoc}
 */
public void checkIndex(ItemDataConsumer itemStateManager, boolean isSystem, final InspectionReport report)
        throws RepositoryException, IOException {

    // The visitor that enumerates items and checks whether all nodes present in
    // the persistent layer are indexed. It also collects the set of all indexed
    // nodes to optimize the backward check, when the index is traversed to find
    // references to already-deleted nodes.
    class ItemDataIndexConsistencyVisitor extends ItemDataTraversingVisitor {
        private final IndexReader indexReader;

        private final Set<String> indexedNodes = new HashSet<String>();

        /**
         * @param dataManager
         */
        public ItemDataIndexConsistencyVisitor(ItemDataConsumer dataManager, IndexReader indexReader) {
            super(dataManager);
            this.indexReader = indexReader;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        protected void entering(PropertyData property, int level) throws RepositoryException {
            // ignore properties
        }

        /**
         * {@inheritDoc}
         */
        @Override
        protected void entering(NodeData node, int level) throws RepositoryException {
            // process node uuids one-by-one
            try {
                String uuid = node.getIdentifier();
                TermDocs docs = indexReader.termDocs(new Term(FieldNames.UUID, uuid));

                if (docs.next()) {
                    indexedNodes.add(uuid);
                    docs.doc();
                    if (docs.next()) {
                        //multiple entries
                        report.logComment("Multiple entires.");
                        report.logBrokenObjectAndSetInconsistency("ID=" + uuid);
                    }
                } else {
                    report.logComment("Not indexed.");
                    report.logBrokenObjectAndSetInconsistency("ID=" + uuid);
                }
            } catch (IOException e) {
                throw new RepositoryException(e.getMessage(), e);
            }
        }

        @Override
        protected void leaving(PropertyData property, int level) throws RepositoryException {
            // ignore properties
        }

        @Override
        protected void leaving(NodeData node, int level) throws RepositoryException {
            // do nothing
        }

        @Override
        protected void visitChildProperties(NodeData node) throws RepositoryException {
            //do nothing
        }

        public Set<String> getIndexedNodes() {
            return indexedNodes;
        }
    }

    // check relation Persistent Layer -> Index
    // If the current workspace is the system workspace, the reader corresponding to the system index must be used
    ensureFlushed();
    if (isSystem) {
        if (getContext().getParentHandler() != null) {
            ((SearchIndex) getContext().getParentHandler()).ensureFlushed();
        }
    }
    IndexReader indexReader = getIndexReader(isSystem);
    try {
        ItemData root = itemStateManager.getItemData(Constants.ROOT_UUID);
        ItemDataIndexConsistencyVisitor visitor = new ItemDataIndexConsistencyVisitor(itemStateManager,
                indexReader);
        root.accept(visitor);

        Set<String> documentUUIDs = visitor.getIndexedNodes();

        // check relation Index -> Persistent Layer
        // find documents that do not correspond to real nodes
        // iterate on documents one-by-one
        for (int i = 0; i < indexReader.maxDoc(); i++) {
            if (indexReader.isDeleted(i)) {
                continue;
            }
            final int currentIndex = i;
            Document d = indexReader.document(currentIndex, FieldSelectors.UUID);
            String uuid = d.get(FieldNames.UUID);
            if (!documentUUIDs.contains(uuid)) {
                report.logComment("Document corresponds to removed node.");
                report.logBrokenObjectAndSetInconsistency("ID=" + uuid);
            }
        }
    } finally {
        Util.closeOrRelease(indexReader);
    }
}

From source file:org.fao.geonet.kernel.csw.services.GetDomain.java

License:Open Source License

public static List<Element> handlePropertyName(String[] propertyNames, ServiceContext context, boolean freq,
        int maxRecords, String cswServiceSpecificConstraint) throws Exception {

    List<Element> domainValuesList = null;

    if (Log.isDebugEnabled(Geonet.CSW))
        Log.debug(Geonet.CSW, "Handling property names '" + Arrays.toString(propertyNames)
                + "' with max records of " + maxRecords);

    for (int i = 0; i < propertyNames.length; i++) {

        if (i == 0)
            domainValuesList = new ArrayList<Element>();

        // Initialize list of values element.
        Element listOfValues = null;

        // Generate DomainValues element
        Element domainValues = new Element("DomainValues", Csw.NAMESPACE_CSW);

        // FIXME what should be the type ???
        domainValues.setAttribute("type", "csw:Record");

        String property = propertyNames[i].trim();

        // Set propertyName in any case.
        Element pn = new Element("PropertyName", Csw.NAMESPACE_CSW);
        domainValues.addContent(pn.setText(property));

        GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
        SearchManager sm = gc.getSearchmanager();

        IndexReader reader = sm.getIndexReader(context.getLanguage());
        try {
            BooleanQuery groupsQuery = (BooleanQuery) CatalogSearcher.getGroupsQuery(context);
            BooleanQuery query = null;

            // Apply CSW service specific constraint
            if (StringUtils.isNotEmpty(cswServiceSpecificConstraint)) {
                Query constraintQuery = CatalogSearcher
                        .getCswServiceSpecificConstraintQuery(cswServiceSpecificConstraint);

                query = new BooleanQuery();

                BooleanClause.Occur occur = LuceneUtils.convertRequiredAndProhibitedToOccur(true, false);

                query.add(groupsQuery, occur);
                query.add(constraintQuery, occur);

            } else {
                query = groupsQuery;
            }

            List<Pair<String, Boolean>> sortFields = Collections
                    .singletonList(Pair.read(Geonet.SearchResult.SortBy.RELEVANCE, true));
            Sort sort = LuceneSearcher.makeSort(sortFields, context.getLanguage(), false);
            CachingWrapperFilter filter = null;

            Pair<TopDocs, Element> searchResults = LuceneSearcher.doSearchAndMakeSummary(maxRecords, 0,
                    maxRecords, Integer.MAX_VALUE, context.getLanguage(), "results", new Element("summary"),
                    reader, query, filter, sort, false, false, false, false // Scoring is useless for GetDomain operation
            );
            TopDocs hits = searchResults.one();

            try {
                // Get mapped lucene field in CSW configuration
                String indexField = CatalogConfiguration.getFieldMapping().get(property.toLowerCase());
                if (indexField != null)
                    property = indexField;

                // check if the requested property exists in the index using getFieldNames
                if (!reader.getFieldNames(IndexReader.FieldOption.ALL).contains(property))
                    continue;

                boolean isRange = false;
                if (CatalogConfiguration.getGetRecordsRangeFields().contains(property))
                    isRange = true;

                if (isRange)
                    listOfValues = new Element("RangeOfValues", Csw.NAMESPACE_CSW);
                else
                    listOfValues = new Element("ListOfValues", Csw.NAMESPACE_CSW);

                //List<String> fields = new ArrayList<String>();
                //fields.add(property);
                //fields.add("_isTemplate");
                String[] fields = new String[] { property, "_isTemplate" };
                MapFieldSelector selector = new MapFieldSelector(fields);

                // parse each document in the index
                String[] fieldValues;
                SortedSet<String> sortedValues = new TreeSet<String>();
                HashMap<String, Integer> duplicateValues = new HashMap<String, Integer>();
                for (int j = 0; j < hits.scoreDocs.length; j++) {
                    Document doc = reader.document(hits.scoreDocs[j].doc, selector);

                    // Skip templates and subTemplates
                    String[] isTemplate = doc.getValues("_isTemplate");
                    if (isTemplate != null && isTemplate.length > 0 && !isTemplate[0].equals("n"))
                        continue;

                    // Get doc values for specified property
                    fieldValues = doc.getValues(property);
                    if (fieldValues == null)
                        continue;

                    addtoSortedSet(sortedValues, fieldValues, duplicateValues);
                }

                SummaryComparator valuesComparator = new SummaryComparator(SortOption.FREQUENCY, Type.STRING,
                        context.getLanguage(), null);
                TreeSet<Map.Entry<String, Integer>> sortedValuesFrequency = new TreeSet<Map.Entry<String, Integer>>(
                        valuesComparator);
                sortedValuesFrequency.addAll(duplicateValues.entrySet());

                if (freq)
                    return createValuesByFrequency(sortedValuesFrequency);
                else
                    listOfValues.addContent(createValuesElement(sortedValues, isRange));

            } finally {
                // no children means that the catalog was unable to determine
                // anything about the specified parameter, so only add values when present
                if (listOfValues != null && listOfValues.getChildren().size() != 0)
                    domainValues.addContent(listOfValues);

                // Add current DomainValues to the list
                domainValuesList.add(domainValues);
            }
        } finally {
            sm.releaseIndexReader(reader);
        }
    }
    return domainValuesList;

}

From source file:org.fao.geonet.kernel.search.DuplicateDocFilter.java

License:Open Source License

@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final BitSet bits = new BitSet(reader.maxDoc());

    new IndexSearcher(reader).search(_query, new Collector() {

        private int docBase;
        private IndexReader reader;

        @Override
        public void setScorer(Scorer scorer) throws IOException {
        }

        @Override
        public void collect(int doc) throws IOException {
            if (hits.size() <= _maxResults) {
                Document document;
                try {
                    document = reader.document(docBase + doc, _fieldSelector);
                    String id = document.get("_id");

                    if (!hits.contains(id)) {
                        bits.set(docBase + doc);
                        hits.add(id);
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        }

        @Override
        public void setNextReader(IndexReader reader, int docBase) throws IOException {
            this.docBase = docBase;
            this.reader = reader;
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            return false;
        }
    });

    return new DocIdBitSet(bits);
}