List of usage examples for org.apache.lucene.index.IndexReader#document
public final Document document(int docID, Set<String> fieldsToLoad) throws IOException
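The per-project examples below exercise this method (and its StoredFieldVisitor sibling) in context. As a starting point, here is a minimal, self-contained sketch of the basic call. It assumes a Lucene 5.x-style API (FSDirectory.open taking a Path); the index location and the "title" field name are placeholders, not taken from any of the sources listed here.

import java.io.IOException;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

public class StoredFieldExample {
    public static void main(String[] args) throws IOException {
        // Placeholder index directory and stored field name.
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/example-index")))) {
            Set<String> fieldsToLoad = Collections.singleton("title");
            for (int docID = 0; docID < reader.maxDoc(); docID++) {
                // Loads only the requested stored fields; cheaper than document(docID),
                // which decodes every stored field. Deleted docs are not skipped here.
                Document doc = reader.document(docID, fieldsToLoad);
                System.out.println(docID + " -> " + doc.get("title"));
            }
        }
    }
}

Passing a fieldsToLoad set avoids decoding stored fields the caller does not need, which matters when documents carry large stored values.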
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexMBeanImpl.java
License:Apache License
private static String getPath(IndexReader reader, ScoreDoc doc) throws IOException {
    PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
    reader.document(doc.doc, visitor);
    return visitor.getPath();
}
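This example uses the sibling overload document(int docID, StoredFieldVisitor visitor) rather than the Set<String> variant; several examples below do the same. Oak's PathStoredFieldVisitor itself is not part of this listing, so purely as an illustration, a minimal single-field visitor in the same spirit might look like the following sketch (class name and behavior are assumptions, not Oak's actual code):

import java.io.IOException;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;

// Illustrative stand-in for a single-field visitor: captures one stored string
// field and tells Lucene to stop scanning once it has been read.
class SingleFieldVisitor extends StoredFieldVisitor {
    private final String fieldName;
    private String value;

    SingleFieldVisitor(String fieldName) {
        this.fieldName = fieldName;
    }

    @Override
    public Status needsField(FieldInfo fieldInfo) throws IOException {
        if (fieldInfo.name.equals(fieldName)) {
            return Status.YES;
        }
        // Once the value is captured, STOP aborts the per-document scan early.
        return value == null ? Status.NO : Status.STOP;
    }

    @Override
    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
        this.value = value;
    }

    String getValue() {
        return value;
    }
}

A caller would then invoke reader.document(doc.doc, visitor) and read the result via visitor.getValue().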
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java
License:Apache License
@Override
public Cursor query(final IndexPlan plan, NodeState rootState) {
    final Filter filter = plan.getFilter();
    final Sort sort = getSort(plan);
    final PlanResult pr = getPlanResult(plan);
    QueryEngineSettings settings = filter.getQueryEngineSettings();
    Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() {
        private final Deque<LuceneResultRow> queue = Queues.newArrayDeque();
        private final Set<String> seenPaths = Sets.newHashSet();
        private ScoreDoc lastDoc;
        private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE;
        private boolean noDocs = false;
        private long lastSearchIndexerVersion;

        @Override
        protected LuceneResultRow computeNext() {
            while (!queue.isEmpty() || loadDocs()) {
                return queue.remove();
            }
            return endOfData();
        }

        private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt,
                Facets facets, String explanation) throws IOException {
            IndexReader reader = searcher.getIndexReader();
            //TODO Look into usage of field cache for retrieving the path
            //instead of reading via reader if no. of docs in index are limited
            PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
            reader.document(doc.doc, visitor);
            String path = visitor.getPath();
            if (path != null) {
                if ("".equals(path)) {
                    path = "/";
                }
                if (pr.isPathTransformed()) {
                    String originalPath = path;
                    path = pr.transformPath(path);
                    if (path == null) {
                        LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
                        return null;
                    }
                    // avoid duplicate entries
                    if (seenPaths.contains(path)) {
                        LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath);
                        return null;
                    }
                    seenPaths.add(path);
                }
                LOG.trace("Matched path {}", path);
                return new LuceneResultRow(path, doc.score, excerpt, facets, explanation);
            }
            return null;
        }

        /**
         * Loads the lucene documents in batches
         * @return true if any document is loaded
         */
        private boolean loadDocs() {
            if (noDocs) {
                return false;
            }
            ScoreDoc lastDocToRecord = null;
            final IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory,
                        searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query);
                    if (customScoreQuery != null) {
                        query = customScoreQuery;
                    }
                    checkForIndexVersionChange(searcher);
                    TopDocs docs;
                    long start = PERF_LOGGER.start();
                    while (true) {
                        if (lastDoc != null) {
                            LOG.debug("loading the next {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize);
                            } else {
                                docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort);
                            }
                        } else {
                            LOG.debug("loading the first {} entries for query {}", nextBatchSize, query);
                            if (sort == null) {
                                docs = searcher.search(query, nextBatchSize);
                            } else {
                                docs = searcher.search(query, nextBatchSize, sort);
                            }
                        }
                        PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length);
                        nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000);

                        long f = PERF_LOGGER.start();
                        Facets facets = FacetHelper.getFacets(searcher, query, docs, plan,
                                indexNode.getDefinition().isSecureFacets());
                        PERF_LOGGER.end(f, -1, "facets retrieved");

                        PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT);
                        boolean addExcerpt = restriction != null && restriction.isNotNullRestriction();

                        restriction = filter.getPropertyRestriction(QueryImpl.OAK_SCORE_EXPLANATION);
                        boolean addExplain = restriction != null && restriction.isNotNullRestriction();

                        Analyzer analyzer = indexNode.getDefinition().getAnalyzer();

                        FieldInfos mergedFieldInfos = null;
                        if (addExcerpt) {
                            // setup highlighter
                            QueryScorer scorer = new QueryScorer(query);
                            scorer.setExpandMultiTermQuery(true);
                            highlighter.setFragmentScorer(scorer);
                            mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader());
                        }

                        for (ScoreDoc doc : docs.scoreDocs) {
                            String excerpt = null;
                            if (addExcerpt) {
                                excerpt = getExcerpt(query, analyzer, searcher, doc, mergedFieldInfos);
                            }

                            String explanation = null;
                            if (addExplain) {
                                explanation = searcher.explain(query, doc.doc).toString();
                            }

                            LuceneResultRow row = convertToRow(doc, searcher, excerpt, facets, explanation);
                            if (row != null) {
                                queue.add(row);
                            }
                            lastDocToRecord = doc;
                        }

                        if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                            //queue is still empty but more results can be fetched
                            //from Lucene so still continue
                            lastDoc = lastDocToRecord;
                        } else {
                            break;
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) {
                    String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT
                            : FieldNames.SPELLCHECK;
                    noDocs = true;
                    SpellcheckHelper.SpellcheckQuery spellcheckQuery =
                            (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
                    SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery);

                    // ACL filter spellchecks
                    QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField,
                            indexNode.getDefinition().getAnalyzer());
                    for (SuggestWord suggestion : suggestWords) {
                        Query query = qp.createPhraseQuery(aclCheckField,
                                QueryParserBase.escape(suggestion.string));
                        query = addDescendantClauseIfRequired(query, plan);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.string));
                                    break;
                                }
                            }
                        }
                    }
                } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) {
                    SuggestHelper.SuggestQuery suggestQuery =
                            (SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
                    noDocs = true;

                    List<Lookup.LookupResult> lookupResults = SuggestHelper
                            .getSuggestions(indexNode.getLookup(), suggestQuery);

                    QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST,
                            indexNode.getDefinition().isSuggestAnalyzed()
                                    ? indexNode.getDefinition().getAnalyzer()
                                    : SuggestHelper.getAnalyzer());

                    // ACL filter suggestions
                    for (Lookup.LookupResult suggestion : lookupResults) {
                        Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\"");
                        query = addDescendantClauseIfRequired(query, plan);
                        TopDocs topDocs = searcher.search(query, 100);
                        if (topDocs.totalHits > 0) {
                            for (ScoreDoc doc : topDocs.scoreDocs) {
                                Document retrievedDoc = searcher.doc(doc.doc);
                                String prefix = filter.getPath();
                                if (prefix.length() == 1) {
                                    prefix = "";
                                }
                                if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) {
                                    queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value));
                                    break;
                                }
                            }
                        }
                    }
                }
            } catch (Exception e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }
            if (lastDocToRecord != null) {
                this.lastDoc = lastDocToRecord;
            }
            return !queue.isEmpty();
        }

        private void checkForIndexVersionChange(IndexSearcher searcher) {
            long currentVersion = getVersion(searcher);
            if (currentVersion != lastSearchIndexerVersion && lastDoc != null) {
                lastDoc = null;
                LOG.debug("Change in index version detected {} => {}. Query would be performed without offset",
                        currentVersion, lastSearchIndexerVersion);
            }
            this.lastSearchIndexerVersion = currentVersion;
        }
    };
    SizeEstimator sizeEstimator = new SizeEstimator() {
        @Override
        public long getSize() {
            IndexNode indexNode = acquireIndexNode(plan);
            checkState(indexNode != null);
            try {
                IndexSearcher searcher = indexNode.getSearcher();
                LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory,
                        searcher.getIndexReader());
                if (luceneRequestFacade.getLuceneRequest() instanceof Query) {
                    Query query = (Query) luceneRequestFacade.getLuceneRequest();
                    TotalHitCountCollector collector = new TotalHitCountCollector();
                    searcher.search(query, collector);
                    int totalHits = collector.getTotalHits();
                    LOG.debug("Estimated size for query {} is {}", query, totalHits);
                    return totalHits;
                }
                LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest());
            } catch (IOException e) {
                LOG.warn("query via {} failed.", LucenePropertyIndex.this, e);
            } finally {
                indexNode.release();
            }
            return -1;
        }
    };
    return new LucenePathCursor(itr, plan, settings, sizeEstimator);
}
From source file:org.apache.mahout.utils.vectors.lucene.ClusterLabels.java
License:Apache License
private static OpenBitSet getClusterDocBitset(IndexReader reader, Collection<String> idSet, String idField)
        throws IOException {
    int numDocs = reader.numDocs();
    OpenBitSet bitset = new OpenBitSet(numDocs);

    Set<String> idFieldSelector = null;
    if (idField != null) {
        idFieldSelector = new TreeSet<String>();
        idFieldSelector.add(idField);
    }

    for (int i = 0; i < numDocs; i++) {
        String id;
        // Use Lucene's internal ID if idField is not specified. Else, get it from the document.
        if (idField == null) {
            id = Integer.toString(i);
        } else {
            id = reader.document(i, idFieldSelector).get(idField);
        }
        if (idSet.contains(id)) {
            bitset.set(i);
        }
    }
    log.info("Created bitset for in-cluster documents : {}", bitset.cardinality());
    return bitset;
}
From source file:org.apache.nifi.provenance.index.lucene.QueryTask.java
License:Apache License
private Tuple<List<ProvenanceEventRecord>, Integer> readDocuments(final TopDocs topDocs,
        final IndexReader indexReader) {
    // If no topDocs is supplied, just provide a Tuple that has no records and a hit count of 0.
    if (topDocs == null || topDocs.totalHits == 0) {
        return new Tuple<>(Collections.<ProvenanceEventRecord>emptyList(), 0);
    }

    final long start = System.nanoTime();
    // LUCENE_FIELDS_TO_LOAD is a constant defined elsewhere in QueryTask; it limits
    // the stored fields loaded for each hit to the ones needed below.
    final List<Long> eventIds = Arrays.stream(topDocs.scoreDocs)
            .mapToInt(scoreDoc -> scoreDoc.doc)
            .mapToObj(docId -> {
                try {
                    return indexReader.document(docId, LUCENE_FIELDS_TO_LOAD);
                } catch (final Exception e) {
                    throw new SearchFailedException("Failed to read Provenance Events from Event File", e);
                }
            })
            .map(doc -> doc.getField(SearchableFields.Identifier.getSearchableFieldName())
                    .numericValue().longValue())
            .collect(Collectors.toList());

    final long endConvert = System.nanoTime();
    final long ms = TimeUnit.NANOSECONDS.toMillis(endConvert - start);
    logger.debug("Converting documents took {} ms", ms);

    List<ProvenanceEventRecord> events;
    try {
        events = eventStore.getEvents(eventIds, authorizer, transformer);
    } catch (IOException e) {
        throw new SearchFailedException("Unable to retrieve events from the Provenance Store", e);
    }

    final long fetchEventNanos = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - endConvert);
    logger.debug("Fetching {} events from Event Store took {} ms ({} events actually fetched)",
            eventIds.size(), fetchEventNanos, events.size());

    final int totalHits = topDocs.totalHits;
    return new Tuple<>(events, totalHits);
}
From source file:org.apache.solr.handler.component.TermVectorComponent.java
License:Apache License
@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
        return;
    }

    NamedList<Object> termVectors = new NamedList<Object>();
    rb.rsp.add(TERM_VECTORS, termVectors);

    IndexSchema schema = rb.req.getSchema();
    SchemaField keyField = schema.getUniqueKeyField();
    String uniqFieldName = null;
    if (keyField != null) {
        uniqFieldName = keyField.getName();
        termVectors.add("uniqueKeyFieldName", uniqFieldName);
    }

    FieldOptions allFields = new FieldOptions();
    //figure out what options we have, and try to get the appropriate vector
    allFields.termFreq = params.getBool(TermVectorParams.TF, false);
    allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
    allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
    allFields.docFreq = params.getBool(TermVectorParams.DF, false);
    allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
    //boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);

    //short cut to all values.
    if (params.getBool(TermVectorParams.ALL, false)) {
        allFields.termFreq = true;
        allFields.positions = true;
        allFields.offsets = true;
        allFields.docFreq = true;
        allFields.tfIdf = true;
    }

    //Build up our per field mapping
    Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
    NamedList<List<String>> warnings = new NamedList<List<String>>();
    List<String> noTV = new ArrayList<String>();
    List<String> noPos = new ArrayList<String>();
    List<String> noOff = new ArrayList<String>();

    Set<String> fields = getFields(rb);
    if (null != fields) {
        //we have specific fields to retrieve, or no fields
        for (String field : fields) {
            // workaround SOLR-3523
            if (null == field || "score".equals(field))
                continue;

            // we don't want to issue warnings about the uniqueKey field
            // since it can cause lots of confusion in distributed requests
            // where the uniqueKey field is injected into the fl for merging
            final boolean fieldIsUniqueKey = field.equals(uniqFieldName);
            SchemaField sf = schema.getFieldOrNull(field);
            if (sf != null) {
                if (sf.storeTermVector()) {
                    FieldOptions option = fieldOptions.get(field);
                    if (option == null) {
                        option = new FieldOptions();
                        option.fieldName = field;
                        fieldOptions.put(field, option);
                    }
                    //get the per field mappings
                    option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
                    option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
                    option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
                    //Validate these are even an option
                    option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS,
                            allFields.positions);
                    if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
                        noPos.add(field);
                    }
                    option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
                    if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
                        noOff.add(field);
                    }
                } else { //field doesn't have term vectors
                    if (!fieldIsUniqueKey)
                        noTV.add(field);
                }
            } else {
                //field doesn't exist
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
            }
        }
    } //else, deal with all fields

    // NOTE: currently all types of warnings are schema driven, and guaranteed
    // to be consistent across all shards - if additional types of warnings
    // are added that might be different between shards, finishStage() needs
    // to be changed to account for that.
    boolean hasWarnings = false;
    if (!noTV.isEmpty()) {
        warnings.add("noTermVectors", noTV);
        hasWarnings = true;
    }
    if (!noPos.isEmpty()) {
        warnings.add("noPositions", noPos);
        hasWarnings = true;
    }
    if (!noOff.isEmpty()) {
        warnings.add("noOffsets", noOff);
        hasWarnings = true;
    }
    if (hasWarnings) {
        termVectors.add("warnings", warnings);
    }

    DocListAndSet listAndSet = rb.getResults();
    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
    Iterator<Integer> iter;
    if (docIds != null && !docIds.isEmpty()) {
        iter = docIds.iterator();
    } else {
        DocList list = listAndSet.docList;
        iter = list.iterator();
    }

    SolrIndexSearcher searcher = rb.req.getSearcher();
    IndexReader reader = searcher.getIndexReader();
    //the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors

    //Only load the id field to get the uniqueKey of that field
    final String finalUniqFieldName = uniqFieldName;
    final List<String> uniqValues = new ArrayList<String>();

    // TODO: is this required to be single-valued? if so, we should STOP
    // once we find it...
    final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {
        @Override
        public void stringField(FieldInfo fieldInfo, String value) {
            uniqValues.add(value);
        }

        @Override
        public void intField(FieldInfo fieldInfo, int value) {
            uniqValues.add(Integer.toString(value));
        }

        @Override
        public void longField(FieldInfo fieldInfo, long value) {
            uniqValues.add(Long.toString(value));
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) {
            return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
        }
    };

    TermsEnum termsEnum = null;
    while (iter.hasNext()) {
        Integer docId = iter.next();
        NamedList<Object> docNL = new NamedList<Object>();

        if (keyField != null) {
            reader.document(docId, getUniqValue);
            String uniqVal = null;
            if (uniqValues.size() != 0) {
                uniqVal = uniqValues.get(0);
                uniqValues.clear();
                docNL.add("uniqueKey", uniqVal);
                termVectors.add(uniqVal, docNL);
            }
        } else {
            // support for schemas w/o a unique key
            termVectors.add("doc-" + docId, docNL);
        }

        if (null != fields) {
            for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
                final String field = entry.getKey();
                final Terms vector = reader.getTermVector(docId, field);
                if (vector != null) {
                    termsEnum = vector.iterator(termsEnum);
                    mapOneVector(docNL, entry.getValue(), reader, docId, termsEnum, field);
                }
            }
        } else {
            // extract all fields
            final Fields vectors = reader.getTermVectors(docId);
            for (String field : vectors) {
                Terms terms = vectors.terms(field);
                if (terms != null) {
                    termsEnum = terms.iterator(termsEnum);
                    mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
                }
            }
        }
    }
}
From source file:org.archive.tnh.FieldCacheNoCache.java
License:Apache License
public String getValue(IndexReader reader, int docBase, int docId) throws IOException {
    Document doc = reader.document(docId, FIELD_ONLY);
    String value = doc.get(this.fieldName);
    return value;
}
From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndex.java
License:Open Source License
private static Document readDocument(IndexReader reader, int docId, Set<String> fieldsToLoad)
        throws IOException {
    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad);
    reader.document(docId, visitor);
    return visitor.getDocument();
}
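Incidentally, this helper reproduces what the final IndexReader.document(int, Set<String>) overload does internally: wrap fieldsToLoad in a DocumentStoredFieldVisitor, hand it to document(int, StoredFieldVisitor), and return visitor.getDocument().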
From source file:org.exoplatform.services.jcr.impl.core.query.lucene.SearchIndex.java
License:Apache License
/**
 * {@inheritDoc}
 */
public void checkIndex(ItemDataConsumer itemStateManager, boolean isSystem, final InspectionReport report)
        throws RepositoryException, IOException {
    // The visitor performs item enumeration and checks that all nodes present in
    // the persistent layer are indexed. It also collects the list of all indexed nodes
    // to optimize the backward check, when the index is traversed to find
    // references to already deleted nodes.
    class ItemDataIndexConsistencyVisitor extends ItemDataTraversingVisitor {
        private final IndexReader indexReader;

        private final Set<String> indexedNodes = new HashSet<String>();

        /**
         * @param dataManager
         */
        public ItemDataIndexConsistencyVisitor(ItemDataConsumer dataManager, IndexReader indexReader) {
            super(dataManager);
            this.indexReader = indexReader;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        protected void entering(PropertyData property, int level) throws RepositoryException {
            // ignore properties
        }

        /**
         * {@inheritDoc}
         */
        @Override
        protected void entering(NodeData node, int level) throws RepositoryException {
            // process node uuids one-by-one
            try {
                String uuid = node.getIdentifier();
                TermDocs docs = indexReader.termDocs(new Term(FieldNames.UUID, uuid));
                if (docs.next()) {
                    indexedNodes.add(uuid);
                    docs.doc();
                    if (docs.next()) {
                        // multiple entries
                        report.logComment("Multiple entries.");
                        report.logBrokenObjectAndSetInconsistency("ID=" + uuid);
                    }
                } else {
                    report.logComment("Not indexed.");
                    report.logBrokenObjectAndSetInconsistency("ID=" + uuid);
                }
            } catch (IOException e) {
                throw new RepositoryException(e.getMessage(), e);
            }
        }

        @Override
        protected void leaving(PropertyData property, int level) throws RepositoryException {
            // ignore properties
        }

        @Override
        protected void leaving(NodeData node, int level) throws RepositoryException {
            // do nothing
        }

        @Override
        protected void visitChildProperties(NodeData node) throws RepositoryException {
            // do nothing
        }

        public Set<String> getIndexedNodes() {
            return indexedNodes;
        }
    }

    // check relation Persistent Layer -> Index
    // If the current workspace is system, invoke the reader corresponding to the system index
    ensureFlushed();
    if (isSystem) {
        if (getContext().getParentHandler() != null) {
            ((SearchIndex) getContext().getParentHandler()).ensureFlushed();
        }
    }
    IndexReader indexReader = getIndexReader(isSystem);
    try {
        ItemData root = itemStateManager.getItemData(Constants.ROOT_UUID);
        ItemDataIndexConsistencyVisitor visitor = new ItemDataIndexConsistencyVisitor(itemStateManager,
                indexReader);
        root.accept(visitor);

        Set<String> documentUUIDs = visitor.getIndexedNodes();

        // check relation Index -> Persistent Layer
        // find documents that do not correspond to real nodes,
        // iterating over documents one-by-one
        for (int i = 0; i < indexReader.maxDoc(); i++) {
            if (indexReader.isDeleted(i)) {
                continue;
            }
            final int currentIndex = i;
            Document d = indexReader.document(currentIndex, FieldSelectors.UUID);
            String uuid = d.get(FieldNames.UUID);
            if (!documentUUIDs.contains(uuid)) {
                report.logComment("Document corresponds to removed node.");
                report.logBrokenObjectAndSetInconsistency("ID=" + uuid);
            }
        }
    } finally {
        Util.closeOrRelease(indexReader);
    }
}
From source file:org.fao.geonet.kernel.csw.services.GetDomain.java
License:Open Source License
public static List<Element> handlePropertyName(String[] propertyNames, ServiceContext context, boolean freq,
        int maxRecords, String cswServiceSpecificConstraint) throws Exception {

    List<Element> domainValuesList = null;

    if (Log.isDebugEnabled(Geonet.CSW))
        Log.debug(Geonet.CSW, "Handling property names '" + Arrays.toString(propertyNames)
                + "' with max records of " + maxRecords);

    for (int i = 0; i < propertyNames.length; i++) {

        if (i == 0)
            domainValuesList = new ArrayList<Element>();

        // Initialize list of values element.
        Element listOfValues = null;

        // Generate DomainValues element
        Element domainValues = new Element("DomainValues", Csw.NAMESPACE_CSW);

        // FIXME what should be the type ???
        domainValues.setAttribute("type", "csw:Record");

        String property = propertyNames[i].trim();

        // Set propertyName in any case.
        Element pn = new Element("PropertyName", Csw.NAMESPACE_CSW);
        domainValues.addContent(pn.setText(property));

        GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
        SearchManager sm = gc.getSearchmanager();

        IndexReader reader = sm.getIndexReader(context.getLanguage());
        try {
            BooleanQuery groupsQuery = (BooleanQuery) CatalogSearcher.getGroupsQuery(context);
            BooleanQuery query = null;

            // Apply CSW service specific constraint
            if (StringUtils.isNotEmpty(cswServiceSpecificConstraint)) {
                Query constraintQuery = CatalogSearcher
                        .getCswServiceSpecificConstraintQuery(cswServiceSpecificConstraint);

                query = new BooleanQuery();

                BooleanClause.Occur occur = LuceneUtils.convertRequiredAndProhibitedToOccur(true, false);

                query.add(groupsQuery, occur);
                query.add(constraintQuery, occur);
            } else {
                query = groupsQuery;
            }

            List<Pair<String, Boolean>> sortFields = Collections
                    .singletonList(Pair.read(Geonet.SearchResult.SortBy.RELEVANCE, true));
            Sort sort = LuceneSearcher.makeSort(sortFields, context.getLanguage(), false);
            CachingWrapperFilter filter = null;

            Pair<TopDocs, Element> searchResults = LuceneSearcher.doSearchAndMakeSummary(maxRecords, 0,
                    maxRecords, Integer.MAX_VALUE, context.getLanguage(), "results", new Element("summary"),
                    reader, query, filter, sort, false, false, false,
                    false // Scoring is useless for GetDomain operation
            );
            TopDocs hits = searchResults.one();

            try {
                // Get mapped lucene field in CSW configuration
                String indexField = CatalogConfiguration.getFieldMapping().get(property.toLowerCase());
                if (indexField != null)
                    property = indexField;

                // check if the requested param is in the index using getFieldNames
                if (!reader.getFieldNames(IndexReader.FieldOption.ALL).contains(property))
                    continue;

                boolean isRange = false;
                if (CatalogConfiguration.getGetRecordsRangeFields().contains(property))
                    isRange = true;

                if (isRange)
                    listOfValues = new Element("RangeOfValues", Csw.NAMESPACE_CSW);
                else
                    listOfValues = new Element("ListOfValues", Csw.NAMESPACE_CSW);

                //List<String> fields = new ArrayList<String>();
                //fields.add(property);
                //fields.add("_isTemplate");
                String fields[] = new String[] { property, "_isTemplate" };
                MapFieldSelector selector = new MapFieldSelector(fields);

                // parse each document in the index
                String[] fieldValues;
                SortedSet<String> sortedValues = new TreeSet<String>();
                HashMap<String, Integer> duplicateValues = new HashMap<String, Integer>();
                for (int j = 0; j < hits.scoreDocs.length; j++) {
                    Document doc = reader.document(hits.scoreDocs[j].doc, selector);

                    // Skip templates and subTemplates
                    String[] isTemplate = doc.getValues("_isTemplate");
                    if (isTemplate[0] != null && !isTemplate[0].equals("n"))
                        continue;

                    // Get doc values for specified property
                    fieldValues = doc.getValues(property);
                    if (fieldValues == null)
                        continue;

                    addtoSortedSet(sortedValues, fieldValues, duplicateValues);
                }

                SummaryComparator valuesComparator = new SummaryComparator(SortOption.FREQUENCY, Type.STRING,
                        context.getLanguage(), null);
                TreeSet<Map.Entry<String, Integer>> sortedValuesFrequency =
                        new TreeSet<Map.Entry<String, Integer>>(valuesComparator);
                sortedValuesFrequency.addAll(duplicateValues.entrySet());

                if (freq)
                    return createValuesByFrequency(sortedValuesFrequency);
                else
                    listOfValues.addContent(createValuesElement(sortedValues, isRange));

            } finally {
                // No children means that the catalog was unable to determine
                // anything about the specified parameter
                if (listOfValues != null && listOfValues.getChildren().size() != 0)
                    domainValues.addContent(listOfValues);

                // Add current DomainValues to the list
                domainValuesList.add(domainValues);
            }
        } finally {
            sm.releaseIndexReader(reader);
        }
    }
    return domainValuesList;
}
From source file:org.fao.geonet.kernel.search.DuplicateDocFilter.java
License:Open Source License
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final BitSet bits = new BitSet(reader.maxDoc());

    new IndexSearcher(reader).search(_query, new Collector() {
        private int docBase;
        private IndexReader reader;

        @Override
        public void setScorer(Scorer scorer) throws IOException {
        }

        @Override
        public void collect(int doc) throws IOException {
            if (hits.size() <= _maxResults) {
                Document document;
                try {
                    document = reader.document(docBase + doc, _fieldSelector);
                    String id = document.get("_id");
                    if (!hits.contains(id)) {
                        bits.set(docBase + doc);
                        hits.add(id);
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        }

        @Override
        public void setNextReader(IndexReader reader, int docBase) throws IOException {
            this.docBase = docBase;
            this.reader = reader;
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            return false;
        }
    });

    return new DocIdBitSet(bits);
}