List of usage examples for org.apache.lucene.index.Fields#iterator()
Method signature: @Override public abstract Iterator<String> iterator();
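Fields implements Iterable<String>, and iterator() steps through the names of the indexed fields. Before the per-source examples below, here is a minimal, self-contained sketch of the typical pattern, assuming a Lucene 5.x/6.x-era API to match the MultiFields.getFields(...) calls used in the snippets; the class name ListFieldNames and the sample field names are illustrative only.

import java.util.Iterator;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class ListFieldNames {
    public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        // Index a single document with two text fields so there is something to iterate over.
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("title", "hello world", Field.Store.YES));
            doc.add(new TextField("body", "iterating lucene field names", Field.Store.NO));
            writer.addDocument(doc);
        }
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            // MultiFields.getFields may return null for an empty index, so check before iterating.
            Fields fields = MultiFields.getFields(reader);
            if (fields != null) {
                Iterator<String> it = fields.iterator();
                while (it.hasNext()) {
                    System.out.println(it.next()); // prints the indexed field names, e.g. "body" and "title"
                }
            }
        }
    }
}

Because Fields is Iterable, the explicit Iterator can also be replaced by an enhanced for loop (for (String field : fields) { ... }); several of the examples below spell out the same loop manually.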
From source file: org.elasticsearch.test.integration.termvectors.GetTermVectorTests.java
License: Apache License
private void compareLuceneESTermVectorResults(Fields fields, Fields luceneFields,
        HashMap<String, Boolean> storePositionsMap, HashMap<String, Boolean> storeOfsetsMap,
        HashMap<String, Boolean> storePayloadsMap, boolean getPositions, boolean getOffsets,
        boolean getPayloads, String[] selectedFields) throws IOException {
    HashSet<String> selectedFieldsMap = new HashSet<String>(Arrays.asList(selectedFields));
    Iterator<String> luceneFieldNames = luceneFields.iterator();
    assertThat(luceneFields.size(), equalTo(storeOfsetsMap.size()));
    assertThat(fields.size(), equalTo(selectedFields.length));
    while (luceneFieldNames.hasNext()) {
        String luceneFieldName = luceneFieldNames.next();
        if (!selectedFieldsMap.contains(luceneFieldName))
            continue;
        Terms esTerms = fields.terms(luceneFieldName);
        Terms luceneTerms = luceneFields.terms(luceneFieldName);
        TermsEnum esTermEnum = esTerms.iterator(null);
        TermsEnum luceneTermEnum = luceneTerms.iterator(null);
        int numTerms = 0;
        while (esTermEnum.next() != null) {
            luceneTermEnum.next();
            assertThat(esTermEnum.totalTermFreq(), equalTo(luceneTermEnum.totalTermFreq()));
            DocsAndPositionsEnum esDocsPosEnum = esTermEnum.docsAndPositions(null, null, 0);
            DocsAndPositionsEnum luceneDocsPosEnum = luceneTermEnum.docsAndPositions(null, null, 0);
            if (luceneDocsPosEnum == null) {
                assertThat(storeOfsetsMap.get(luceneFieldName), equalTo(false));
                assertThat(storePayloadsMap.get(luceneFieldName), equalTo(false));
                assertThat(storePositionsMap.get(luceneFieldName), equalTo(false));
                continue;
            }
            numTerms++;
            assertThat("failed for field: " + luceneFieldName, esTermEnum.term().utf8ToString(),
                    equalTo(luceneTermEnum.term().utf8ToString()));
            esDocsPosEnum.nextDoc();
            luceneDocsPosEnum.nextDoc();
            int freq = (int) esDocsPosEnum.freq();
            assertThat(freq, equalTo(luceneDocsPosEnum.freq()));
            for (int i = 0; i < freq; i++) {
                int lucenePos = luceneDocsPosEnum.nextPosition();
                int esPos = esDocsPosEnum.nextPosition();
                if (storePositionsMap.get(luceneFieldName) && getPositions) {
                    assertThat(luceneFieldName, lucenePos, equalTo(esPos));
                } else {
                    assertThat(esPos, equalTo(-1));
                }
                if (storeOfsetsMap.get(luceneFieldName) && getOffsets) {
                    assertThat(luceneDocsPosEnum.startOffset(), equalTo(esDocsPosEnum.startOffset()));
                    assertThat(luceneDocsPosEnum.endOffset(), equalTo(esDocsPosEnum.endOffset()));
                } else {
                    assertThat(esDocsPosEnum.startOffset(), equalTo(-1));
                    assertThat(esDocsPosEnum.endOffset(), equalTo(-1));
                }
                if (storePayloadsMap.get(luceneFieldName) && getPayloads) {
                    assertThat(luceneFieldName, luceneDocsPosEnum.getPayload(), equalTo(esDocsPosEnum.getPayload()));
                } else {
                    assertThat(esDocsPosEnum.getPayload(), equalTo(null));
                }
            }
        }
    }
}
From source file: org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReader.java
License: Open Source License
@Override
public Fields getTermVectors(int docID) throws IOException {
    Fields f = super.getTermVectors(docID);
    if (f == null) {
        return null;
    }
    f = new FieldFilterFields(f);
    // we need to check for emptiness, so we can return null:
    return f.iterator().hasNext() ? f : null;
}
From source file: org.getopt.luke.HighFreqTerms.java
License: Apache License
/**
 * @param reader
 * @param numTerms
 * @param fieldNames
 * @return TermStats[] ordered by terms with highest docFreq first.
 * @throws Exception
 */
public static TermStats[] getHighFreqTerms(IndexReader reader, int numTerms, String[] fieldNames)
        throws Exception {
    TermStatsQueue tiq = null;
    TermsEnum te = null;
    if (fieldNames != null) {
        Fields fields = MultiFields.getFields(reader);
        if (fields == null) {
            LOG.info("Index with no fields - probably empty or corrupted");
            return EMPTY_STATS;
        }
        tiq = new TermStatsQueue(numTerms);
        for (String field : fieldNames) {
            Terms terms = fields.terms(field);
            if (terms != null) {
                te = terms.iterator(te);
                fillQueue(te, tiq, field);
            }
        }
    } else {
        Fields fields = MultiFields.getFields(reader);
        if (fields == null) {
            LOG.info("Index with no fields - probably empty or corrupted");
            return EMPTY_STATS;
        }
        tiq = new TermStatsQueue(numTerms);
        Iterator<String> fieldIterator = fields.iterator();
        while (fieldIterator.hasNext()) {
            String field = fieldIterator.next();
            Terms terms = fields.terms(field);
            if (terms != null) {
                te = terms.iterator(te);
                fillQueue(te, tiq, field);
            }
        }
    }
    TermStats[] result = new TermStats[tiq.size()];
    // we want highest first so we read the queue and populate the array
    // starting at the end and work backwards
    int count = tiq.size() - 1;
    while (tiq.size() != 0) {
        result[count] = tiq.pop();
        count--;
    }
    return result;
}
From source file: org.meresco.lucene.Lucene.java
License: Open Source License
public List<String> fieldnames() throws Exception {
    SearcherAndTaxonomy reference = data.getManager().acquire();
    try {
        List<String> fieldnames = new ArrayList<String>();
        Fields fields = MultiFields.getFields(reference.searcher.getIndexReader());
        if (fields == null)
            return fieldnames;
        for (Iterator<String> iterator = fields.iterator(); iterator.hasNext();) {
            fieldnames.add(iterator.next());
        }
        return fieldnames;
    } finally {
        data.getManager().release(reference);
    }
}
From source file: org.pageseeder.flint.lucene.search.Terms.java
License: Apache License
/**
 * Returns the list of field names for the specified reader.
 *
 * @param reader The index reader
 *
 * @return the list of field names
 *
 * @throws IOException should any IO error be reported by the {@link MultiFields#getFields(IndexReader)} method.
 */
@Beta
public static List<String> fields(IndexReader reader) throws IOException {
    LOGGER.debug("Loading fields");
    List<String> fieldnames = new ArrayList<>();
    Fields fields = MultiFields.getFields(reader);
    if (fields == null)
        return fieldnames;
    Iterator<String> it = fields.iterator();
    while (it.hasNext()) {
        fieldnames.add(it.next());
    }
    return fieldnames;
}
From source file: tech.beshu.ror.es.security.DocumentFieldReader.java
License: Open Source License
@Override
public Fields getTermVectors(int docID) throws IOException {
    Fields original = in.getTermVectors(docID);
    return new Fields() {
        @Override
        public Iterator<String> iterator() {
            return Iterators.filter(original.iterator(), s -> policy.canKeep(s));
        }

        @Override
        public Terms terms(String field) throws IOException {
            return policy.canKeep(field) ? original.terms(field) : null;
        }

        @Override
        public int size() {
            return remainingFieldsInfo.size();
        }
    };
}
From source file: utils.HighFreqTerms.java
License: Apache License
/**
 * @param reader
 * @param numTerms
 * @param fieldNames
 * @return TermStats[] ordered by terms with highest docFreq first.
 * @throws Exception
 */
public static TermStats[] getHighFreqTerms(IndexReader reader, int numTerms, String[] fieldNames)
        throws Exception {
    TermStatsQueue tiq = null;
    TermsEnum te = null;
    if (fieldNames != null) {
        Fields fields = MultiFields.getFields(reader);
        if (fields == null) {
            return EMPTY_STATS;
        }
        tiq = new TermStatsQueue(numTerms);
        for (String field : fieldNames) {
            Terms terms = fields.terms(field);
            if (terms != null) {
                te = terms.iterator();
                fillQueue(te, tiq, field);
            }
        }
    } else {
        Fields fields = MultiFields.getFields(reader);
        if (fields == null) {
            return EMPTY_STATS;
        }
        tiq = new TermStatsQueue(numTerms);
        Iterator<String> fieldIterator = fields.iterator();
        while (fieldIterator.hasNext()) {
            String field = fieldIterator.next();
            Terms terms = fields.terms(field);
            if (terms != null) {
                te = terms.iterator();
                fillQueue(te, tiq, field);
            }
        }
    }
    TermStats[] result = new TermStats[tiq.size()];
    // we want highest first so we read the queue and populate the array
    // starting at the end and work backwards
    int count = tiq.size() - 1;
    while (tiq.size() != 0) {
        result[count] = tiq.pop();
        count--;
    }
    return result;
}