List of usage examples for org.apache.lucene.util.BytesRef.utf8ToString
public String utf8ToString()
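utf8ToString() interprets the bytes referenced by the BytesRef (the range from offset to offset+length) as UTF-8 and returns them as a new String. A minimal, self-contained sketch of the call before the real-world examples below (the sample text is arbitrary):

import org.apache.lucene.util.BytesRef;

public class Utf8ToStringDemo {
    public static void main(String[] args) {
        // BytesRef(CharSequence) stores the UTF-8 encoding of the given text
        BytesRef ref = new BytesRef("the quick brown fox");
        // utf8ToString() decodes those bytes back into a java.lang.String
        System.out.println(ref.utf8ToString()); // prints: the quick brown fox
    }
}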
From source file:org.elasticsearch.test.integration.termvectors.GetTermVectorTests.java
License:Apache License
@Test
public void testRandomSingleTermVectors() throws ElasticSearchException, IOException {
    Random random = getRandom();
    FieldType ft = new FieldType();
    // pick one of the seven storage configurations (0..6)
    int config = random.nextInt(7);
    boolean storePositions = false;
    boolean storeOffsets = false;
    boolean storePayloads = false;
    boolean storeTermVectors = false;
    switch (config) {
    case 0:
        // store nothing
        break;
    case 1:
        storeTermVectors = true;
        break;
    case 2:
        storeTermVectors = true;
        storePositions = true;
        break;
    case 3:
        storeTermVectors = true;
        storeOffsets = true;
        break;
    case 4:
        storeTermVectors = true;
        storePositions = true;
        storeOffsets = true;
        break;
    case 5:
        storeTermVectors = true;
        storePositions = true;
        storePayloads = true;
        break;
    case 6:
        storeTermVectors = true;
        storePositions = true;
        storeOffsets = true;
        storePayloads = true;
        break;
    }
    ft.setStoreTermVectors(storeTermVectors);
    ft.setStoreTermVectorOffsets(storeOffsets);
    ft.setStoreTermVectorPayloads(storePayloads);
    ft.setStoreTermVectorPositions(storePositions);
    String optionString = AbstractFieldMapper.termVectorOptionsToString(ft);
    run(addMapping(prepareCreate("test"), "type1",
            new Object[] { "field", "type", "string", "term_vector", optionString, "analyzer", "tv_test" })
                    .setSettings(ImmutableSettings.settingsBuilder()
                            .put("index.analysis.analyzer.tv_test.tokenizer", "whitespace")
                            .putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase")));
    ensureYellow();
    for (int i = 0; i < 10; i++) {
        client().prepareIndex("test", "type1", Integer.toString(i))
                .setSource(XContentFactory.jsonBuilder().startObject()
                        .field("field", "the quick brown fox jumps over the lazy dog")
                        // 0the3 4quick9 10brown15 16fox19 20jumps25 26over30
                        // 31the34 35lazy39 40dog43
                        .endObject())
                .execute().actionGet();
        refresh();
    }
    String[] values = { "brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the" };
    int[] freq = { 1, 1, 1, 1, 1, 1, 1, 2 };
    int[][] pos = { { 2 }, { 8 }, { 3 }, { 4 }, { 7 }, { 5 }, { 1 }, { 0, 6 } };
    int[][] startOffset = { { 10 }, { 40 }, { 16 }, { 20 }, { 35 }, { 26 }, { 4 }, { 0, 31 } };
    int[][] endOffset = { { 15 }, { 43 }, { 19 }, { 25 }, { 39 }, { 30 }, { 9 }, { 3, 34 } };
    boolean isPayloadRequested = random.nextBoolean();
    boolean isOffsetRequested = random.nextBoolean();
    boolean isPositionsRequested = random.nextBoolean();
    String infoString = createInfoString(isPositionsRequested, isOffsetRequested, isPayloadRequested,
            optionString);
    for (int i = 0; i < 10; i++) {
        TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
                .setPayloads(isPayloadRequested).setOffsets(isOffsetRequested)
                .setPositions(isPositionsRequested).setSelectedFields();
        TermVectorResponse response = resp.execute().actionGet();
        assertThat(infoString + "doc id: " + i + " doesn't exist but should", response.documentExists(),
                equalTo(true));
        Fields fields = response.getFields();
        assertThat(fields.size(), equalTo(ft.storeTermVectors() ? 1 : 0));
        if (ft.storeTermVectors()) {
            Terms terms = fields.terms("field");
            assertThat(terms.size(), equalTo(8L));
            TermsEnum iterator = terms.iterator(null);
            for (int j = 0; j < values.length; j++) {
                String string = values[j];
                BytesRef next = iterator.next();
                assertThat(infoString, next, Matchers.notNullValue());
                assertThat(infoString + "expected " + string, string, equalTo(next.utf8ToString()));
                // do not test ttf or doc frequency, because here we have
                // many shards and do not know how documents are distributed
                DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
                // docsAndPositions only returns something if positions,
                // payloads or offsets are stored / requested; otherwise use DocsEnum?
                assertThat(infoString, docsAndPositions.nextDoc(), equalTo(0));
                assertThat(infoString, freq[j], equalTo(docsAndPositions.freq()));
                int[] termPos = pos[j];
                int[] termStartOffset = startOffset[j];
                int[] termEndOffset = endOffset[j];
                if (isPositionsRequested && storePositions) {
                    assertThat(infoString, termPos.length, equalTo(freq[j]));
                }
                if (isOffsetRequested && storeOffsets) {
                    assertThat(termStartOffset.length, equalTo(freq[j]));
                    assertThat(termEndOffset.length, equalTo(freq[j]));
                }
                for (int k = 0; k < freq[j]; k++) {
                    int nextPosition = docsAndPositions.nextPosition();
                    // only return something useful if requested and stored
                    if (isPositionsRequested && storePositions) {
                        assertThat(infoString + "positions for term: " + string, nextPosition,
                                equalTo(termPos[k]));
                    } else {
                        assertThat(infoString + "positions for term: ", nextPosition, equalTo(-1));
                    }
                    // only return something useful if requested and stored
                    if (isPayloadRequested && storePayloads) {
                        assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(),
                                equalTo(new BytesRef("word")));
                    } else {
                        assertThat(infoString + "payloads for term: " + string, docsAndPositions.getPayload(),
                                equalTo(null));
                    }
                    // only return something useful if requested and stored
                    if (isOffsetRequested && storeOffsets) {
                        assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(),
                                equalTo(termStartOffset[k]));
                        assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(),
                                equalTo(termEndOffset[k]));
                    } else {
                        assertThat(infoString + "startOffsets term: " + string, docsAndPositions.startOffset(),
                                equalTo(-1));
                        assertThat(infoString + "endOffsets term: " + string, docsAndPositions.endOffset(),
                                equalTo(-1));
                    }
                }
            }
            assertThat(iterator.next(), Matchers.nullValue());
        }
    }
}
From source file:org.elasticsearch.test.integration.termvectors.GetTermVectorTestsCheckDocFreq.java
License:Apache License
private void checkWithoutFieldStatistics(int numDocs, String[] values, int[] freq, int[][] pos,
        int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
            .setPayloads(true).setOffsets(true).setPositions(true).setTermStatistics(true)
            .setFieldStatistics(false).setSelectedFields();
    TermVectorResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exist but should", response.documentExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    // field statistics were not requested, so they come back as -1
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) -1));
    assertThat(terms.getDocCount(), Matchers.equalTo(-1));
    assertThat(terms.getSumDocFreq(), equalTo((long) -1));
    TermsEnum iterator = terms.iterator(null);
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        if (string.equals("the")) {
            assertThat("expected ttf of " + string, numDocs * 2, equalTo((int) iterator.totalTermFreq()));
        } else {
            assertThat("expected ttf of " + string, numDocs, equalTo((int) iterator.totalTermFreq()));
        }
        DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(numDocs));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    response.toXContent(xBuilder, null);
    BytesStream bytesStream = xBuilder.bytesStream();
    String utf8 = bytesStream.bytes().toUtf8();
    // payloads are returned base64-encoded: "d29yZA==" decodes to "word"
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i
            + "\",\"_version\":1,\"exists\":true,\"term_vectors\":{\"field\":{\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[2],\"start\":[10],\"end\":[15],\"payload\":[\"d29yZA==\"]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[8],\"start\":[40],\"end\":[43],\"payload\":[\"d29yZA==\"]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[3],\"start\":[16],\"end\":[19],\"payload\":[\"d29yZA==\"]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[4],\"start\":[20],\"end\":[25],\"payload\":[\"d29yZA==\"]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[7],\"start\":[35],\"end\":[39],\"payload\":[\"d29yZA==\"]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[5],\"start\":[26],\"end\":[30],\"payload\":[\"d29yZA==\"]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[1],\"start\":[4],\"end\":[9],\"payload\":[\"d29yZA==\"]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"pos\":[0,6],\"start\":[0,31],\"end\":[3,34],\"payload\":[\"d29yZA==\",\"d29yZA==\"]}}}}}";
    assertThat(utf8, equalTo(expectedString));
}
From source file:org.elasticsearch.test.integration.termvectors.GetTermVectorTestsCheckDocFreq.java
License:Apache License
private void checkWithoutTermStatistics(int numDocs, String[] values, int[] freq, int[][] pos,
        int[][] startOffset, int[][] endOffset, int i) throws IOException {
    TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
            .setPayloads(true).setOffsets(true).setPositions(true).setTermStatistics(false)
            .setFieldStatistics(true).setSelectedFields();
    assertThat(resp.request().termStatistics(), equalTo(false));
    TermVectorResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exist but should", response.documentExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));
    TermsEnum iterator = terms.iterator(null);
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        // term statistics were not requested, so they come back as -1
        assertThat("expected ttf of " + string, -1, equalTo((int) iterator.totalTermFreq()));
        DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(-1));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    response.toXContent(xBuilder, null);
    BytesStream bytesStream = xBuilder.bytesStream();
    String utf8 = bytesStream.bytes().toUtf8();
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i
            + "\",\"_version\":1,\"exists\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"term_freq\":1,\"pos\":[2],\"start\":[10],\"end\":[15],\"payload\":[\"d29yZA==\"]},\"dog\":{\"term_freq\":1,\"pos\":[8],\"start\":[40],\"end\":[43],\"payload\":[\"d29yZA==\"]},\"fox\":{\"term_freq\":1,\"pos\":[3],\"start\":[16],\"end\":[19],\"payload\":[\"d29yZA==\"]},\"jumps\":{\"term_freq\":1,\"pos\":[4],\"start\":[20],\"end\":[25],\"payload\":[\"d29yZA==\"]},\"lazy\":{\"term_freq\":1,\"pos\":[7],\"start\":[35],\"end\":[39],\"payload\":[\"d29yZA==\"]},\"over\":{\"term_freq\":1,\"pos\":[5],\"start\":[26],\"end\":[30],\"payload\":[\"d29yZA==\"]},\"quick\":{\"term_freq\":1,\"pos\":[1],\"start\":[4],\"end\":[9],\"payload\":[\"d29yZA==\"]},\"the\":{\"term_freq\":2,\"pos\":[0,6],\"start\":[0,31],\"end\":[3,34],\"payload\":[\"d29yZA==\",\"d29yZA==\"]}}}}}";
    assertThat(utf8, equalTo(expectedString));
}
From source file:org.elasticsearch.test.integration.termvectors.GetTermVectorTestsCheckDocFreq.java
License:Apache License
private void checkAllInfo(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset,
        int[][] endOffset, int i) throws IOException {
    TermVectorRequestBuilder resp = client().prepareTermVector("test", "type1", Integer.toString(i))
            .setPayloads(true).setOffsets(true).setPositions(true).setFieldStatistics(true)
            .setTermStatistics(true).setSelectedFields();
    assertThat(resp.request().fieldStatistics(), equalTo(true));
    TermVectorResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exist but should", response.documentExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));
    TermsEnum iterator = terms.iterator(null);
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        if (string.equals("the")) {
            assertThat("expected ttf of " + string, numDocs * 2, equalTo((int) iterator.totalTermFreq()));
        } else {
            assertThat("expected ttf of " + string, numDocs, equalTo((int) iterator.totalTermFreq()));
        }
        DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(numDocs));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());
    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    response.toXContent(xBuilder, null);
    BytesStream bytesStream = xBuilder.bytesStream();
    String utf8 = bytesStream.bytes().toUtf8();
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\"" + i
            + "\",\"_version\":1,\"exists\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[2],\"start\":[10],\"end\":[15],\"payload\":[\"d29yZA==\"]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[8],\"start\":[40],\"end\":[43],\"payload\":[\"d29yZA==\"]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[3],\"start\":[16],\"end\":[19],\"payload\":[\"d29yZA==\"]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[4],\"start\":[20],\"end\":[25],\"payload\":[\"d29yZA==\"]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[7],\"start\":[35],\"end\":[39],\"payload\":[\"d29yZA==\"]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[5],\"start\":[26],\"end\":[30],\"payload\":[\"d29yZA==\"]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"pos\":[1],\"start\":[4],\"end\":[9],\"payload\":[\"d29yZA==\"]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"pos\":[0,6],\"start\":[0,31],\"end\":[3,34],\"payload\":[\"d29yZA==\",\"d29yZA==\"]}}}}}";
    assertThat(utf8, equalTo(expectedString));
}
From source file:org.eu.bitzone.Leia.java
License:Apache License
public void showNextTerm(final Object fCombo, final Object fText) {
    if (ir == null) {
        showStatus(MSG_NOINDEX);
        return;
    }
    final SlowThread st = new SlowThread(this) {
        @Override
        public void execute() {
            try {
                String text = getString(fText, "text");
                if (text == null || text.trim().equals("")) {
                    text = "";
                }
                if (text.length() == 0) {
                    showFirstTerm(fCombo, fText);
                    return;
                }
                TermsEnum te = (TermsEnum) getProperty(fCombo, "te");
                String fld = getString(fCombo, "text");
                final String teField = (String) getProperty(fCombo, "teField");
                SeekStatus status;
                BytesRef rawTerm = null;
                if (te != null) {
                    rawTerm = te.term();
                }
                final String rawString = rawTerm != null ? rawTerm.utf8ToString() : null;
                if (te == null || !teField.equals(fld) || !text.equals(rawString)) {
                    final Terms terms = MultiFields.getTerms(ir, fld);
                    te = terms.iterator(null);
                    putProperty(fCombo, "te", te);
                    putProperty(fCombo, "teField", fld);
                    status = te.seekCeil(new BytesRef(text));
                    if (status.equals(SeekStatus.FOUND)) {
                        rawTerm = te.term();
                    } else {
                        rawTerm = null;
                    }
                } else {
                    rawTerm = te.next();
                }
                if (rawTerm == null) {
                    // proceed to next field
                    int idx = fn.indexOf(fld);
                    while (idx < fn.size() - 1) {
                        idx++;
                        setInteger(fCombo, "selected", idx);
                        fld = fn.get(idx);
                        final Terms terms = MultiFields.getTerms(ir, fld);
                        if (terms == null) {
                            continue;
                        }
                        te = terms.iterator(null);
                        rawTerm = te.next();
                        putProperty(fCombo, "te", te);
                        putProperty(fCombo, "teField", fld);
                        break;
                    }
                }
                if (rawTerm == null) {
                    showStatus("No more terms");
                    return;
                }
                // Term t = new Term(fld, term.utf8ToString());
                _showTerm(fCombo, fText, new Term(fld, rawTerm));
            } catch (final Exception e) {
                e.printStackTrace();
                showStatus(e.getMessage());
            }
        }
    };
    if (slowAccess) {
        st.start();
    } else {
        st.execute();
    }
}
From source file:org.exist.indexing.lucene.LuceneIndexWorker.java
License:Open Source License
private Occurrences[] scanIndexByQName(List<QName> qnames, DocumentSet docs, NodeSet nodes, String start,
        String end, long max) {
    TreeMap<String, Occurrences> map = new TreeMap<>();
    IndexReader reader = null;
    try {
        reader = index.getReader();
        for (QName qname : qnames) {
            String field = LuceneUtil.encodeQName(qname, index.getBrokerPool().getSymbols());
            List<AtomicReaderContext> leaves = reader.leaves();
            for (AtomicReaderContext context : leaves) {
                NumericDocValues docIdValues = context.reader().getNumericDocValues(FIELD_DOC_ID);
                BinaryDocValues nodeIdValues = context.reader().getBinaryDocValues(LuceneUtil.FIELD_NODE_ID);
                Bits liveDocs = context.reader().getLiveDocs();
                Terms terms = context.reader().terms(field);
                if (terms == null)
                    continue;
                TermsEnum termsIter = terms.iterator(null);
                if (termsIter.next() == null) {
                    continue;
                }
                do {
                    if (map.size() >= max) {
                        break;
                    }
                    BytesRef ref = termsIter.term();
                    String term = ref.utf8ToString();
                    boolean include = true;
                    if (end != null) {
                        if (term.compareTo(end) > 0)
                            include = false;
                    } else if (start != null && !term.startsWith(start))
                        include = false;
                    if (include) {
                        DocsEnum docsEnum = termsIter.docs(null, null);
                        while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                            if (liveDocs != null && !liveDocs.get(docsEnum.docID())) {
                                continue;
                            }
                            int docId = (int) docIdValues.get(docsEnum.docID());
                            DocumentImpl storedDocument = docs.getDoc(docId);
                            if (storedDocument == null)
                                continue;
                            NodeId nodeId = null;
                            if (nodes != null) {
                                BytesRef nodeIdRef = new BytesRef(buf);
                                nodeIdValues.get(docsEnum.docID(), nodeIdRef);
                                int units = ByteConversion.byteToShort(nodeIdRef.bytes, nodeIdRef.offset);
                                nodeId = index.getBrokerPool().getNodeFactory().createFromData(units,
                                        nodeIdRef.bytes, nodeIdRef.offset + 2);
                            }
                            // DW: warning: nodes can be null?
                            if (nodeId == null || nodes.get(storedDocument, nodeId) != null) {
                                Occurrences oc = map.get(term);
                                if (oc == null) {
                                    oc = new Occurrences(term);
                                    map.put(term, oc);
                                }
                                oc.addDocument(storedDocument);
                                oc.addOccurrences(docsEnum.freq());
                            }
                        }
                    }
                } while (termsIter.next() != null);
            }
        }
    } catch (IOException e) {
        LOG.warn("Error while scanning lucene index entries: " + e.getMessage(), e);
    } finally {
        index.releaseReader(reader);
    }
    Occurrences[] occur = new Occurrences[map.size()];
    return map.values().toArray(occur);
}
From source file:org.exist.indexing.lucene.XMLToQuery.java
License:Open Source License
private Term[] expandTerms(String field, String queryStr) throws XPathException {
    List<Term> termList = new ArrayList<>(8);
    Automaton automaton = WildcardQuery.toAutomaton(new Term(field, queryStr));
    CompiledAutomaton compiled = new CompiledAutomaton(automaton);
    IndexReader reader = null;
    try {
        reader = index.getReader();
        for (AtomicReaderContext atomic : reader.leaves()) {
            Terms terms = atomic.reader().terms(field);
            if (terms != null) {
                TermsEnum termsEnum = compiled.getTermsEnum(terms);
                BytesRef data = termsEnum.next();
                while (data != null) {
                    String term = data.utf8ToString();
                    termList.add(new Term(field, term));
                    data = termsEnum.next();
                }
            }
        }
    } catch (IOException e) {
        throw new XPathException("Lucene index error while creating query: " + e.getMessage(), e);
    } finally {
        index.releaseReader(reader);
    }
    Term[] matchingTerms = new Term[termList.size()];
    return termList.toArray(matchingTerms);
}
From source file:org.exist.indexing.range.RangeIndexWorker.java
License:Open Source License
private void scan(DocumentSet docs, NodeSet nodes, String start, String end, long max,
        TreeMap<String, Occurrences> map, IndexReader reader, String field) throws IOException {
    List<AtomicReaderContext> leaves = reader.leaves();
    for (AtomicReaderContext context : leaves) {
        NumericDocValues docIdValues = context.reader().getNumericDocValues(FIELD_DOC_ID);
        BinaryDocValues nodeIdValues = context.reader().getBinaryDocValues(FIELD_NODE_ID);
        Bits liveDocs = context.reader().getLiveDocs();
        Terms terms = context.reader().terms(field);
        if (terms == null)
            continue;
        TermsEnum termsIter = terms.iterator(null);
        if (termsIter.next() == null) {
            continue;
        }
        do {
            if (map.size() >= max) {
                break;
            }
            BytesRef ref = termsIter.term();
            String term = ref.utf8ToString();
            boolean include = true;
            if (end != null) {
                if (term.compareTo(end) > 0)
                    include = false;
            } else if (start != null && !term.startsWith(start))
                include = false;
            if (include) {
                DocsEnum docsEnum = termsIter.docs(null, null);
                while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                    if (liveDocs != null && !liveDocs.get(docsEnum.docID())) {
                        continue;
                    }
                    int docId = (int) docIdValues.get(docsEnum.docID());
                    DocumentImpl storedDocument = docs.getDoc(docId);
                    if (storedDocument == null)
                        continue;
                    NodeId nodeId = null;
                    if (nodes != null) {
                        BytesRef nodeIdRef = new BytesRef(buf);
                        nodeIdValues.get(docsEnum.docID(), nodeIdRef);
                        int units = ByteConversion.byteToShort(nodeIdRef.bytes, nodeIdRef.offset);
                        nodeId = index.getBrokerPool().getNodeFactory().createFromData(units, nodeIdRef.bytes,
                                nodeIdRef.offset + 2);
                    }
                    if (nodeId == null || nodes.get(storedDocument, nodeId) != null) {
                        Occurrences oc = map.get(term);
                        if (oc == null) {
                            oc = new Occurrences(term);
                            map.put(term, oc);
                        }
                        oc.addDocument(storedDocument);
                        oc.addOccurrences(docsEnum.freq());
                    }
                }
            }
        } while (termsIter.next() != null);
    }
}
From source file:org.getopt.luke.Luke.java
License:Apache License
public void showNextTerm(final Object fCombo, final Object fText) {
    if (ir == null) {
        showStatus(MSG_NOINDEX);
        return;
    }
    SlowThread st = new SlowThread(this) {
        public void execute() {
            try {
                String text = getString(fText, "text");
                if (text == null || text.trim().equals(""))
                    text = "";
                if (text.length() == 0) {
                    showFirstTerm(fCombo, fText);
                    return;
                }
                TermsEnum te = (TermsEnum) getProperty(fCombo, "te");
                String fld = getString(fCombo, "text");
                String teField = (String) getProperty(fCombo, "teField");
                SeekStatus status;
                BytesRef rawTerm = null;
                if (te != null) {
                    rawTerm = te.term();
                }
                String rawString = rawTerm != null ? rawTerm.utf8ToString() : null;
                if (te == null || !teField.equals(fld) || !text.equals(rawString)) {
                    Terms terms = MultiFields.getTerms(ir, fld);
                    te = terms.iterator(null);
                    putProperty(fCombo, "te", te);
                    putProperty(fCombo, "teField", fld);
                    status = te.seekCeil(new BytesRef(text));
                    if (status.equals(SeekStatus.FOUND)) {
                        rawTerm = te.term();
                    } else {
                        rawTerm = null;
                    }
                } else {
                    rawTerm = te.next();
                }
                if (rawTerm == null) {
                    // proceed to next field
                    int idx = fn.indexOf(fld);
                    while (idx < fn.size() - 1) {
                        idx++;
                        setInteger(fCombo, "selected", idx);
                        fld = fn.get(idx);
                        Terms terms = MultiFields.getTerms(ir, fld);
                        if (terms == null) {
                            continue;
                        }
                        te = terms.iterator(null);
                        rawTerm = te.next();
                        putProperty(fCombo, "te", te);
                        putProperty(fCombo, "teField", fld);
                        break;
                    }
                }
                if (rawTerm == null) {
                    showStatus("No more terms");
                    return;
                }
                // Term t = new Term(fld, term.utf8ToString());
                _showTerm(fCombo, fText, new Term(fld, rawTerm));
            } catch (Exception e) {
                e.printStackTrace();
                showStatus(e.getMessage());
            }
        }
    };
    if (slowAccess) {
        st.start();
    } else {
        st.execute();
    }
}
From source file:org.hibernate.search.query.fieldcache.impl.MultiStringFieldLoadingStrategy.java
License:LGPL
@Override
public String[] collect(int relativeDocId) {
    // use the loaded SortedSetDocValues to retrieve all values for the field
    sortedSetDocValues.setDocument(relativeDocId);
    List<String> values = new ArrayList<String>();
    long ordinal = sortedSetDocValues.nextOrd();
    while (ordinal != SortedSetDocValues.NO_MORE_ORDS) {
        BytesRef bytesRef = sortedSetDocValues.lookupOrd(ordinal);
        values.add(bytesRef.utf8ToString());
        ordinal = sortedSetDocValues.nextOrd();
    }
    return values.toArray(new String[values.size()]);
}
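A pattern common to the examples above is calling utf8ToString() on a BytesRef handed out by a TermsEnum or a doc-values lookup: the enum reuses that BytesRef between calls, while utf8ToString() copies the current bytes into a fresh, independent String. A stripped-down sketch of that pattern against the same Lucene 4.x API used above (the index path and field name are hypothetical):

import java.io.File;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class DumpTerms {
    public static void main(String[] args) throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("/tmp/index")))) {
            Terms terms = MultiFields.getTerms(reader, "field");
            if (terms == null) {
                return; // field is not indexed
            }
            TermsEnum te = terms.iterator(null);
            BytesRef ref;
            while ((ref = te.next()) != null) {
                // te.next() reuses its BytesRef across calls; utf8ToString()
                // materializes the current term as a stable String
                System.out.println(te.docFreq() + "\t" + ref.utf8ToString());
            }
        }
    }
}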