List of usage examples for org.apache.lucene.index PostingsEnum POSITIONS
short POSITIONS
To view the source code for org.apache.lucene.index.PostingsEnum.POSITIONS, click the Source Link.
From source file:com.rocana.lucene.codec.v1.RocanaBasePostingsFormatTestCase.java
License:Apache License
/**
 * Indexes random line-file documents through a codec whose postings format for the
 * {@code "body"} field is wrapped so that, on every flush and merge, the postings handed to
 * the {@link FieldsConsumer} are iterated a second (and third) time after they have been
 * written. Term statistics gathered during those extra iterations on flush are then compared
 * with the statistics reported by the final {@link IndexReader}.
 *
 * @throws Exception if indexing, reading or any assertion fails
 */
@Override
public void testInvertedWrite() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);

    // Must be concurrent because thread(s) can be merging
    // while up to one thread flushes, and each of those
    // threads iterates over the map while the flushing
    // thread might be adding to it:
    final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();

    // Running totals over all flushed segments; compared against the reader at the end.
    final AtomicLong sumDocFreq = new AtomicLong();
    final AtomicLong sumTotalTermFreq = new AtomicLong();

    // TODO: would be better to use / delegate to the current
    // Codec returned by getCodec()

    iwc.setCodec(new AssertingCodec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            // Unwrap per-field formats so we end up with the concrete format for this field.
            PostingsFormat p = getCodec().postingsFormat();
            if (p instanceof PerFieldPostingsFormat) {
                p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            if (p instanceof RocanaPerFieldPostingsFormat) {
                p = ((RocanaPerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            final PostingsFormat defaultPostingsFormat = p;

            // The thread that asked for the format; flushes are asserted to run on it below.
            final Thread mainThread = Thread.currentThread();

            if (field.equals("body")) {

                // A PF that counts up some stats and then in
                // the end we verify the stats match what the
                // final IndexReader says, just to exercise the
                // new freedom of iterating the postings more
                // than once at flush/merge:

                return new PostingsFormat(defaultPostingsFormat.getName()) {

                    @Override
                    public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {

                        final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);

                        return new FieldsConsumer() {
                            @Override
                            public void write(Fields fields) throws IOException {
                                // Let the real format write first; everything below re-reads the same fields.
                                fieldsConsumer.write(fields);

                                boolean isMerge = state.context.context == IOContext.Context.MERGE;

                                // We only use one thread for flushing
                                // in this test:
                                assert isMerge || Thread.currentThread() == mainThread;

                                // We iterate the provided TermsEnum
                                // twice, so we exercise this new freedom
                                // with the inverted API; if
                                // addOnSecondPass is true, we add up
                                // term stats on the 2nd iteration:
                                boolean addOnSecondPass = random().nextBoolean();

                                // Gather our own stats:
                                Terms terms = fields.terms("body");
                                assert terms != null;

                                // First pass: sequential iteration over every term.
                                TermsEnum termsEnum = terms.iterator();
                                PostingsEnum docs = null;
                                while (termsEnum.next() != null) {
                                    BytesRef term = termsEnum.term();
                                    // TODO: also sometimes ask for payloads/offsets?
                                    boolean noPositions = random().nextBoolean();
                                    if (noPositions) {
                                        docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                    } else {
                                        docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                    }
                                    int docFreq = 0;
                                    long totalTermFreq = 0;
                                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                        docFreq++;
                                        totalTermFreq += docs.freq();
                                        // Consume only a random prefix of the positions of this doc.
                                        int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                        if (!noPositions) {
                                            for (int i = 0; i < limit; i++) {
                                                docs.nextPosition();
                                            }
                                        }
                                    }

                                    String termString = term.utf8ToString();

                                    // During merge we should only see terms
                                    // we had already seen during a
                                    // previous flush:
                                    assertTrue(isMerge == false || termFreqs.containsKey(termString));

                                    if (isMerge == false) {
                                        if (addOnSecondPass == false) {
                                            TermFreqs tf = termFreqs.get(termString);
                                            if (tf == null) {
                                                tf = new TermFreqs();
                                                termFreqs.put(termString, tf);
                                            }
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        } else if (termFreqs.containsKey(termString) == false) {
                                            // Add placeholder (2nd pass will
                                            // set its counts):
                                            termFreqs.put(termString, new TermFreqs());
                                        }
                                    }
                                }

                                // Also test seeking the TermsEnum:
                                for (String term : termFreqs.keySet()) {
                                    if (termsEnum.seekExact(new BytesRef(term))) {
                                        // TODO: also sometimes ask for payloads/offsets?
                                        boolean noPositions = random().nextBoolean();
                                        if (noPositions) {
                                            docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                        } else {
                                            docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                        }

                                        int docFreq = 0;
                                        long totalTermFreq = 0;
                                        while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                            docFreq++;
                                            totalTermFreq += docs.freq();
                                            int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                            if (!noPositions) {
                                                for (int i = 0; i < limit; i++) {
                                                    docs.nextPosition();
                                                }
                                            }
                                        }

                                        // Second pass accumulates the stats when the first pass only added placeholders.
                                        if (isMerge == false && addOnSecondPass) {
                                            TermFreqs tf = termFreqs.get(term);
                                            assert tf != null;
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        }

                                        // What this segment holds can never exceed the recorded totals for the term.
                                        assertTrue(docFreq <= termFreqs.get(term).docFreq);
                                        assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                                    }
                                }

                                // Also test seekCeil
                                for (int iter = 0; iter < 10; iter++) {
                                    BytesRef term = new BytesRef(
                                            TestUtil.randomRealisticUnicodeString(random()));
                                    SeekStatus status = termsEnum.seekCeil(term);
                                    if (status == SeekStatus.NOT_FOUND) {
                                        // NOT_FOUND means the enum is positioned on a term sorting after the target.
                                        assertTrue(term.compareTo(termsEnum.term()) < 0);
                                    }
                                }
                            }

                            @Override
                            public void close() throws IOException {
                                fieldsConsumer.close();
                            }
                        };
                    }

                    @Override
                    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                        return defaultPostingsFormat.fieldsProducer(state);
                    }
                };
            } else {
                return defaultPostingsFormat;
            }
        }
    });

    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

    // Index documents until their estimated in-memory size reaches the target.
    LineFileDocs docs = new LineFileDocs(random());
    int bytesToIndex = atLeast(100) * 1024;
    int bytesIndexed = 0;
    while (bytesIndexed < bytesToIndex) {
        Document doc = docs.nextDoc();
        w.addDocument(doc);
        bytesIndexed += RamUsageTester.sizeOf(doc);
    }

    IndexReader r = w.getReader();
    w.close();

    // The totals gathered at flush time must match what the reader reports.
    Terms terms = MultiFields.getTerms(r, "body");
    assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
    assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());

    TermsEnum termsEnum = terms.iterator();
    long termCount = 0;
    boolean supportsOrds = true;
    while (termsEnum.next() != null) {
        BytesRef term = termsEnum.term();
        assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
        assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
        if (supportsOrds) {
            long ord;
            try {
                ord = termsEnum.ord();
            } catch (UnsupportedOperationException uoe) {
                // This enum has no ordinals; stop asking for the remaining terms.
                supportsOrds = false;
                ord = -1;
            }
            if (ord != -1) {
                assertEquals(termCount, ord);
            }
        }
        termCount++;
    }
    assertEquals(termFreqs.size(), termCount);
    r.close();
    dir.close();
}
From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java
License:Apache License
/** * checks the terms enum sequentially/*w ww.j a v a2 s . c o m*/ * if deep is false, it does a 'shallow' test that doesnt go down to the docsenums */ public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasPositions) throws Exception { BytesRef term; PostingsEnum leftPositions = null; PostingsEnum rightPositions = null; PostingsEnum leftDocs = null; PostingsEnum rightDocs = null; while ((term = leftTermsEnum.next()) != null) { assertEquals(term, rightTermsEnum.next()); assertTermStats(leftTermsEnum, rightTermsEnum); if (deep) { if (hasPositions) { // with payloads + off assertDocsAndPositionsEnum( leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.ALL), rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.ALL)); assertPositionsSkipping(leftTermsEnum.docFreq(), leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.ALL), rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.ALL)); // with payloads only assertDocsAndPositionsEnum( leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.PAYLOADS), rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.PAYLOADS)); assertPositionsSkipping(leftTermsEnum.docFreq(), leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.PAYLOADS), rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.PAYLOADS)); // with offsets only assertDocsAndPositionsEnum( leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.OFFSETS), rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.OFFSETS)); assertPositionsSkipping(leftTermsEnum.docFreq(), leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.OFFSETS), rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.OFFSETS)); // with positions only assertDocsAndPositionsEnum( leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.POSITIONS), rightPositions = 
rightTermsEnum.postings(rightPositions, PostingsEnum.POSITIONS)); assertPositionsSkipping(leftTermsEnum.docFreq(), leftPositions = leftTermsEnum.postings(leftPositions, PostingsEnum.POSITIONS), rightPositions = rightTermsEnum.postings(rightPositions, PostingsEnum.POSITIONS)); } // with freqs: assertDocsEnum(leftDocs = leftTermsEnum.postings(leftDocs), rightDocs = rightTermsEnum.postings(rightDocs)); // w/o freqs: assertDocsEnum(leftDocs = leftTermsEnum.postings(leftDocs, PostingsEnum.NONE), rightDocs = rightTermsEnum.postings(rightDocs, PostingsEnum.NONE)); // with freqs: assertDocsSkipping(leftTermsEnum.docFreq(), leftDocs = leftTermsEnum.postings(leftDocs), rightDocs = rightTermsEnum.postings(rightDocs)); // w/o freqs: assertDocsSkipping(leftTermsEnum.docFreq(), leftDocs = leftTermsEnum.postings(leftDocs, PostingsEnum.NONE), rightDocs = rightTermsEnum.postings(rightDocs, PostingsEnum.NONE)); } } assertNull(rightTermsEnum.next()); }
From source file:nl.inl.blacklab.search.SearcherImpl.java
License:Apache License
@Override public void getCharacterOffsets(int doc, String fieldName, int[] startsOfWords, int[] endsOfWords, boolean fillInDefaultsIfNotFound) { if (startsOfWords.length == 0) return; // nothing to do try {/* www. jav a 2 s .c o m*/ // Determine lowest and highest word position we'd like to know something about. // This saves a little bit of time for large result sets. int minP = -1, maxP = -1; int numStarts = startsOfWords.length; int numEnds = endsOfWords.length; for (int i = 0; i < numStarts; i++) { if (startsOfWords[i] < minP || minP == -1) minP = startsOfWords[i]; if (startsOfWords[i] > maxP) maxP = startsOfWords[i]; } for (int i = 0; i < numEnds; i++) { if (endsOfWords[i] < minP || minP == -1) minP = endsOfWords[i]; if (endsOfWords[i] > maxP) maxP = endsOfWords[i]; } if (minP < 0 || maxP < 0) throw new RuntimeException("Can't determine min and max positions"); String fieldPropName = ComplexFieldUtil.mainPropertyOffsetsField(indexStructure, fieldName); org.apache.lucene.index.Terms terms = reader.getTermVector(doc, fieldPropName); if (terms == null) throw new IllegalArgumentException( "Field " + fieldPropName + " in doc " + doc + " has no term vector"); if (!terms.hasPositions()) throw new IllegalArgumentException( "Field " + fieldPropName + " in doc " + doc + " has no character postion information"); //int lowestPos = -1, highestPos = -1; int lowestPosFirstChar = -1, highestPosLastChar = -1; int total = numStarts + numEnds; boolean[] done = new boolean[total]; // NOTE: array is automatically initialized to zeroes! int found = 0; // Iterate over terms TermsEnum termsEnum = terms.iterator(); while (termsEnum.next() != null) { PostingsEnum dpe = termsEnum.postings(null, null, PostingsEnum.POSITIONS); // Iterate over docs containing this term (NOTE: should be only one doc!) 
while (dpe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { // Iterate over positions of this term in this doc int positionsRead = 0; int numberOfPositions = dpe.freq(); while (positionsRead < numberOfPositions) { int position = dpe.nextPosition(); if (position == -1) break; positionsRead++; // Keep track of the lowest and highest char pos, so // we can fill in the character positions we didn't find int startOffset = dpe.startOffset(); if (startOffset < lowestPosFirstChar || lowestPosFirstChar == -1) { lowestPosFirstChar = startOffset; } int endOffset = dpe.endOffset(); if (endOffset > highestPosLastChar) { highestPosLastChar = endOffset; } // We've calculated the min and max word positions in advance, so // we know we can skip this position if it's outside the range we're interested in. // (Saves a little time for large result sets) if (position < minP || position > maxP) { continue; } for (int m = 0; m < numStarts; m++) { if (!done[m] && position == startsOfWords[m]) { done[m] = true; startsOfWords[m] = startOffset; found++; } } for (int m = 0; m < numEnds; m++) { if (!done[numStarts + m] && position == endsOfWords[m]) { done[numStarts + m] = true; endsOfWords[m] = endOffset; found++; } } // NOTE: we might be tempted to break here if found == total, // but that would foul up our calculation of highestPosLastChar and // lowestPosFirstChar. } } } if (found < total) { if (!fillInDefaultsIfNotFound) throw new RuntimeException("Could not find all character offsets!"); if (lowestPosFirstChar < 0 || highestPosLastChar < 0) throw new RuntimeException("Could not find default char positions!"); for (int m = 0; m < numStarts; m++) { if (!done[m]) startsOfWords[m] = lowestPosFirstChar; } for (int m = 0; m < numEnds; m++) { if (!done[numStarts + m]) endsOfWords[m] = highestPosLastChar; } } } catch (IOException e) { throw ExUtil.wrapRuntimeException(e); } }
From source file:org.alfresco.solr.query.SolrPathScorer.java
License:Open Source License
public static SolrPathScorer createPathScorer(SolrPathQuery solrPathQuery, LeafReaderContext context, Weight weight, DictionaryService dictionarySertvice, boolean repeat) throws IOException { // StructuredFieldPosition last = null; // if(solrPathQuery.getPathStructuredFieldPositions().size() > 0) // {//ww w . j av a 2s. c o m // last = solrPathQuery.getPathStructuredFieldPositions().get(solrPathQuery.getPathStructuredFieldPositions().size() - 1); // } if (solrPathQuery.getPathStructuredFieldPositions().size() == 0) { ArrayList<StructuredFieldPosition> answer = new ArrayList<StructuredFieldPosition>(2); answer.add(new SelfAxisStructuredFieldPosition()); answer.add(new SelfAxisStructuredFieldPosition()); solrPathQuery.appendQuery(answer); } for (StructuredFieldPosition sfp : solrPathQuery.getPathStructuredFieldPositions()) { if (sfp.getTermText() != null) { PostingsEnum p = context.reader().postings( new Term(solrPathQuery.getPathField(), sfp.getTermText()), PostingsEnum.POSITIONS); if (p == null) return null; CachingTermPositions ctp = new CachingTermPositions(p); sfp.setCachingTermPositions(ctp); } } SolrContainerScorer cs = null; PostingsEnum rootContainerPositions = null; if (solrPathQuery.getPathRootTerm() != null) { rootContainerPositions = context.reader().postings(solrPathQuery.getPathRootTerm(), PostingsEnum.POSITIONS); } if (solrPathQuery.getPathStructuredFieldPositions().size() > 0) { cs = new SolrContainerScorer(weight, rootContainerPositions, (StructuredFieldPosition[]) solrPathQuery .getPathStructuredFieldPositions().toArray(new StructuredFieldPosition[] {})); } return new SolrPathScorer(weight, cs); }
From source file:org.elasticsearch.action.termvectors.AbstractTermVectorsTestCase.java
License:Apache License
protected void validateResponse(TermVectorsResponse esResponse, Fields luceneFields, TestConfig testConfig) throws IOException { assertThat(esResponse.getIndex(), equalTo(testConfig.doc.index)); TestDoc testDoc = testConfig.doc;//from w w w. j a va 2 s .c om HashSet<String> selectedFields = testConfig.selectedFields == null ? null : new HashSet<>(Arrays.asList(testConfig.selectedFields)); Fields esTermVectorFields = esResponse.getFields(); for (TestFieldSetting field : testDoc.fieldSettings) { Terms esTerms = esTermVectorFields.terms(field.name); if (selectedFields != null && !selectedFields.contains(field.name)) { assertNull(esTerms); continue; } assertNotNull(esTerms); Terms luceneTerms = luceneFields.terms(field.name); TermsEnum esTermEnum = esTerms.iterator(); TermsEnum luceneTermEnum = luceneTerms.iterator(); while (esTermEnum.next() != null) { assertNotNull(luceneTermEnum.next()); assertThat(esTermEnum.totalTermFreq(), equalTo(luceneTermEnum.totalTermFreq())); PostingsEnum esDocsPosEnum = esTermEnum.postings(null, PostingsEnum.POSITIONS); PostingsEnum luceneDocsPosEnum = luceneTermEnum.postings(null, PostingsEnum.POSITIONS); if (luceneDocsPosEnum == null) { // test we expect that... 
assertFalse(field.storedOffset); assertFalse(field.storedPayloads); assertFalse(field.storedPositions); continue; } String currentTerm = esTermEnum.term().utf8ToString(); assertThat("Token mismatch for field: " + field.name, currentTerm, equalTo(luceneTermEnum.term().utf8ToString())); esDocsPosEnum.nextDoc(); luceneDocsPosEnum.nextDoc(); int freq = esDocsPosEnum.freq(); assertThat(freq, equalTo(luceneDocsPosEnum.freq())); for (int i = 0; i < freq; i++) { String failDesc = " (field:" + field.name + " term:" + currentTerm + ")"; int lucenePos = luceneDocsPosEnum.nextPosition(); int esPos = esDocsPosEnum.nextPosition(); if (field.storedPositions && testConfig.requestPositions) { assertThat("Position test failed" + failDesc, lucenePos, equalTo(esPos)); } else { assertThat("Missing position test failed" + failDesc, esPos, equalTo(-1)); } if (field.storedOffset && testConfig.requestOffsets) { assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.startOffset(), equalTo(esDocsPosEnum.startOffset())); assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.endOffset(), equalTo(esDocsPosEnum.endOffset())); } else { assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.startOffset(), equalTo(-1)); assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.endOffset(), equalTo(-1)); } if (field.storedPayloads && testConfig.requestPayloads) { assertThat("Payload test failed" + failDesc, luceneDocsPosEnum.getPayload(), equalTo(esDocsPosEnum.getPayload())); } else { assertThat("Missing payload test failed" + failDesc, esDocsPosEnum.getPayload(), equalTo(null)); } } } assertNull("Es returned terms are done but lucene isn't", luceneTermEnum.next()); } }
From source file:org.elasticsearch.action.termvectors.AbstractTermVectorsTests.java
License:Apache License
protected void validateResponse(TermVectorsResponse esResponse, Fields luceneFields, TestConfig testConfig) throws IOException { assertThat(esResponse.getIndex(), equalTo(testConfig.doc.index)); TestDoc testDoc = testConfig.doc;// www.j ava 2s . co m HashSet<String> selectedFields = testConfig.selectedFields == null ? null : new HashSet<>(Arrays.asList(testConfig.selectedFields)); Fields esTermVectorFields = esResponse.getFields(); for (TestFieldSetting field : testDoc.fieldSettings) { Terms esTerms = esTermVectorFields.terms(field.name); if (selectedFields != null && !selectedFields.contains(field.name)) { assertNull(esTerms); continue; } assertNotNull(esTerms); Terms luceneTerms = luceneFields.terms(field.name); TermsEnum esTermEnum = esTerms.iterator(); TermsEnum luceneTermEnum = luceneTerms.iterator(); while (esTermEnum.next() != null) { assertNotNull(luceneTermEnum.next()); assertThat(esTermEnum.totalTermFreq(), equalTo(luceneTermEnum.totalTermFreq())); PostingsEnum esDocsPosEnum = esTermEnum.postings(null, null, PostingsEnum.POSITIONS); PostingsEnum luceneDocsPosEnum = luceneTermEnum.postings(null, null, PostingsEnum.POSITIONS); if (luceneDocsPosEnum == null) { // test we expect that... 
assertFalse(field.storedOffset); assertFalse(field.storedPayloads); assertFalse(field.storedPositions); continue; } String currentTerm = esTermEnum.term().utf8ToString(); assertThat("Token mismatch for field: " + field.name, currentTerm, equalTo(luceneTermEnum.term().utf8ToString())); esDocsPosEnum.nextDoc(); luceneDocsPosEnum.nextDoc(); int freq = esDocsPosEnum.freq(); assertThat(freq, equalTo(luceneDocsPosEnum.freq())); for (int i = 0; i < freq; i++) { String failDesc = " (field:" + field.name + " term:" + currentTerm + ")"; int lucenePos = luceneDocsPosEnum.nextPosition(); int esPos = esDocsPosEnum.nextPosition(); if (field.storedPositions && testConfig.requestPositions) { assertThat("Position test failed" + failDesc, lucenePos, equalTo(esPos)); } else { assertThat("Missing position test failed" + failDesc, esPos, equalTo(-1)); } if (field.storedOffset && testConfig.requestOffsets) { assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.startOffset(), equalTo(esDocsPosEnum.startOffset())); assertThat("Offset test failed" + failDesc, luceneDocsPosEnum.endOffset(), equalTo(esDocsPosEnum.endOffset())); } else { assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.startOffset(), equalTo(-1)); assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.endOffset(), equalTo(-1)); } if (field.storedPayloads && testConfig.requestPayloads) { assertThat("Payload test failed" + failDesc, luceneDocsPosEnum.getPayload(), equalTo(esDocsPosEnum.getPayload())); } else { assertThat("Missing payload test failed" + failDesc, esDocsPosEnum.getPayload(), equalTo(null)); } } } assertNull("Es returned terms are done but lucene isn't", luceneTermEnum.next()); } }
From source file:org.elasticsearch.index.mapper.core.TextFieldMapperTests.java
License:Apache License
public void testDefaultPositionIncrementGap() throws IOException { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties") .startObject("field").field("type", "text").endObject().endObject().endObject().endObject() .string();//from ww w .j a v a 2s . co m DocumentMapper mapper = indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE, false); assertEquals(mapping, mapper.mappingSource().toString()); ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder().startObject() .field("field", new String[] { "a", "b" }).endObject().bytes()); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(2, fields.length); assertEquals("a", fields[0].stringValue()); assertEquals("b", fields[1].stringValue()); IndexShard shard = indexService.getShard(0); shard.index(new Engine.Index(new Term("_uid", "1"), doc)); shard.refresh("test"); try (Engine.Searcher searcher = shard.acquireSearcher("test")) { LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader(); TermsEnum terms = leaf.terms("field").iterator(); assertTrue(terms.seekExact(new BytesRef("b"))); PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS); assertEquals(0, postings.nextDoc()); assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 1, postings.nextPosition()); } }
From source file:org.elasticsearch.index.mapper.core.TextFieldMapperTests.java
License:Apache License
/**
 * Indexes a two-valued text field mapped with a random {@code position_increment_gap} and
 * asserts that the first position of the second value's term is that gap plus one.
 */
public void testPositionIncrementGap() throws IOException {
    final int positionIncrementGap = randomIntBetween(1, 1000);

    String mappingJson = XContentFactory.jsonBuilder()
            .startObject()
            .startObject("type")
            .startObject("properties")
            .startObject("field")
            .field("type", "text")
            .field("position_increment_gap", positionIncrementGap)
            .endObject()
            .endObject()
            .endObject()
            .endObject()
            .string();

    DocumentMapper docMapper = indexService.mapperService().merge("type",
            new CompressedXContent(mappingJson), MergeReason.MAPPING_UPDATE, false);
    assertEquals(mappingJson, docMapper.mappingSource().toString());

    // One document whose "field" holds the two values "a" and "b".
    ParsedDocument parsed = docMapper.parse("test", "type", "1",
            XContentFactory.jsonBuilder()
                    .startObject()
                    .field("field", new String[] { "a", "b" })
                    .endObject()
                    .bytes());

    IndexableField[] indexedValues = parsed.rootDoc().getFields("field");
    assertEquals(2, indexedValues.length);
    assertEquals("a", indexedValues[0].stringValue());
    assertEquals("b", indexedValues[1].stringValue());

    IndexShard primary = indexService.getShard(0);
    primary.index(new Engine.Index(new Term("_uid", "1"), parsed));
    primary.refresh("test");

    try (Engine.Searcher engineSearcher = primary.acquireSearcher("test")) {
        LeafReader segment = engineSearcher.getDirectoryReader().leaves().get(0).reader();
        TermsEnum fieldTerms = segment.terms("field").iterator();
        assertTrue(fieldTerms.seekExact(new BytesRef("b")));

        PostingsEnum positionsOfB = fieldTerms.postings(null, PostingsEnum.POSITIONS);
        assertEquals(0, positionsOfB.nextDoc());
        assertEquals(positionIncrementGap + 1, positionsOfB.nextPosition());
    }
}
From source file:org.opengrok.suggest.SuggesterSearcher.java
License:Open Source License
private List<LookupResultItem> suggest(final Query query, final LeafReaderContext leafReaderContext, final String project, final SuggesterQuery suggesterQuery, final PopularityCounter searchCounts) throws IOException { if (Thread.currentThread().isInterrupted()) { interrupted = true;//from www.j av a 2 s . com return Collections.emptyList(); } boolean shouldLeaveOutSameTerms = shouldLeaveOutSameTerms(query, suggesterQuery); Set<BytesRef> tokensAlreadyIncluded = null; if (shouldLeaveOutSameTerms) { tokensAlreadyIncluded = SuggesterUtils.intoTermsExceptPhraseQuery(query).stream() .filter(t -> t.field().equals(suggesterQuery.getField())).map(Term::bytes) .collect(Collectors.toSet()); } boolean needsDocumentIds = query != null && !(query instanceof MatchAllDocsQuery); ComplexQueryData complexQueryData = null; if (needsDocumentIds) { complexQueryData = getComplexQueryData(query, leafReaderContext); if (interrupted) { return Collections.emptyList(); } } Terms terms = leafReaderContext.reader().terms(suggesterQuery.getField()); TermsEnum termsEnum = suggesterQuery.getTermsEnumForSuggestions(terms); LookupPriorityQueue queue = new LookupPriorityQueue(resultSize); boolean needPositionsAndFrequencies = needPositionsAndFrequencies(query); PostingsEnum postingsEnum = null; BytesRef term = termsEnum.next(); while (term != null) { if (Thread.currentThread().isInterrupted()) { interrupted = true; break; } if (needPositionsAndFrequencies) { postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.POSITIONS | PostingsEnum.FREQS); } else { postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); } int score; if (!needsDocumentIds) { score = normalizeDocumentFrequency(termsEnum.docFreq(), numDocs); } else if (needPositionsAndFrequencies) { score = getPhraseScore(complexQueryData, leafReaderContext.docBase, postingsEnum); } else { score = getDocumentFrequency(complexQueryData.documentIds, leafReaderContext.docBase, postingsEnum); } if (score > 0) { if 
(!shouldLeaveOutSameTerms || !tokensAlreadyIncluded.contains(term)) { score += searchCounts.get(term) * TERM_ALREADY_SEARCHED_MULTIPLIER; if (queue.canInsert(score)) { queue.insertWithOverflow(new LookupResultItem(term.utf8ToString(), project, score)); } } } term = termsEnum.next(); } return queue.getResult(); }
From source file:tw.com.kyle.luminance.LumQuery.java
public void ListTerm(int docId) throws IOException { Terms terms = idx_reader.getTermVector(docId, "content"); TermsEnum term_enum = terms.iterator(); while (term_enum.next() != null) { System.out.printf("%s", term_enum.term().utf8ToString()); PostingsEnum post_enum = term_enum.postings(null, PostingsEnum.POSITIONS); post_enum.nextDoc(); int freq = post_enum.freq(); System.out.printf("%d: ", freq); for (int i = 0; i < freq; ++i) { System.out.printf("%d, ", post_enum.nextPosition()); }// w w w .ja va 2 s .c o m System.out.printf("%n"); } }