List of usage examples for the org.apache.lucene.util.LineFileDocs constructor
public LineFileDocs(Random random) throws IOException
From source file:com.rocana.lucene.codec.v1.RocanaBasePostingsFormatTestCase.java
License:Apache License
/**
 * Indexes random line-file documents with a stats-counting PostingsFormat wrapped
 * around the default codec's format for the "body" field, then verifies that the
 * docFreq/totalTermFreq counted during flush/merge match what the final
 * IndexReader reports. Exercises iterating the flush/merge postings more than once.
 *
 * Fixes vs. original: closes the LineFileDocs (was leaked), removes scraper-spam
 * comments, fixes the "excercise" typo; logic is unchanged.
 */
@Override
public void testInvertedWrite() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    // Must be concurrent because thread(s) can be merging
    // while up to one thread flushes, and each of those
    // threads iterates over the map while the flushing
    // thread might be adding to it:
    final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();
    final AtomicLong sumDocFreq = new AtomicLong();
    final AtomicLong sumTotalTermFreq = new AtomicLong();
    // TODO: would be better to use / delegate to the current
    // Codec returned by getCodec()
    iwc.setCodec(new AssertingCodec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            PostingsFormat p = getCodec().postingsFormat();
            if (p instanceof PerFieldPostingsFormat) {
                p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            if (p instanceof RocanaPerFieldPostingsFormat) {
                p = ((RocanaPerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            final PostingsFormat defaultPostingsFormat = p;
            final Thread mainThread = Thread.currentThread();
            if (field.equals("body")) {
                // A PF that counts up some stats and then in
                // the end we verify the stats match what the
                // final IndexReader says, just to exercise the
                // new freedom of iterating the postings more
                // than once at flush/merge:
                return new PostingsFormat(defaultPostingsFormat.getName()) {
                    @Override
                    public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
                        final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);
                        return new FieldsConsumer() {
                            @Override
                            public void write(Fields fields) throws IOException {
                                fieldsConsumer.write(fields);
                                boolean isMerge = state.context.context == IOContext.Context.MERGE;
                                // We only use one thread for flushing in this test:
                                assert isMerge || Thread.currentThread() == mainThread;
                                // We iterate the provided TermsEnum twice, so we
                                // exercise this new freedom with the inverted API;
                                // if addOnSecondPass is true, we add up term stats
                                // on the 2nd iteration:
                                boolean addOnSecondPass = random().nextBoolean();
                                //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);
                                // Gather our own stats:
                                Terms terms = fields.terms("body");
                                assert terms != null;
                                TermsEnum termsEnum = terms.iterator();
                                PostingsEnum docs = null;
                                while (termsEnum.next() != null) {
                                    BytesRef term = termsEnum.term();
                                    // TODO: also sometimes ask for payloads/offsets?
                                    boolean noPositions = random().nextBoolean();
                                    if (noPositions) {
                                        docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                    } else {
                                        docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                    }
                                    int docFreq = 0;
                                    long totalTermFreq = 0;
                                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                        docFreq++;
                                        totalTermFreq += docs.freq();
                                        int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                        if (!noPositions) {
                                            for (int i = 0; i < limit; i++) {
                                                docs.nextPosition();
                                            }
                                        }
                                    }
                                    String termString = term.utf8ToString();
                                    // During merge we should only see terms
                                    // we had already seen during a
                                    // previous flush:
                                    assertTrue(isMerge == false || termFreqs.containsKey(termString));
                                    if (isMerge == false) {
                                        if (addOnSecondPass == false) {
                                            TermFreqs tf = termFreqs.get(termString);
                                            if (tf == null) {
                                                tf = new TermFreqs();
                                                termFreqs.put(termString, tf);
                                            }
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        } else if (termFreqs.containsKey(termString) == false) {
                                            // Add placeholder (2nd pass will
                                            // set its counts):
                                            termFreqs.put(termString, new TermFreqs());
                                        }
                                    }
                                }
                                // Also test seeking the TermsEnum:
                                for (String term : termFreqs.keySet()) {
                                    if (termsEnum.seekExact(new BytesRef(term))) {
                                        // TODO: also sometimes ask for payloads/offsets?
                                        boolean noPositions = random().nextBoolean();
                                        if (noPositions) {
                                            docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                        } else {
                                            docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                        }
                                        int docFreq = 0;
                                        long totalTermFreq = 0;
                                        while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                            docFreq++;
                                            totalTermFreq += docs.freq();
                                            int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                            if (!noPositions) {
                                                for (int i = 0; i < limit; i++) {
                                                    docs.nextPosition();
                                                }
                                            }
                                        }
                                        if (isMerge == false && addOnSecondPass) {
                                            TermFreqs tf = termFreqs.get(term);
                                            assert tf != null;
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        }
                                        //System.out.println(" term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
                                        assertTrue(docFreq <= termFreqs.get(term).docFreq);
                                        assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                                    }
                                }
                                // Also test seekCeil
                                for (int iter = 0; iter < 10; iter++) {
                                    BytesRef term = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
                                    SeekStatus status = termsEnum.seekCeil(term);
                                    if (status == SeekStatus.NOT_FOUND) {
                                        assertTrue(term.compareTo(termsEnum.term()) < 0);
                                    }
                                }
                            }

                            @Override
                            public void close() throws IOException {
                                fieldsConsumer.close();
                            }
                        };
                    }

                    @Override
                    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                        return defaultPostingsFormat.fieldsProducer(state);
                    }
                };
            } else {
                return defaultPostingsFormat;
            }
        }
    });
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    LineFileDocs docs = new LineFileDocs(random());
    int bytesToIndex = atLeast(100) * 1024;
    int bytesIndexed = 0;
    while (bytesIndexed < bytesToIndex) {
        Document doc = docs.nextDoc();
        w.addDocument(doc);
        bytesIndexed += RamUsageTester.sizeOf(doc);
    }
    // Fix: the original leaked the LineFileDocs (it is Closeable).
    docs.close();
    IndexReader r = w.getReader();
    w.close();
    Terms terms = MultiFields.getTerms(r, "body");
    assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
    assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());
    TermsEnum termsEnum = terms.iterator();
    long termCount = 0;
    boolean supportsOrds = true;
    while (termsEnum.next() != null) {
        BytesRef term = termsEnum.term();
        assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
        assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
        if (supportsOrds) {
            long ord;
            try {
                ord = termsEnum.ord();
            } catch (UnsupportedOperationException uoe) {
                // This codec does not expose term ords; stop asking.
                supportsOrds = false;
                ord = -1;
            }
            if (ord != -1) {
                assertEquals(termCount, ord);
            }
        }
        termCount++;
    }
    assertEquals(termFreqs.size(), termCount);
    r.close();
    dir.close();
}
From source file:org.elasticsearch.common.compress.AbstractCompressedStreamTestCase.java
License:Apache License
public void testLineDocs() throws IOException { Random r = getRandom();//from w w w . j a v a 2 s .c o m LineFileDocs lineFileDocs = new LineFileDocs(r); for (int i = 0; i < 10; i++) { int numDocs = TestUtil.nextInt(r, 1, 200); ByteArrayOutputStream bos = new ByteArrayOutputStream(); for (int j = 0; j < numDocs; j++) { String s = lineFileDocs.nextDoc().get("body"); bos.write(s.getBytes(StandardCharsets.UTF_8)); } doTest(bos.toByteArray()); } lineFileDocs.close(); }
From source file:org.elasticsearch.common.compress.AbstractCompressedStreamTestCase.java
License:Apache License
public void testLineDocsThreads() throws Exception { final Random r = getRandom(); int threadCount = TestUtil.nextInt(r, 2, 6); Thread[] threads = new Thread[threadCount]; final CountDownLatch startingGun = new CountDownLatch(1); for (int tid = 0; tid < threadCount; tid++) { final long seed = r.nextLong(); threads[tid] = new Thread() { @Override// ww w . j a va2s. c om public void run() { try { Random r = new Random(seed); startingGun.await(); LineFileDocs lineFileDocs = new LineFileDocs(r); for (int i = 0; i < 10; i++) { int numDocs = TestUtil.nextInt(r, 1, 200); ByteArrayOutputStream bos = new ByteArrayOutputStream(); for (int j = 0; j < numDocs; j++) { String s = lineFileDocs.nextDoc().get("body"); bos.write(s.getBytes(StandardCharsets.UTF_8)); } doTest(bos.toByteArray()); } lineFileDocs.close(); } catch (Exception e) { throw new RuntimeException(e); } } }; threads[tid].start(); } startingGun.countDown(); for (Thread t : threads) { t.join(); } }
From source file:org.elasticsearch.common.compress.AbstractCompressedStreamTestCase.java
License:Apache License
public void testMixed() throws IOException { Random r = getRandom();/*from ww w . j a va 2s . c om*/ LineFileDocs lineFileDocs = new LineFileDocs(r); for (int i = 0; i < 2; ++i) { ByteArrayOutputStream bos = new ByteArrayOutputStream(); int prevInt = r.nextInt(); long prevLong = r.nextLong(); while (bos.size() < 400000) { switch (r.nextInt(4)) { case 0: addInt(r, prevInt, bos); break; case 1: addLong(r, prevLong, bos); break; case 2: addString(lineFileDocs, bos); break; case 3: addBytes(r, bos); break; default: throw new IllegalStateException("Random is broken"); } } doTest(bos.toByteArray()); } }
From source file:org.elasticsearch.common.compress.AbstractCompressedStreamTests.java
License:Apache License
public void testLineDocsThreads() throws Exception { final Random r = getRandom(); int threadCount = TestUtil.nextInt(r, 2, 10); Thread[] threads = new Thread[threadCount]; final CountDownLatch startingGun = new CountDownLatch(1); for (int tid = 0; tid < threadCount; tid++) { final long seed = r.nextLong(); threads[tid] = new Thread() { @Override// ww w. j a v a 2 s. c om public void run() { try { Random r = new Random(seed); startingGun.await(); LineFileDocs lineFileDocs = new LineFileDocs(r); for (int i = 0; i < 10; i++) { int numDocs = TestUtil.nextInt(r, 1, 200); ByteArrayOutputStream bos = new ByteArrayOutputStream(); for (int j = 0; j < numDocs; j++) { String s = lineFileDocs.nextDoc().get("body"); bos.write(s.getBytes(StandardCharsets.UTF_8)); } doTest(bos.toByteArray()); } lineFileDocs.close(); } catch (Exception e) { throw new RuntimeException(e); } } }; threads[tid].start(); } startingGun.countDown(); for (Thread t : threads) { t.join(); } }
From source file:org.elasticsearch.common.compress.DeflateCompressTests.java
License:Apache License
public void testLineDocs() throws IOException { Random r = random();//from ww w . j ava 2 s .com LineFileDocs lineFileDocs = new LineFileDocs(r); for (int i = 0; i < 10; i++) { int numDocs = TestUtil.nextInt(r, 1, 200); ByteArrayOutputStream bos = new ByteArrayOutputStream(); for (int j = 0; j < numDocs; j++) { String s = lineFileDocs.nextDoc().get("body"); bos.write(s.getBytes(StandardCharsets.UTF_8)); } doTest(bos.toByteArray()); } lineFileDocs.close(); }
From source file:org.elasticsearch.common.compress.DeflateCompressTests.java
License:Apache License
public void testLineDocsThreads() throws Exception { final Random r = random(); int threadCount = TestUtil.nextInt(r, 2, 6); Thread[] threads = new Thread[threadCount]; final CountDownLatch startingGun = new CountDownLatch(1); for (int tid = 0; tid < threadCount; tid++) { final long seed = r.nextLong(); threads[tid] = new Thread() { @Override//from ww w . ja v a2s . com public void run() { try { Random r = new Random(seed); startingGun.await(); LineFileDocs lineFileDocs = new LineFileDocs(r); for (int i = 0; i < 10; i++) { int numDocs = TestUtil.nextInt(r, 1, 200); ByteArrayOutputStream bos = new ByteArrayOutputStream(); for (int j = 0; j < numDocs; j++) { String s = lineFileDocs.nextDoc().get("body"); bos.write(s.getBytes(StandardCharsets.UTF_8)); } doTest(bos.toByteArray()); } lineFileDocs.close(); } catch (Exception e) { throw new RuntimeException(e); } } }; threads[tid].start(); } startingGun.countDown(); for (Thread t : threads) { t.join(); } }
From source file:org.elasticsearch.common.compress.DeflateCompressTests.java
License:Apache License
public void testMixed() throws IOException { Random r = random();/*from w ww . j a v a 2 s . c o m*/ LineFileDocs lineFileDocs = new LineFileDocs(r); for (int i = 0; i < 2; ++i) { ByteArrayOutputStream bos = new ByteArrayOutputStream(); int prevInt = r.nextInt(); long prevLong = r.nextLong(); while (bos.size() < 400000) { switch (r.nextInt(4)) { case 0: addInt(r, prevInt, bos); break; case 1: addLong(r, prevLong, bos); break; case 2: addString(lineFileDocs, bos); break; case 3: addBytes(r, bos); break; default: throw new IllegalStateException("Random is broken"); } } doTest(bos.toByteArray()); } }
From source file:org.elasticsearch.index.translog.TranslogTests.java
License:Apache License
public void testTranslogOpsCountIsCorrect() throws IOException { List<Translog.Location> locations = new ArrayList<>(); int numOps = randomIntBetween(100, 200); LineFileDocs lineFileDocs = new LineFileDocs(random()); // writes pretty big docs so we cross buffer boarders regularly for (int opsAdded = 0; opsAdded < numOps; opsAdded++) { locations.add(translog.add(new Translog.Index("test", "" + opsAdded, lineFileDocs.nextDoc().toString().getBytes(Charset.forName("UTF-8"))))); Translog.Snapshot snapshot = this.translog.newSnapshot(); assertEquals(opsAdded + 1, snapshot.totalOperations()); for (int i = 0; i < opsAdded; i++) { assertEquals("expected operation" + i + " to be in the current translog but wasn't", translog.currentFileGeneration(), locations.get(i).generation); Translog.Operation next = snapshot.next(); assertNotNull("operation " + i + " must be non-null", next); }//from w ww .j a v a2 s . co m } }
From source file:org.elasticsearch.index.translog.TranslogTests.java
License:Apache License
public void testTragicEventCanBeAnyException() throws IOException { Path tempDir = createTempDir(); final FailSwitch fail = new FailSwitch(); TranslogConfig config = getTranslogConfig(tempDir); Translog translog = getFailableTranslog(fail, config, false, true, null); LineFileDocs lineFileDocs = new LineFileDocs(random()); // writes pretty big docs so we cross buffer boarders regularly translog.add(new Translog.Index("test", "1", lineFileDocs.nextDoc().toString().getBytes(Charset.forName("UTF-8")))); fail.failAlways();//from w w w.ja v a 2 s. c o m try { Translog.Location location = translog.add(new Translog.Index("test", "2", lineFileDocs.nextDoc().toString().getBytes(Charset.forName("UTF-8")))); if (randomBoolean()) { translog.ensureSynced(location); } else { translog.sync(); } //TODO once we have a mock FS that can simulate we can also fail on plain sync fail("WTF"); } catch (UnknownException ex) { // w00t } catch (TranslogException ex) { assertTrue(ex.getCause() instanceof UnknownException); } assertFalse(translog.isOpen()); assertTrue(translog.getTragicException() instanceof UnknownException); }