Example usage for org.apache.lucene.util LineFileDocs LineFileDocs

List of usage examples for org.apache.lucene.util LineFileDocs LineFileDocs

Introduction

On this page you can find example usages of the LineFileDocs(Random) constructor of org.apache.lucene.util.LineFileDocs.

Prototype

public LineFileDocs(Random random) throws IOException 

Source Link

Usage

From source file:com.rocana.lucene.codec.v1.RocanaBasePostingsFormatTestCase.java

License:Apache License

/**
 * Indexes line-file documents through a codec whose "body" postings format
 * wraps the default format's {@code FieldsConsumer}, and checks that the
 * statistics gathered while writing agree with what the final reader reports.
 *
 * <p>On every {@code write} the wrapper first delegates to the real consumer,
 * then walks the "body" terms itself: it counts docFreq/totalTermFreq per
 * term, records them in {@code termFreqs} (for non-merge writes only), and
 * additionally exercises {@code seekExact} and {@code seekCeil} on the same
 * {@code TermsEnum}. After indexing, the summed and per-term counts are
 * compared against the reader's {@code Terms}/{@code TermsEnum}, and term
 * ordinals are checked when the enum supports them.
 */
@Override
public void testInvertedWrite() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);

    // Must be concurrent because thread(s) can be merging
    // while up to one thread flushes, and each of those
    // threads iterates over the map while the flushing
    // thread might be adding to it:
    final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();

    // Running totals over all non-merge writes; compared with the reader's
    // getSumDocFreq()/getSumTotalTermFreq() at the end of the test.
    final AtomicLong sumDocFreq = new AtomicLong();
    final AtomicLong sumTotalTermFreq = new AtomicLong();

    // TODO: would be better to use / delegate to the current
    // Codec returned by getCodec()

    iwc.setCodec(new AssertingCodec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {

            // Unwrap per-field formats so we end up with the format that
            // would actually be used for this field.
            PostingsFormat p = getCodec().postingsFormat();
            if (p instanceof PerFieldPostingsFormat) {
                p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            if (p instanceof RocanaPerFieldPostingsFormat) {
                p = ((RocanaPerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            final PostingsFormat defaultPostingsFormat = p;

            // The thread that asked for the postings format; non-merge writes
            // are asserted to run on this same thread below.
            final Thread mainThread = Thread.currentThread();

            if (field.equals("body")) {

                // A PF that counts up some stats and then in
                // the end we verify the stats match what the
                // final IndexReader says, just to exercise the
                // new freedom of iterating the postings more
                // than once at flush/merge:

                return new PostingsFormat(defaultPostingsFormat.getName()) {

                    @Override
                    public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {

                        final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);

                        return new FieldsConsumer() {
                            @Override
                            public void write(Fields fields) throws IOException {
                                // Let the real consumer write first; everything
                                // below is an additional read-only pass over the
                                // same fields.
                                fieldsConsumer.write(fields);

                                boolean isMerge = state.context.context == IOContext.Context.MERGE;

                                // We only use one thread for flushing
                                // in this test:
                                assert isMerge || Thread.currentThread() == mainThread;

                                // We iterate the provided TermsEnum
                                // twice, so we exercise this new freedom
                                // with the inverted API; if
                                // addOnSecondPass is true, we add up
                                // term stats on the 2nd iteration:
                                boolean addOnSecondPass = random().nextBoolean();

                                //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);

                                // Gather our own stats:
                                Terms terms = fields.terms("body");
                                assert terms != null;

                                TermsEnum termsEnum = terms.iterator();
                                PostingsEnum docs = null;
                                while (termsEnum.next() != null) {
                                    BytesRef term = termsEnum.term();
                                    // TODO: also sometimes ask for payloads/offsets?
                                    boolean noPositions = random().nextBoolean();
                                    // The freqs-only request passes the previous enum
                                    // in for reuse; the positions request always
                                    // passes null.
                                    if (noPositions) {
                                        docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                    } else {
                                        docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                    }
                                    int docFreq = 0;
                                    long totalTermFreq = 0;
                                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                        docFreq++;
                                        totalTermFreq += docs.freq();
                                        // Read only a random number of this doc's
                                        // positions (between 1 and freq):
                                        int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                        if (!noPositions) {
                                            for (int i = 0; i < limit; i++) {
                                                docs.nextPosition();
                                            }
                                        }
                                    }

                                    String termString = term.utf8ToString();

                                    // During merge we should only see terms
                                    // we had already seen during a
                                    // previous flush:
                                    assertTrue(isMerge == false || termFreqs.containsKey(termString));

                                    if (isMerge == false) {
                                        if (addOnSecondPass == false) {
                                            // First-pass accounting: create the
                                            // entry on demand and add this
                                            // write's counts to it.
                                            TermFreqs tf = termFreqs.get(termString);
                                            if (tf == null) {
                                                tf = new TermFreqs();
                                                termFreqs.put(termString, tf);
                                            }
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        } else if (termFreqs.containsKey(termString) == false) {
                                            // Add placeholder (2nd pass will
                                            // set its counts):
                                            termFreqs.put(termString, new TermFreqs());
                                        }
                                    }
                                }

                                // Also test seeking the TermsEnum:
                                // (iterates every term recorded so far, not just
                                // the ones in this write; terms absent here
                                // make seekExact return false and are skipped)
                                for (String term : termFreqs.keySet()) {
                                    if (termsEnum.seekExact(new BytesRef(term))) {
                                        // TODO: also sometimes ask for payloads/offsets?
                                        boolean noPositions = random().nextBoolean();
                                        if (noPositions) {
                                            docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                        } else {
                                            docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                        }

                                        int docFreq = 0;
                                        long totalTermFreq = 0;
                                        while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                            docFreq++;
                                            totalTermFreq += docs.freq();
                                            int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                            if (!noPositions) {
                                                for (int i = 0; i < limit; i++) {
                                                    docs.nextPosition();
                                                }
                                            }
                                        }

                                        // Second-pass accounting: fill in the
                                        // counts for entries the first pass left
                                        // as placeholders.
                                        if (isMerge == false && addOnSecondPass) {
                                            TermFreqs tf = termFreqs.get(term);
                                            assert tf != null;
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        }

                                        //System.out.println("  term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
                                        // What this single write saw can never
                                        // exceed the accumulated totals:
                                        assertTrue(docFreq <= termFreqs.get(term).docFreq);
                                        assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                                    }
                                }

                                // Also test seekCeil
                                for (int iter = 0; iter < 10; iter++) {
                                    BytesRef term = new BytesRef(
                                            TestUtil.randomRealisticUnicodeString(random()));
                                    SeekStatus status = termsEnum.seekCeil(term);
                                    // On NOT_FOUND the enum must be positioned on a
                                    // term that sorts after the one we asked for.
                                    if (status == SeekStatus.NOT_FOUND) {
                                        assertTrue(term.compareTo(termsEnum.term()) < 0);
                                    }
                                }
                            }

                            @Override
                            public void close() throws IOException {
                                fieldsConsumer.close();
                            }
                        };
                    }

                    @Override
                    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                        // Reading is not instrumented; delegate directly.
                        return defaultPostingsFormat.fieldsProducer(state);
                    }
                };
            } else {
                return defaultPostingsFormat;
            }
        }
    });

    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

    // NOTE(review): docs is never closed in this method — confirm whether
    // that is intentional or a leak.
    LineFileDocs docs = new LineFileDocs(random());
    // Keep adding documents until their measured size (RamUsageTester)
    // reaches the target.
    int bytesToIndex = atLeast(100) * 1024;
    int bytesIndexed = 0;
    while (bytesIndexed < bytesToIndex) {
        Document doc = docs.nextDoc();
        w.addDocument(doc);
        bytesIndexed += RamUsageTester.sizeOf(doc);
    }

    IndexReader r = w.getReader();
    w.close();

    // The totals collected during writes must match the reader's view.
    Terms terms = MultiFields.getTerms(r, "body");
    assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
    assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());

    TermsEnum termsEnum = terms.iterator();
    long termCount = 0;
    // Cleared the first time ord() throws UnsupportedOperationException, so
    // ord() is not called again after that.
    boolean supportsOrds = true;
    while (termsEnum.next() != null) {
        BytesRef term = termsEnum.term();
        assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
        assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
        if (supportsOrds) {
            long ord;
            try {
                ord = termsEnum.ord();
            } catch (UnsupportedOperationException uoe) {
                supportsOrds = false;
                ord = -1;
            }
            // An ord of -1 is skipped; any other value must equal the
            // term's position in iteration order.
            if (ord != -1) {
                assertEquals(termCount, ord);
            }
        }
        termCount++;
    }
    // Every term recorded during writes must be present in the reader.
    assertEquals(termFreqs.size(), termCount);

    r.close();
    dir.close();
}

From source file:org.elasticsearch.common.compress.AbstractCompressedStreamTestCase.java

License:Apache License

/**
 * Runs {@code doTest} on ten byte buffers, each built by concatenating the
 * UTF-8 encoded "body" field of a random number (1 to 200) of line-file
 * documents.
 *
 * @throws IOException if reading the line file or {@code doTest} fails
 */
public void testLineDocs() throws IOException {
    Random r = getRandom();
    // try-with-resources: the line file is closed even when doTest throws,
    // which the previous trailing close() call did not guarantee.
    try (LineFileDocs lineFileDocs = new LineFileDocs(r)) {
        for (int i = 0; i < 10; i++) {
            int numDocs = TestUtil.nextInt(r, 1, 200);
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            for (int j = 0; j < numDocs; j++) {
                String s = lineFileDocs.nextDoc().get("body");
                bos.write(s.getBytes(StandardCharsets.UTF_8));
            }
            doTest(bos.toByteArray());
        }
    }
}

From source file:org.elasticsearch.common.compress.AbstractCompressedStreamTestCase.java

License:Apache License

/**
 * Runs the line-docs round trip concurrently on 2 to 6 threads.
 *
 * <p>Each thread gets its own seed drawn from the test's random source,
 * waits on a shared latch so all threads start together, then feeds ten
 * buffers of concatenated "body" fields to {@code doTest}.
 *
 * @throws Exception if joining the worker threads is interrupted
 */
public void testLineDocsThreads() throws Exception {
    final Random r = getRandom();
    int threadCount = TestUtil.nextInt(r, 2, 6);
    Thread[] threads = new Thread[threadCount];
    final CountDownLatch startingGun = new CountDownLatch(1);
    for (int tid = 0; tid < threadCount; tid++) {
        // Draw the seed on the main thread so each worker owns a private Random.
        final long seed = r.nextLong();
        threads[tid] = new Thread() {
            @Override
            public void run() {
                try {
                    Random r = new Random(seed);
                    startingGun.await();
                    // try-with-resources: the line file is closed even when
                    // doTest throws inside the worker.
                    try (LineFileDocs lineFileDocs = new LineFileDocs(r)) {
                        for (int i = 0; i < 10; i++) {
                            int numDocs = TestUtil.nextInt(r, 1, 200);
                            ByteArrayOutputStream bos = new ByteArrayOutputStream();
                            for (int j = 0; j < numDocs; j++) {
                                String s = lineFileDocs.nextDoc().get("body");
                                bos.write(s.getBytes(StandardCharsets.UTF_8));
                            }
                            doTest(bos.toByteArray());
                        }
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        };
        threads[tid].start();
    }
    // Release all workers at once, then wait for them to finish.
    startingGun.countDown();
    for (Thread t : threads) {
        t.join();
    }
}

From source file:org.elasticsearch.common.compress.AbstractCompressedStreamTestCase.java

License:Apache License

/**
 * Builds two buffers of at least 400000 bytes each by randomly interleaving
 * the {@code addInt}, {@code addLong}, {@code addString} and {@code addBytes}
 * helpers, and passes each buffer to {@code doTest}.
 *
 * @throws IOException if reading the line file or {@code doTest} fails
 */
public void testMixed() throws IOException {
    Random r = getRandom();
    // The original never closed the LineFileDocs; try-with-resources releases
    // it on both normal completion and failure.
    try (LineFileDocs lineFileDocs = new LineFileDocs(r)) {
        for (int i = 0; i < 2; ++i) {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            // Drawn once per buffer and handed to addInt/addLong on every call.
            int prevInt = r.nextInt();
            long prevLong = r.nextLong();
            while (bos.size() < 400000) {
                switch (r.nextInt(4)) {
                case 0:
                    addInt(r, prevInt, bos);
                    break;
                case 1:
                    addLong(r, prevLong, bos);
                    break;
                case 2:
                    addString(lineFileDocs, bos);
                    break;
                case 3:
                    addBytes(r, bos);
                    break;
                default:
                    throw new IllegalStateException("Random is broken");
                }
            }
            doTest(bos.toByteArray());
        }
    }
}

From source file:org.elasticsearch.common.compress.AbstractCompressedStreamTests.java

License:Apache License

/**
 * Runs the line-docs round trip concurrently on 2 to 10 threads.
 *
 * <p>Each thread gets its own seed drawn from the test's random source,
 * waits on a shared latch so all threads start together, then feeds ten
 * buffers of concatenated "body" fields to {@code doTest}.
 *
 * @throws Exception if joining the worker threads is interrupted
 */
public void testLineDocsThreads() throws Exception {
    final Random r = getRandom();
    int threadCount = TestUtil.nextInt(r, 2, 10);
    Thread[] threads = new Thread[threadCount];
    final CountDownLatch startingGun = new CountDownLatch(1);
    for (int tid = 0; tid < threadCount; tid++) {
        // Draw the seed on the main thread so each worker owns a private Random.
        final long seed = r.nextLong();
        threads[tid] = new Thread() {
            @Override
            public void run() {
                try {
                    Random r = new Random(seed);
                    startingGun.await();
                    // try-with-resources: the line file is closed even when
                    // doTest throws inside the worker.
                    try (LineFileDocs lineFileDocs = new LineFileDocs(r)) {
                        for (int i = 0; i < 10; i++) {
                            int numDocs = TestUtil.nextInt(r, 1, 200);
                            ByteArrayOutputStream bos = new ByteArrayOutputStream();
                            for (int j = 0; j < numDocs; j++) {
                                String s = lineFileDocs.nextDoc().get("body");
                                bos.write(s.getBytes(StandardCharsets.UTF_8));
                            }
                            doTest(bos.toByteArray());
                        }
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        };
        threads[tid].start();
    }
    // Release all workers at once, then wait for them to finish.
    startingGun.countDown();
    for (Thread t : threads) {
        t.join();
    }
}

From source file:org.elasticsearch.common.compress.DeflateCompressTests.java

License:Apache License

/**
 * Runs {@code doTest} on ten byte buffers, each built by concatenating the
 * UTF-8 encoded "body" field of a random number (1 to 200) of line-file
 * documents.
 *
 * @throws IOException if reading the line file or {@code doTest} fails
 */
public void testLineDocs() throws IOException {
    Random r = random();
    // try-with-resources: the line file is closed even when doTest throws,
    // which the previous trailing close() call did not guarantee.
    try (LineFileDocs lineFileDocs = new LineFileDocs(r)) {
        for (int i = 0; i < 10; i++) {
            int numDocs = TestUtil.nextInt(r, 1, 200);
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            for (int j = 0; j < numDocs; j++) {
                String s = lineFileDocs.nextDoc().get("body");
                bos.write(s.getBytes(StandardCharsets.UTF_8));
            }
            doTest(bos.toByteArray());
        }
    }
}

From source file:org.elasticsearch.common.compress.DeflateCompressTests.java

License:Apache License

/**
 * Runs the line-docs round trip concurrently on 2 to 6 threads.
 *
 * <p>Each thread gets its own seed drawn from the test's random source,
 * waits on a shared latch so all threads start together, then feeds ten
 * buffers of concatenated "body" fields to {@code doTest}.
 *
 * @throws Exception if joining the worker threads is interrupted
 */
public void testLineDocsThreads() throws Exception {
    final Random r = random();
    int threadCount = TestUtil.nextInt(r, 2, 6);
    Thread[] threads = new Thread[threadCount];
    final CountDownLatch startingGun = new CountDownLatch(1);
    for (int tid = 0; tid < threadCount; tid++) {
        // Draw the seed on the main thread so each worker owns a private Random.
        final long seed = r.nextLong();
        threads[tid] = new Thread() {
            @Override
            public void run() {
                try {
                    Random r = new Random(seed);
                    startingGun.await();
                    // try-with-resources: the line file is closed even when
                    // doTest throws inside the worker.
                    try (LineFileDocs lineFileDocs = new LineFileDocs(r)) {
                        for (int i = 0; i < 10; i++) {
                            int numDocs = TestUtil.nextInt(r, 1, 200);
                            ByteArrayOutputStream bos = new ByteArrayOutputStream();
                            for (int j = 0; j < numDocs; j++) {
                                String s = lineFileDocs.nextDoc().get("body");
                                bos.write(s.getBytes(StandardCharsets.UTF_8));
                            }
                            doTest(bos.toByteArray());
                        }
                    }
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        };
        threads[tid].start();
    }
    // Release all workers at once, then wait for them to finish.
    startingGun.countDown();
    for (Thread t : threads) {
        t.join();
    }
}

From source file:org.elasticsearch.common.compress.DeflateCompressTests.java

License:Apache License

/**
 * Builds two buffers of at least 400000 bytes each by randomly interleaving
 * the {@code addInt}, {@code addLong}, {@code addString} and {@code addBytes}
 * helpers, and passes each buffer to {@code doTest}.
 *
 * @throws IOException if reading the line file or {@code doTest} fails
 */
public void testMixed() throws IOException {
    Random r = random();
    // The original never closed the LineFileDocs; try-with-resources releases
    // it on both normal completion and failure.
    try (LineFileDocs lineFileDocs = new LineFileDocs(r)) {
        for (int i = 0; i < 2; ++i) {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            // Drawn once per buffer and handed to addInt/addLong on every call.
            int prevInt = r.nextInt();
            long prevLong = r.nextLong();
            while (bos.size() < 400000) {
                switch (r.nextInt(4)) {
                case 0:
                    addInt(r, prevInt, bos);
                    break;
                case 1:
                    addLong(r, prevLong, bos);
                    break;
                case 2:
                    addString(lineFileDocs, bos);
                    break;
                case 3:
                    addBytes(r, bos);
                    break;
                default:
                    throw new IllegalStateException("Random is broken");
                }
            }
            doTest(bos.toByteArray());
        }
    }
}

From source file:org.elasticsearch.index.translog.TranslogTests.java

License:Apache License

/**
 * Adds between 100 and 200 index operations to the translog and, after each
 * add, checks that a fresh snapshot reports exactly the number of operations
 * added so far.
 *
 * <p>For every previously added operation it also asserts that the recorded
 * location belongs to the translog's current file generation and that the
 * snapshot yields a non-null operation.
 *
 * @throws IOException if a translog or line-file operation fails
 */
public void testTranslogOpsCountIsCorrect() throws IOException {
    List<Translog.Location> locations = new ArrayList<>();
    int numOps = randomIntBetween(100, 200);
    // writes pretty big docs so we cross buffer borders regularly
    // (try-with-resources: the original never closed the LineFileDocs)
    try (LineFileDocs lineFileDocs = new LineFileDocs(random())) {
        for (int opsAdded = 0; opsAdded < numOps; opsAdded++) {
            locations.add(translog.add(new Translog.Index("test", "" + opsAdded,
                    lineFileDocs.nextDoc().toString().getBytes(Charset.forName("UTF-8")))));
            Translog.Snapshot snapshot = this.translog.newSnapshot();
            assertEquals(opsAdded + 1, snapshot.totalOperations());
            for (int i = 0; i < opsAdded; i++) {
                assertEquals("expected operation" + i + " to be in the current translog but wasn't",
                        translog.currentFileGeneration(), locations.get(i).generation);
                Translog.Operation next = snapshot.next();
                assertNotNull("operation " + i + " must be non-null", next);
            }
        }
    }
}

From source file:org.elasticsearch.index.translog.TranslogTests.java

License:Apache License

/**
 * Checks that once the fail switch is set to always fail, adding to (and
 * syncing) the translog ends with the translog closed and an
 * {@code UnknownException} recorded as its tragic exception.
 *
 * <p>The failing add/sync may surface either an {@code UnknownException}
 * directly or a {@code TranslogException} whose cause is an
 * {@code UnknownException}; completing without an exception fails the test.
 *
 * @throws IOException if a translog or line-file operation fails unexpectedly
 */
public void testTragicEventCanBeAnyException() throws IOException {
    Path tempDir = createTempDir();
    final FailSwitch fail = new FailSwitch();
    TranslogConfig config = getTranslogConfig(tempDir);
    Translog translog = getFailableTranslog(fail, config, false, true, null);
    // writes pretty big docs so we cross buffer borders regularly
    // (try-with-resources: the original never closed the LineFileDocs)
    try (LineFileDocs lineFileDocs = new LineFileDocs(random())) {
        // First add happens before failures are switched on.
        translog.add(new Translog.Index("test", "1",
                lineFileDocs.nextDoc().toString().getBytes(Charset.forName("UTF-8"))));
        fail.failAlways();
        try {
            Translog.Location location = translog.add(new Translog.Index("test", "2",
                    lineFileDocs.nextDoc().toString().getBytes(Charset.forName("UTF-8"))));
            if (randomBoolean()) {
                translog.ensureSynced(location);
            } else {
                translog.sync();
            }
            //TODO once we have a mock FS that can simulate we can also fail on plain sync
            fail("WTF");
        } catch (UnknownException ex) {
            // expected outcome — nothing further to check here
        } catch (TranslogException ex) {
            assertTrue(ex.getCause() instanceof UnknownException);
        }
    }
    assertFalse(translog.isOpen());
    assertTrue(translog.getTragicException() instanceof UnknownException);
}