List of usage examples for org.apache.lucene.index RandomIndexWriter forceMerge
public void forceMerge(int maxSegmentCount) throws IOException
From source file:com.sindicetech.siren.util.SirenTestCase.java
License:Open Source License
/**
 * Force-merges the given writer's index down to a single segment.
 *
 * @param writer the random index writer whose index is merged
 * @throws IOException if the merge fails
 */
protected void forceMerge(final RandomIndexWriter writer) throws IOException {
    writer.forceMerge(1);
}
From source file:org.apache.solr.search.function.TestOrdValues.java
License:Apache License
protected static void createIndex(boolean doMultiSegment) throws Exception { if (VERBOSE) { System.out.println("TEST: setUp"); }/*from www. j a v a 2 s . c o m*/ // prepare a small index with just a few documents. dir = newDirectory(); anlzr = new MockAnalyzer(random()); IndexWriterConfig iwc = newIndexWriterConfig(anlzr).setMergePolicy(newLogMergePolicy()); if (doMultiSegment) { iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 7)); } RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); // add docs not exactly in natural ID order, to verify we do check the order of docs by scores int remaining = N_DOCS; boolean done[] = new boolean[N_DOCS]; int i = 0; while (remaining > 0) { if (done[i]) { throw new Exception( "to set this test correctly N_DOCS=" + N_DOCS + " must be primary and greater than 2!"); } addDoc(iw, i); done[i] = true; i = (i + 4) % N_DOCS; remaining--; } if (!doMultiSegment) { if (VERBOSE) { System.out.println("TEST: setUp full merge"); } iw.forceMerge(1); } iw.close(); if (VERBOSE) { System.out.println("TEST: setUp done close"); } }
From source file:org.apache.solr.uninverting.TestDocTermOrds.java
License:Apache License
/**
 * Exercises the TermsEnum exposed over FieldCache's uninverted doc term ords:
 * iteration order and ord values, seekCeil/seekExact by term, seeking by ord,
 * and SortedSetDocValues.lookupTerm semantics, on a 3-term single-segment index.
 */
public void testSortedTermsEnum() throws IOException {
    Directory directory = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
    iwconfig.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

    Document doc = new Document();
    doc.add(new StringField("field", "hello", Field.Store.NO));
    iwriter.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("field", "world", Field.Store.NO));
    // we need a second value for a doc, or we don't actually test DocTermOrds!
    doc.add(new StringField("field", "hello", Field.Store.NO));
    iwriter.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("field", "beer", Field.Store.NO));
    iwriter.addDocument(doc);
    // single segment, so getOnlyLeafReader below is valid
    iwriter.forceMerge(1);

    DirectoryReader ireader = iwriter.getReader();
    iwriter.close();

    LeafReader ar = getOnlyLeafReader(ireader);
    SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field", null);
    assertEquals(3, dv.getValueCount());

    TermsEnum termsEnum = dv.termsEnum();

    // next(): terms come back in sorted order with dense ords 0..2
    assertEquals("beer", termsEnum.next().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals("hello", termsEnum.next().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals("world", termsEnum.next().utf8ToString());
    assertEquals(2, termsEnum.ord());

    // seekCeil(): positions on the smallest term >= target
    assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));

    // seekExact(BytesRef): succeeds only for terms actually present
    assertTrue(termsEnum.seekExact(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("hello")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("world")));
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());
    assertFalse(termsEnum.seekExact(new BytesRef("bogus")));

    // seekExact(ord)
    termsEnum.seekExact(0);
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    termsEnum.seekExact(1);
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    termsEnum.seekExact(2);
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());

    // lookupTerm(BytesRef): ord when found, -(insertionPoint)-1 when absent
    assertEquals(-1, dv.lookupTerm(new BytesRef("apple")));
    assertEquals(0, dv.lookupTerm(new BytesRef("beer")));
    assertEquals(-2, dv.lookupTerm(new BytesRef("car")));
    assertEquals(1, dv.lookupTerm(new BytesRef("hello")));
    assertEquals(-3, dv.lookupTerm(new BytesRef("matter")));
    assertEquals(2, dv.lookupTerm(new BytesRef("world")));
    assertEquals(-4, dv.lookupTerm(new BytesRef("zany")));

    ireader.close();
    directory.close();
}
From source file:org.apache.solr.uninverting.TestFieldCache.java
License:Apache License
public void testLongFieldCache() throws IOException { Directory dir = newDirectory();//from w w w . j av a2 s. co m IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random())); cfg.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg); Document doc = new Document(); LongPoint field = new LongPoint("f", 0L); StoredField field2 = new StoredField("f", 0L); doc.add(field); doc.add(field2); final long[] values = new long[TestUtil.nextInt(random(), 1, 10)]; Set<Integer> missing = new HashSet<>(); for (int i = 0; i < values.length; ++i) { final long v; switch (random().nextInt(10)) { case 0: v = Long.MIN_VALUE; break; case 1: v = 0; break; case 2: v = Long.MAX_VALUE; break; default: v = TestUtil.nextLong(random(), -10, 10); break; } values[i] = v; if (v == 0 && random().nextBoolean()) { // missing iw.addDocument(new Document()); missing.add(i); } else { field.setLongValue(v); field2.setLongValue(v); iw.addDocument(doc); } } iw.forceMerge(1); final DirectoryReader reader = iw.getReader(); final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LONG_POINT_PARSER); for (int i = 0; i < values.length; ++i) { if (missing.contains(i) == false) { assertEquals(i, longs.nextDoc()); assertEquals(values[i], longs.longValue()); } } assertEquals(NO_MORE_DOCS, longs.nextDoc()); reader.close(); iw.close(); dir.close(); }
From source file:org.apache.solr.uninverting.TestFieldCache.java
License:Apache License
public void testIntFieldCache() throws IOException { Directory dir = newDirectory();/*from w w w . j a va 2s .c o m*/ IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random())); cfg.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg); Document doc = new Document(); IntPoint field = new IntPoint("f", 0); doc.add(field); final int[] values = new int[TestUtil.nextInt(random(), 1, 10)]; Set<Integer> missing = new HashSet<>(); for (int i = 0; i < values.length; ++i) { final int v; switch (random().nextInt(10)) { case 0: v = Integer.MIN_VALUE; break; case 1: v = 0; break; case 2: v = Integer.MAX_VALUE; break; default: v = TestUtil.nextInt(random(), -10, 10); break; } values[i] = v; if (v == 0 && random().nextBoolean()) { // missing iw.addDocument(new Document()); missing.add(i); } else { field.setIntValue(v); iw.addDocument(doc); } } iw.forceMerge(1); final DirectoryReader reader = iw.getReader(); final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.INT_POINT_PARSER); for (int i = 0; i < values.length; ++i) { if (missing.contains(i) == false) { assertEquals(i, ints.nextDoc()); assertEquals(values[i], ints.longValue()); } } assertEquals(NO_MORE_DOCS, ints.nextDoc()); reader.close(); iw.close(); dir.close(); }
From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java
License:Apache License
/**
 * Indexes random multi-valued strings both as an indexed StringField (to be
 * uninverted via FieldCache.getDocTermOrds) and as SortedSetDocValues, then
 * asserts the two views are equivalent — per segment, and again after a
 * force-merge down to a single segment.
 *
 * @param minLength minimum length of each random value
 * @param maxLength maximum length of each random value
 * @throws Exception on any indexing or comparison failure
 */
private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);

    // index some docs
    int numDocs = atLeast(300);
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
        doc.add(idField);
        final int length = TestUtil.nextInt(random(), minLength, maxLength);
        int numValues = random().nextInt(17);
        // create a random list of strings
        List<String> values = new ArrayList<>();
        for (int v = 0; v < numValues; v++) {
            values.add(TestUtil.randomSimpleString(random(), minLength, length));
        }

        // add in any order to the indexed field
        // (fix: iterate the shuffled copy — the original iterated 'values',
        // leaving 'unordered' computed but unused; same multiset either way)
        ArrayList<String> unordered = new ArrayList<>(values);
        Collections.shuffle(unordered, random());
        for (String v : unordered) {
            doc.add(newStringField("indexed", v, Field.Store.NO));
        }

        // add in any order to the dv field
        ArrayList<String> unordered2 = new ArrayList<>(values);
        Collections.shuffle(unordered2, random());
        for (String v : unordered2) {
            doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
        }

        writer.addDocument(doc);
        if (random().nextInt(31) == 0) {
            writer.commit();
        }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs / 10);
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }

    // compare per-segment
    DirectoryReader ir = writer.getReader();
    for (LeafReaderContext context : ir.leaves()) {
        LeafReader r = context.reader();
        SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
        SortedSetDocValues actual = r.getSortedSetDocValues("dv");
        assertEquals(r.maxDoc(), expected, actual);
    }
    ir.close();

    writer.forceMerge(1);

    // now compare again after the merge
    ir = writer.getReader();
    LeafReader ar = getOnlyLeafReader(ir);
    SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
    SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
    assertEquals(ir.maxDoc(), expected, actual);
    ir.close();
    writer.close();
    dir.close();
}
From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java
License:Apache License
/**
 * Verifies that FieldCache's "docs with field" bits agree between an indexed
 * string field and a numeric doc-values field when both are written (or both
 * omitted) for the same docs. Indexes >256 docs and merges so that at least
 * one segment holds more than 256 values, exercising all storage layouts.
 *
 * @param longs producer of the random long values to index
 * @throws Exception on indexing or comparison failure
 */
private void doTestMissingVsFieldCache(LongProducer longs) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    // reusable fields; values are overwritten per document
    Field idField = new StringField("id", "", Field.Store.NO);
    Field indexedField = newStringField("indexed", "", Field.Store.NO);
    Field dvField = new NumericDocValuesField("dv", 0);

    // index some docs
    int numDocs = atLeast(300);
    // numDocs should be always > 256 so that in case of a codec that optimizes
    // for numbers of values <= 256, all storage layouts are tested
    assert numDocs > 256;
    for (int i = 0; i < numDocs; i++) {
        idField.setStringValue(Integer.toString(i));
        long value = longs.next();
        indexedField.setStringValue(Long.toString(value));
        dvField.setLongValue(value);
        Document doc = new Document();
        doc.add(idField);
        // 1/4 of the time we neglect to add the fields
        if (random().nextInt(4) > 0) {
            doc.add(indexedField);
            doc.add(dvField);
        }
        writer.addDocument(doc);
        if (random().nextInt(31) == 0) {
            writer.commit();
        }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs / 10);
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }

    // merge some segments and ensure that at least one of them has more than
    // 256 values
    writer.forceMerge(numDocs / 256);
    writer.close();

    // compare: both sources must report the same set of docs having the field
    DirectoryReader ir = DirectoryReader.open(dir);
    for (LeafReaderContext context : ir.leaves()) {
        LeafReader r = context.reader();
        Bits expected = FieldCache.DEFAULT.getDocsWithField(r, "indexed", null);
        Bits actual = FieldCache.DEFAULT.getDocsWithField(r, "dv", null);
        assertEquals(expected, actual);
    }
    ir.close();
    dir.close();
}
From source file:org.apache.solr.uninverting.TestFieldCacheWithThreads.java
License:Apache License
/**
 * Concurrently reads SortedDocValues from a single merged segment: indexes
 * random strings (optionally deduplicated), then spawns several threads that
 * repeatedly advance to random docs and check the binary value against the
 * per-doc expected BytesRef, for a fixed wall-clock duration.
 */
public void test2() throws Exception {
    Random random = random();
    final int NUM_DOCS = atLeast(100);
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    final boolean allowDups = random.nextBoolean();
    final Set<String> seen = new HashSet<>();
    if (VERBOSE) {
        System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
    }
    int numDocs = 0;
    // docValues.get(id) is the expected value for the doc whose "id" field is id
    final List<BytesRef> docValues = new ArrayList<>();

    // TODO: deletions
    while (numDocs < NUM_DOCS) {
        final String s;
        if (random.nextBoolean()) {
            s = TestUtil.randomSimpleString(random);
        } else {
            s = TestUtil.randomUnicodeString(random);
        }
        final BytesRef br = new BytesRef(s);

        if (!allowDups) {
            if (seen.contains(s)) {
                continue;
            }
            seen.add(s);
        }

        if (VERBOSE) {
            System.out.println("  " + numDocs + ": s=" + s);
        }

        final Document doc = new Document();
        doc.add(new SortedDocValuesField("stringdv", br));
        doc.add(new NumericDocValuesField("id", numDocs));
        docValues.add(br);
        writer.addDocument(doc);
        numDocs++;

        if (random.nextInt(40) == 17) {
            // force flush
            writer.getReader().close();
        }
    }

    writer.forceMerge(1);
    final DirectoryReader r = writer.getReader();
    writer.close();

    final LeafReader sr = getOnlyLeafReader(r);

    // readers run until this deadline (longer under nightly runs)
    final long END_TIME = System.nanoTime()
            + TimeUnit.NANOSECONDS.convert((TEST_NIGHTLY ? 30 : 1), TimeUnit.SECONDS);

    final int NUM_THREADS = TestUtil.nextInt(random(), 1, 10);
    Thread[] threads = new Thread[NUM_THREADS];
    for (int thread = 0; thread < NUM_THREADS; thread++) {
        threads[thread] = new Thread() {
            @Override
            public void run() {
                Random random = random();
                final SortedDocValues stringDVDirect;
                final NumericDocValues docIDToID;
                try {
                    stringDVDirect = sr.getSortedDocValues("stringdv");
                    docIDToID = sr.getNumericDocValues("id");
                    assertNotNull(stringDVDirect);
                } catch (IOException ioe) {
                    throw new RuntimeException(ioe);
                }
                // materialize docID -> id mapping up front (iterator is one-shot)
                int[] docIDToIDArray = new int[sr.maxDoc()];
                for (int i = 0; i < sr.maxDoc(); i++) {
                    try {
                        assertEquals(i, docIDToID.nextDoc());
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                    try {
                        docIDToIDArray[i] = (int) docIDToID.longValue();
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                }
                while (System.nanoTime() < END_TIME) {
                    for (int iter = 0; iter < 100; iter++) {
                        final int docID = random.nextInt(sr.maxDoc());
                        try {
                            // fresh iterator each probe: advance and verify the value
                            SortedDocValues dvs = sr.getSortedDocValues("stringdv");
                            assertEquals(docID, dvs.advance(docID));
                            assertEquals(docValues.get(docIDToIDArray[docID]), dvs.binaryValue());
                        } catch (IOException ioe) {
                            throw new RuntimeException(ioe);
                        }
                    }
                }
            }
        };
        threads[thread].start();
    }

    for (Thread thread : threads) {
        thread.join();
    }

    r.close();
    dir.close();
}
From source file:org.apache.solr.uninverting.TestLegacyFieldCache.java
License:Apache License
/**
 * Indexes random long values as a LegacyLongField, force-merges to a single
 * segment, and verifies FieldCache.getNumerics with the LEGACY_LONG_PARSER
 * returns exactly the indexed values, skipping docs where the field was
 * deliberately omitted.
 */
public void testLongFieldCache() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
    cfg.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
    // one reusable document holding the legacy numeric field
    Document doc = new Document();
    LegacyLongField field = new LegacyLongField("f", 0L, Store.YES);
    doc.add(field);
    final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
    Set<Integer> missing = new HashSet<>();
    for (int i = 0; i < values.length; ++i) {
        // mostly small values, occasionally zero or an extreme
        final long v;
        switch (random().nextInt(10)) {
        case 0:
            v = Long.MIN_VALUE;
            break;
        case 1:
            v = 0;
            break;
        case 2:
            v = Long.MAX_VALUE;
            break;
        default:
            v = TestUtil.nextLong(random(), -10, 10);
            break;
        }
        values[i] = v;
        if (v == 0 && random().nextBoolean()) {
            // missing
            iw.addDocument(new Document());
            missing.add(i);
        } else {
            field.setLongValue(v);
            iw.addDocument(doc);
        }
    }
    iw.forceMerge(1);
    final DirectoryReader reader = iw.getReader();
    final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f",
            FieldCache.LEGACY_LONG_PARSER);
    for (int i = 0; i < values.length; ++i) {
        if (missing.contains(i) == false) {
            assertEquals(i, longs.nextDoc());
            assertEquals(values[i], longs.longValue());
        }
    }
    assertEquals(NO_MORE_DOCS, longs.nextDoc());
    reader.close();
    iw.close();
    dir.close();
}
From source file:org.apache.solr.uninverting.TestLegacyFieldCache.java
License:Apache License
/**
 * Indexes random int values as a LegacyIntField, force-merges to a single
 * segment, and verifies FieldCache.getNumerics with the LEGACY_INT_PARSER
 * returns exactly the indexed values, skipping docs where the field was
 * deliberately omitted.
 */
public void testIntFieldCache() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
    cfg.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
    // one reusable document holding the legacy numeric field
    Document doc = new Document();
    LegacyIntField field = new LegacyIntField("f", 0, Store.YES);
    doc.add(field);
    final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
    Set<Integer> missing = new HashSet<>();
    for (int i = 0; i < values.length; ++i) {
        // mostly small values, occasionally zero or an extreme
        final int v;
        switch (random().nextInt(10)) {
        case 0:
            v = Integer.MIN_VALUE;
            break;
        case 1:
            v = 0;
            break;
        case 2:
            v = Integer.MAX_VALUE;
            break;
        default:
            v = TestUtil.nextInt(random(), -10, 10);
            break;
        }
        values[i] = v;
        if (v == 0 && random().nextBoolean()) {
            // missing
            iw.addDocument(new Document());
            missing.add(i);
        } else {
            field.setIntValue(v);
            iw.addDocument(doc);
        }
    }
    iw.forceMerge(1);
    final DirectoryReader reader = iw.getReader();
    final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f",
            FieldCache.LEGACY_INT_PARSER);
    for (int i = 0; i < values.length; ++i) {
        if (missing.contains(i) == false) {
            assertEquals(i, ints.nextDoc());
            assertEquals(values[i], ints.longValue());
        }
    }
    assertEquals(NO_MORE_DOCS, ints.nextDoc());
    reader.close();
    iw.close();
    dir.close();
}