Usage examples for org.apache.lucene.index.IndexWriter#forceMerge
public void forceMerge(int maxNumSegments) throws IOException
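forceMerge(maxNumSegments) rewrites the index down to at most maxNumSegments segments. It is a blocking, I/O-intensive call, so it is normally reserved for indexes that will no longer be updated. The following minimal sketch shows the pattern shared by the examples below, assuming a Lucene 5+ style API; the directory path, analyzer, and field name are illustrative and not taken from any of the source files:

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

// Minimal sketch: add a document, then merge the index down to one segment.
try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
     IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
  Document doc = new Document();
  doc.add(new TextField("body", "hello world", Field.Store.NO));
  writer.addDocument(doc);
  writer.forceMerge(1); // blocks until the merge completes; expensive on large indexes
}

Each example below follows this shape: build or copy an index, call forceMerge(1) to collapse it into a single segment, then close the writer.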
From source file: org.apache.blur.utils.TableShardCountCollapser.java
License: Apache License

public void collapseShardsTo(int newShardCount) throws IOException {
  if (!validateCount(newShardCount)) {
    throw new RuntimeException(
        "Count [" + newShardCount + "] is not valid, valid values are [" + getCollapsePossibilities() + "]");
  }
  Path[] paths = getPaths();
  int numberOfShardsToMergePerPass = paths.length / newShardCount;
  for (int i = 0; i < newShardCount; i++) {
    System.out.println("Base Index [" + paths[i] + "]");
    IndexWriterConfig lconf = new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer());
    lconf.setCodec(new Blur024Codec());
    HdfsDirectory dir = new HdfsDirectory(getConf(), paths[i]);
    IndexWriter indexWriter = new IndexWriter(dir, lconf);
    Directory[] dirs = new Directory[numberOfShardsToMergePerPass - 1];
    Path[] pathsToDelete = new Path[numberOfShardsToMergePerPass - 1];
    for (int p = 1; p < numberOfShardsToMergePerPass; p++) {
      Path pathToMerge = paths[i + p * newShardCount];
      System.out.println("Merge [" + pathToMerge + "]");
      dirs[p - 1] = new HdfsDirectory(getConf(), pathToMerge);
      pathsToDelete[p - 1] = pathToMerge;
    }
    indexWriter.addIndexes(dirs);
    // Causes a rewrite of the index; the symlinked files are merged/rewritten.
    indexWriter.forceMerge(1);
    indexWriter.close();
    FileSystem fileSystem = path.getFileSystem(getConf());
    for (Path p : pathsToDelete) {
      fileSystem.delete(p, true);
    }
  }
}
From source file: org.apache.jackrabbit.core.query.lucene.IndexMigration.java
License: Apache License

/**
 * Checks if the given <code>index</code> needs to be migrated.
 *
 * @param index the index to check and migrate if needed.
 * @param directoryManager the directory manager.
 * @param oldSeparatorChar the old separator char that needs to be replaced.
 * @throws IOException if an error occurs while migrating the index.
 */
public static void migrate(PersistentIndex index, DirectoryManager directoryManager, char oldSeparatorChar)
    throws IOException {
  Directory indexDir = index.getDirectory();
  log.debug("Checking {} ...", indexDir);
  ReadOnlyIndexReader reader = index.getReadOnlyIndexReader();
  try {
    if (IndexFormatVersion.getVersion(reader).getVersion() >= IndexFormatVersion.V3.getVersion()) {
      // index was created with Jackrabbit 1.5 or higher; no need for migration
      log.debug("IndexFormatVersion >= V3, no migration needed");
      return;
    }
    // assert: there is at least one node in the index, otherwise the
    // index format version would be at least V3
    TermEnum terms = reader.terms(new Term(FieldNames.PROPERTIES, ""));
    try {
      Term t = terms.term();
      if (t.text().indexOf(oldSeparatorChar) == -1) {
        log.debug("Index already migrated");
        return;
      }
    } finally {
      terms.close();
    }
  } finally {
    reader.release();
    index.releaseWriterAndReaders();
  }

  // if we get here then the index must be migrated
  log.debug("Index requires migration {}", indexDir);

  String migrationName = index.getName() + "_v36";
  if (directoryManager.hasDirectory(migrationName)) {
    directoryManager.delete(migrationName);
  }
  Directory migrationDir = directoryManager.getDirectory(migrationName);
  final IndexWriterConfig c = new IndexWriterConfig(Version.LUCENE_36, new JackrabbitAnalyzer());
  c.setMergePolicy(new UpgradeIndexMergePolicy(new LogByteSizeMergePolicy()));
  c.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
  try {
    IndexWriter writer = new IndexWriter(migrationDir, c);
    try {
      IndexReader r = new MigrationIndexReader(IndexReader.open(index.getDirectory()), oldSeparatorChar);
      try {
        writer.addIndexes(r);
        writer.forceMerge(1);
        writer.close();
      } finally {
        r.close();
      }
    } finally {
      writer.close();
    }
  } finally {
    migrationDir.close();
  }
  directoryManager.delete(index.getName());
  if (!directoryManager.rename(migrationName, index.getName())) {
    throw new IOException("failed to move migrated directory " + migrationDir);
  }
  log.info("Migrated " + index.getName());
}
From source file: org.apache.solr.codecs.test.testMergeSegments.java
License: Apache License

public static void main(String[] args) {
  try {
    testUtil.initPropsONSQL();
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    ONSQLCodec codec = new ONSQLCodec();
    config.setCodec(codec);
    config.setUseCompoundFile(false);
    Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
    IndexWriter writer = new IndexWriter(luceneDir, config);
    writer.forceMerge(1);
    writer.close();
  } catch (Throwable te) {
    te.printStackTrace();
  }
}
From source file: org.apache.solr.spelling.FileBasedSpellChecker.java
License: Apache License

private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) {
  try {
    IndexSchema schema = null == searcher ? core.getLatestSchema() : searcher.getSchema();
    // Get the field's analyzer
    if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) {
      FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
      // Do index-time analysis using the given fieldType's analyzer
      RAMDirectory ramDir = new RAMDirectory();

      LogMergePolicy mp = new LogByteSizeMergePolicy();
      mp.setMergeFactor(300);

      IndexWriter writer = new IndexWriter(ramDir,
          new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, fieldType.getAnalyzer())
              .setMaxBufferedDocs(150).setMergePolicy(mp)
              .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
          // TODO: if we enable this, codec gets angry since field won't exist in the schema
          // .setCodec(core.getCodec())
      );

      List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding);

      for (String s : lines) {
        Document d = new Document();
        d.add(new TextField(WORD_FIELD_NAME, s, Field.Store.NO));
        writer.addDocument(d);
      }
      writer.forceMerge(1);
      writer.close();

      dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir), WORD_FIELD_NAME, 0.0f);
    } else {
      // check if character encoding is defined
      if (characterEncoding == null) {
        dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation));
      } else {
        dictionary = new PlainTextDictionary(new InputStreamReader(
            core.getResourceLoader().openResource(sourceLocation), characterEncoding));
      }
    }
  } catch (IOException e) {
    log.error("Unable to load spellings", e);
  }
}
From source file: org.apache.solr.spelling.IndexBasedSpellCheckerTest.java
License: Apache License

@Test
public void testAlternateLocation() throws Exception {
  String[] ALT_DOCS = new String[] { "jumpin jack flash", "Sargent Peppers Lonely Hearts Club Band",
      "Born to Run", "Thunder Road", "Londons Burning", "A Horse with No Name", "Sweet Caroline" };
  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());
  File indexDir = new File(TEMP_DIR, "spellingIdx" + new Date().getTime());
  // create a standalone index
  File altIndexDir = new File(TEMP_DIR, "alternateIdx" + new Date().getTime());
  Directory dir = newFSDirectory(altIndexDir);
  IndexWriter iw = new IndexWriter(dir,
      new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
  for (int i = 0; i < ALT_DOCS.length; i++) {
    Document doc = new Document();
    doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES));
    iw.addDocument(doc);
  }
  iw.forceMerge(1);
  iw.close();
  dir.close();
  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
      dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
  RefCounted<SolrIndexSearcher> holder = core.getSearcher();
  SolrIndexSearcher searcher = holder.get();
  try {
    checker.build(core, searcher);
    IndexReader reader = searcher.getIndexReader();
    Collection<Token> tokens = queryConverter.convert("flesh");
    SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1,
        SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
    SpellingResult result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    // should be lowercased, b/c we are using a lowercasing analyzer
    Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("flesh is null and it shouldn't be", suggestions != null);
    assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
    Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
    assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash") == true);
    assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);
    // test something not in the spell checker
    spellOpts.tokens = queryConverter.convert("super");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions size should be 0", suggestions.size() == 0);
    spellOpts.tokens = queryConverter.convert("Caroline");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions is not null and it should be", suggestions == null);
  } finally {
    holder.decref();
  }
}
From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testNumericEncoded32() throws IOException {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

  Document doc = new Document();
  doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
  iw.addDocument(doc);

  doc = new Document();
  doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
  doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
  iw.addDocument(doc);

  iw.forceMerge(1);
  iw.close();

  DirectoryReader ir = DirectoryReader.open(dir);
  LeafReader ar = getOnlyLeafReader(ir);
  SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT32_TERM_PREFIX);
  assertEquals(2, v.getValueCount());

  assertEquals(0, v.nextDoc());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  assertEquals(1, v.nextDoc());
  assertEquals(0, v.nextOrd());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  BytesRef value = v.lookupOrd(0);
  assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));
  value = v.lookupOrd(1);
  assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));

  ir.close();
  dir.close();
}
From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testNumericEncoded64() throws IOException {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

  Document doc = new Document();
  doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
  iw.addDocument(doc);

  doc = new Document();
  doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
  doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
  iw.addDocument(doc);

  iw.forceMerge(1);
  iw.close();

  DirectoryReader ir = DirectoryReader.open(dir);
  LeafReader ar = getOnlyLeafReader(ir);
  SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT64_TERM_PREFIX);
  assertEquals(2, v.getValueCount());

  assertEquals(0, v.nextDoc());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  assertEquals(1, v.nextDoc());
  assertEquals(0, v.nextOrd());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  BytesRef value = v.lookupOrd(0);
  assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));
  value = v.lookupOrd(1);
  assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));

  ir.close();
  dir.close();
}
From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testActuallySingleValued() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwconfig = newIndexWriterConfig(null);
  iwconfig.setMergePolicy(newLogMergePolicy());
  IndexWriter iw = new IndexWriter(dir, iwconfig);

  Document doc = new Document();
  doc.add(new StringField("foo", "bar", Field.Store.NO));
  iw.addDocument(doc);

  doc = new Document();
  doc.add(new StringField("foo", "baz", Field.Store.NO));
  iw.addDocument(doc);

  doc = new Document();
  iw.addDocument(doc);

  doc = new Document();
  doc.add(new StringField("foo", "baz", Field.Store.NO));
  doc.add(new StringField("foo", "baz", Field.Store.NO));
  iw.addDocument(doc);

  iw.forceMerge(1);
  iw.close();

  DirectoryReader ir = DirectoryReader.open(dir);
  LeafReader ar = getOnlyLeafReader(ir);
  SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
  assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field
  assertEquals(2, v.getValueCount());

  assertEquals(0, v.nextDoc());
  assertEquals(0, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  assertEquals(1, v.nextDoc());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  assertEquals(3, v.nextDoc());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  BytesRef value = v.lookupOrd(0);
  assertEquals("bar", value.utf8ToString());
  value = v.lookupOrd(1);
  assertEquals("baz", value.utf8ToString());

  ir.close();
  dir.close();
}
From source file: org.apache.solr.uninverting.TestFieldCache.java
License: Apache License

@BeforeClass
public static void beforeClass() throws Exception {
  NUM_DOCS = atLeast(500);
  NUM_ORDS = atLeast(2);
  directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory,
      new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy()));
  long theLong = Long.MAX_VALUE;
  double theDouble = Double.MAX_VALUE;
  int theInt = Integer.MAX_VALUE;
  float theFloat = Float.MAX_VALUE;
  unicodeStrings = new String[NUM_DOCS];
  multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
  if (VERBOSE) {
    System.out.println("TEST: setUp");
  }
  for (int i = 0; i < NUM_DOCS; i++) {
    Document doc = new Document();
    doc.add(new LongPoint("theLong", theLong--));
    doc.add(new DoublePoint("theDouble", theDouble--));
    doc.add(new IntPoint("theInt", theInt--));
    doc.add(new FloatPoint("theFloat", theFloat--));
    if (i % 2 == 0) {
      doc.add(new IntPoint("sparse", i));
    }
    if (i % 2 == 0) {
      doc.add(new IntPoint("numInt", i));
    }
    // sometimes skip the field:
    if (random().nextInt(40) != 17) {
      unicodeStrings[i] = generateString(i);
      doc.add(newStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
    }
    // sometimes skip the field:
    if (random().nextInt(10) != 8) {
      for (int j = 0; j < NUM_ORDS; j++) {
        String newValue = generateString(i);
        multiValued[i][j] = new BytesRef(newValue);
        doc.add(newStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
      }
      Arrays.sort(multiValued[i]);
    }
    writer.addDocument(doc);
  }
  writer.forceMerge(1); // this test relies on one segment and docid order
  IndexReader r = DirectoryReader.open(writer);
  assertEquals(1, r.leaves().size());
  reader = r.leaves().get(0).reader();
  TestUtil.checkReader(reader);
  writer.close();
}
From source file: org.apache.solr.uninverting.TestFieldCacheSort.java
License: Apache License

/**
 * Tests that we throw an exception when sorting on a multi-valued field,
 * since uninverting it creates a corrupt reader; use SORTED_SET instead.
 */
public void testMultiValuedField() throws IOException {
  Directory indexStore = newDirectory();
  IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig(new MockAnalyzer(random())));
  for (int i = 0; i < 5; i++) {
    Document doc = new Document();
    doc.add(new StringField("string", "a" + i, Field.Store.NO));
    doc.add(new StringField("string", "b" + i, Field.Store.NO));
    writer.addDocument(doc);
  }
  writer.forceMerge(1); // enforce one segment to have a higher unique term count in all cases
  writer.close();
  Sort sort = new Sort(new SortField("string", SortField.Type.STRING), SortField.FIELD_DOC);
  IndexReader reader = UninvertingReader.wrap(DirectoryReader.open(indexStore),
      Collections.singletonMap("string", Type.SORTED));
  IndexSearcher searcher = new IndexSearcher(reader);
  expectThrows(IllegalStateException.class, () -> {
    searcher.search(new MatchAllDocsQuery(), 500, sort);
  });
  reader.close();
  indexStore.close();
}
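As the comment in the last example notes, the supported way to sort on a multi-valued field is SORTED_SET doc values rather than uninverting a single-valued view. The following is a minimal sketch of that alternative, assuming a Lucene 6.x-era API to match these tests; RAMDirectory, the field name "string", and the document values are illustrative:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

Directory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
for (int i = 0; i < 5; i++) {
  Document doc = new Document();
  // index both values as SORTED_SET doc values instead of relying on uninversion
  doc.add(new SortedSetDocValuesField("string", new BytesRef("a" + i)));
  doc.add(new SortedSetDocValuesField("string", new BytesRef("b" + i)));
  writer.addDocument(doc);
}
writer.forceMerge(1);
writer.close();

IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
// SortedSetSortField sorts each document by its minimum value per document
// by default, so this search succeeds where the uninverting version throws.
Sort sort = new Sort(new SortedSetSortField("string", false), SortField.FIELD_DOC);
TopDocs hits = searcher.search(new MatchAllDocsQuery(), 500, sort);
reader.close();
dir.close();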