List of usage examples for org.apache.lucene.index IndexWriterConfig setCodec
public IndexWriterConfig setCodec(Codec codec)
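setCodec selects the Codec used to write new segments; it only takes effect when the IndexWriter is first created. Below is a minimal sketch of the typical call pattern, assuming Lucene 4.10.x (matching most examples on this page) and the SimpleTextCodec from the lucene-codecs module; the index path and class name are illustrative only.

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class SetCodecExample {
    public static void main(String[] args) throws Exception {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
        // Set the codec before constructing the IndexWriter; it applies to newly written segments.
        config.setCodec(new SimpleTextCodec());
        Directory dir = FSDirectory.open(new File("/tmp/codec-demo")); // illustrative path
        IndexWriter writer = new IndexWriter(dir, config);
        // ... add documents here ...
        writer.close();
        dir.close();
    }
}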
From source file: org.apache.solr.codecs.test.testMergeSegments.java
License: Apache License

public static void main(String[] args) {
    try {
        testUtil.initPropsONSQL();
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        ONSQLCodec codec = new ONSQLCodec();
        config.setCodec(codec);
        config.setUseCompoundFile(false);
        Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
        IndexWriter writer = new IndexWriter(luceneDir, config);
        writer.forceMerge(1);
        writer.close();
    } catch (Throwable te) {
        te.printStackTrace();
    }
}
From source file: org.apache.solr.codecs.test.testONSQLCodec.java
License: Apache License

public static void main(String[] args) {
    try {
        plaintextDir = assureDirectoryExists(new File(INDEX_ROOT_FOLDER));
        testUtil.initPropsONSQL();
        //----------- index documents -------
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
        // recreate the index on each execution
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        //config.setCodec(new SimpleTextCodec());
        ONSQLCodec codec = new ONSQLCodec();
        config.setCodec(codec);
        config.setUseCompoundFile(false);
        Directory luceneDir = FSDirectory.open(plaintextDir);
        IndexWriter writer = new IndexWriter(luceneDir, config);
        writer.addDocument(Arrays.asList(new TextField("title", "The title of my first document", Store.YES),
                new TextField("content", "The content of the first document", Store.YES),
                new IntField("intval", 111111, Store.YES),
                new LongField("longval", 1111111111L, Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The tAtle of the second document", Store.YES),
                new TextField("content", "The content of the second document", Store.YES),
                new IntField("intval", 222222, Store.YES),
                new LongField("longval", 222222222L, Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the third document", Store.YES),
                new TextField("content", "The content of the third document", Store.YES),
                new IntField("intval", 333333, Store.YES),
                new LongField("longval", 3333333333L, Store.YES)));
        writer.commit();
        writer.close();
        IndexReader reader = DirectoryReader.open(luceneDir);
        // now test for docs
        if (reader.numDocs() < 3)
            throw new IOException("amount of returned docs are less than indexed");
        else
            System.out.println("test passed");
        searchIndex("content", "third");
    } catch (Throwable te) {
        te.printStackTrace();
    }
}
From source file: org.apache.solr.codecs.test.testONSQLWrapperDirectory.java
License: Apache License

public static void main(String[] args) {
    try {
        testUtil.initPropsONSQL();
        //----------- index documents -------
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
        //Codec cd = new ONSQLCodec("omega");
        //Codec.setDefault(cd);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
        // recreate the index on each execution
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        //config.setCodec(new SimpleTextCodec());
        ONSQLCodec codec = new ONSQLCodec();
        config.setCodec(codec);
        config.setUseCompoundFile(false);
        Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
        String[] list = luceneDir.listAll();
        System.out.println("listall length=" + list.length);
        for (int i = 0; i < list.length; i++) {
            System.out.println(list[i]);
        }
        IndexWriter writer = new IndexWriter(luceneDir, config);
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the first document", Store.YES),
                new TextField("content", "The content of the first document", Store.YES),
                new TextField("global_bu_id", "1b", Store.YES),
                new TextField("omega_order_num", "1n", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The tAtle of the second document", Store.YES),
                new TextField("content", "The content of the second document", Store.YES),
                new TextField("global_bu_id", "1k", Store.YES),
                new TextField("omega_order_num", "2b", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the third document", Store.YES),
                new TextField("content", "The content of the third document", Store.YES),
                new TextField("global_bu_id", "2k", Store.YES),
                new TextField("omega_order_num", "3b", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the fourth document", Store.YES),
                new TextField("content", "The content of the fourth document", Store.YES),
                new TextField("global_bu_id", "2k", Store.YES),
                new TextField("omega_order_num", "4b", Store.YES)));
        //writer.commit();
        writer.close();
        /*
        IndexReader reader = DirectoryReader.open(luceneDir);
        // now test for docs
        if (reader.numDocs() != 3)
            throw new IOException("amount of returned docs are less than indexed");
        else
            System.out.println("test passed");
        */
        searchIndex("content", "second");
        System.out.println("---- now we delete docs for second document----");
        deleteDocs("content", "second");
        System.out.println("--- and repeat search-----");
        searchIndex("content", "second");
    } catch (Throwable te) {
        te.printStackTrace();
    }
}
From source file: org.apache.solr.codecs.test.testONSQLWrapperDirectory.java
License: Apache License

public static void deleteDocs(String searchField, String searchString) throws IOException, ParseException {
    System.out.println("deleting docs for '" + searchString + "'");
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1,
            new StandardAnalyzer(Version.LUCENE_4_10_1));
    config.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    ONSQLCodec codec = new ONSQLCodec();
    config.setCodec(codec);
    config.setUseCompoundFile(false);
    Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
    IndexWriter writer = new IndexWriter(luceneDir, config);
    QueryParser queryParser = new QueryParser(Version.LUCENE_4_10_1, searchField,
            new StandardAnalyzer(Version.LUCENE_4_10_1));
    Query query = queryParser.parse(searchString);
    writer.deleteDocuments(query);
    writer.commit();
    writer.close();
    luceneDir.close();
    System.out.println("docs were deleted");
}
From source file: org.apache.solr.codecs.test.testSimpleTextCodec.java
License: Apache License

public static void main(String[] args) {
    try {
        plaintextDir = assureDirectoryExists(new File(INDEX_ROOT_FOLDER, "plaintext"));
        //----------- index documents -------
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_48, analyzer);
        // recreate the index on each execution
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        config.setCodec(new SimpleTextCodec());
        config.setUseCompoundFile(false);
        Directory luceneDir = FSDirectory.open(plaintextDir);
        IndexWriter writer = new IndexWriter(luceneDir, config);
        writer.addDocument(Arrays.asList(new TextField("title", "The title of my first document", Store.YES),
                new TextField("content", "The content of the first document", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The tAtle of the second document", Store.YES),
                new TextField("content", "The content of the second document", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the third document", Store.YES),
                new TextField("content", "The content of the third document", Store.YES)));
        writer.commit();
        writer.close();
        IndexReader reader = DirectoryReader.open(luceneDir);
        // now test for docs
        if (reader.numDocs() != 3)
            throw new IOException("amount of returned docs are less than indexed");
        else
            System.out.println("test passed");
        searchIndex("content", "third");
    } catch (Throwable te) {
        te.printStackTrace();
    }
}
From source file: org.apache.solr.core.snapshots.SolrSnapshotManager.java
License: Apache License

/**
 * This method deletes index files of the {@linkplain IndexCommit} for the specified generation number.
 *
 * @param core The Solr core
 * @param dir The index directory storing the snapshot.
 * @throws IOException in case of I/O errors.
 */
private static void deleteSnapshotIndexFiles(SolrCore core, Directory dir, IndexDeletionPolicy delPolicy)
        throws IOException {
    IndexWriterConfig conf = core.getSolrConfig().indexConfig.toIndexWriterConfig(core);
    conf.setOpenMode(OpenMode.APPEND);
    conf.setMergePolicy(NoMergePolicy.INSTANCE); // Don't want to merge any commits here!
    conf.setIndexDeletionPolicy(delPolicy);
    conf.setCodec(core.getCodec());
    try (SolrIndexWriter iw = new SolrIndexWriter("SolrSnapshotCleaner", dir, conf)) {
        // Do nothing. The only purpose of opening index writer is to invoke the Lucene IndexDeletionPolicy#onInit
        // method so that we can cleanup the files associated with specified index commit.
        // Note the index writer creates a new commit during the close() operation (which is harmless).
    }
}
From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testRandom() throws Exception {
    Directory dir = newDirectory();

    final int NUM_TERMS = atLeast(20);
    final Set<BytesRef> terms = new HashSet<>();
    while (terms.size() < NUM_TERMS) {
        final String s = TestUtil.randomRealisticUnicodeString(random());
        //final String s = _TestUtil.randomSimpleString(random);
        if (s.length() > 0) {
            terms.add(new BytesRef(s));
        }
    }
    final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
    Arrays.sort(termsArray);

    final int NUM_DOCS = atLeast(100);

    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));

    // Sometimes swap in codec that impls ord():
    if (random().nextInt(10) == 7) {
        // Make sure terms index has ords:
        Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
        conf.setCodec(codec);
    }

    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);

    final int[][] idToOrds = new int[NUM_DOCS][];
    final Set<Integer> ordsForDocSet = new HashSet<>();

    for (int id = 0; id < NUM_DOCS; id++) {
        Document doc = new Document();

        doc.add(new LegacyIntField("id", id, Field.Store.YES));

        final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
        while (ordsForDocSet.size() < termCount) {
            ordsForDocSet.add(random().nextInt(termsArray.length));
        }
        final int[] ordsForDoc = new int[termCount];
        int upto = 0;
        if (VERBOSE) {
            System.out.println("TEST: doc id=" + id);
        }
        for (int ord : ordsForDocSet) {
            ordsForDoc[upto++] = ord;
            Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
            if (VERBOSE) {
                System.out.println("  f=" + termsArray[ord].utf8ToString());
            }
            doc.add(field);
        }
        ordsForDocSet.clear();
        Arrays.sort(ordsForDoc);
        idToOrds[id] = ordsForDoc;

        w.addDocument(doc);
    }

    final DirectoryReader r = w.getReader();
    w.close();

    if (VERBOSE) {
        System.out.println("TEST: reader=" + r);
    }

    for (LeafReaderContext ctx : r.leaves()) {
        if (VERBOSE) {
            System.out.println("\nTEST: sub=" + ctx.reader());
        }
        verify(ctx.reader(), idToOrds, termsArray, null);
    }

    // Also test top-level reader: its enum does not support
    // ord, so this forces the OrdWrapper to run:
    if (VERBOSE) {
        System.out.println("TEST: top reader");
    }
    LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(slowR);
    verify(slowR, idToOrds, termsArray, null);

    FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());

    r.close();
    dir.close();
}
From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testRandomWithPrefix() throws Exception {
    Directory dir = newDirectory();

    final Set<String> prefixes = new HashSet<>();
    final int numPrefix = TestUtil.nextInt(random(), 2, 7);
    if (VERBOSE) {
        System.out.println("TEST: use " + numPrefix + " prefixes");
    }
    while (prefixes.size() < numPrefix) {
        prefixes.add(TestUtil.randomRealisticUnicodeString(random()));
        //prefixes.add(_TestUtil.randomSimpleString(random));
    }
    final String[] prefixesArray = prefixes.toArray(new String[prefixes.size()]);

    final int NUM_TERMS = atLeast(20);
    final Set<BytesRef> terms = new HashSet<>();
    while (terms.size() < NUM_TERMS) {
        final String s = prefixesArray[random().nextInt(prefixesArray.length)]
                + TestUtil.randomRealisticUnicodeString(random());
        //final String s = prefixesArray[random.nextInt(prefixesArray.length)] + _TestUtil.randomSimpleString(random);
        if (s.length() > 0) {
            terms.add(new BytesRef(s));
        }
    }
    final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
    Arrays.sort(termsArray);

    final int NUM_DOCS = atLeast(100);

    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));

    // Sometimes swap in codec that impls ord():
    if (random().nextInt(10) == 7) {
        Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
        conf.setCodec(codec);
    }

    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);

    final int[][] idToOrds = new int[NUM_DOCS][];
    final Set<Integer> ordsForDocSet = new HashSet<>();

    for (int id = 0; id < NUM_DOCS; id++) {
        Document doc = new Document();

        doc.add(new LegacyIntField("id", id, Field.Store.YES));

        final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
        while (ordsForDocSet.size() < termCount) {
            ordsForDocSet.add(random().nextInt(termsArray.length));
        }
        final int[] ordsForDoc = new int[termCount];
        int upto = 0;
        if (VERBOSE) {
            System.out.println("TEST: doc id=" + id);
        }
        for (int ord : ordsForDocSet) {
            ordsForDoc[upto++] = ord;
            Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
            if (VERBOSE) {
                System.out.println("  f=" + termsArray[ord].utf8ToString());
            }
            doc.add(field);
        }
        ordsForDocSet.clear();
        Arrays.sort(ordsForDoc);
        idToOrds[id] = ordsForDoc;

        w.addDocument(doc);
    }

    final DirectoryReader r = w.getReader();
    w.close();

    if (VERBOSE) {
        System.out.println("TEST: reader=" + r);
    }

    LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(slowR);
    for (String prefix : prefixesArray) {

        final BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);

        final int[][] idToOrdsPrefix = new int[NUM_DOCS][];
        for (int id = 0; id < NUM_DOCS; id++) {
            final int[] docOrds = idToOrds[id];
            final List<Integer> newOrds = new ArrayList<>();
            for (int ord : idToOrds[id]) {
                if (StringHelper.startsWith(termsArray[ord], prefixRef)) {
                    newOrds.add(ord);
                }
            }
            final int[] newOrdsArray = new int[newOrds.size()];
            int upto = 0;
            for (int ord : newOrds) {
                newOrdsArray[upto++] = ord;
            }
            idToOrdsPrefix[id] = newOrdsArray;
        }

        for (LeafReaderContext ctx : r.leaves()) {
            if (VERBOSE) {
                System.out.println("\nTEST: sub=" + ctx.reader());
            }
            verify(ctx.reader(), idToOrdsPrefix, termsArray, prefixRef);
        }

        // Also test top-level reader: its enum does not support
        // ord, so this forces the OrdWrapper to run:
        if (VERBOSE) {
            System.out.println("TEST: top reader");
        }
        verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
    }

    FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());

    r.close();
    dir.close();
}
From source file: org.elasticsearch.index.codec.postingformat.DefaultPostingsFormatTests.java
License: Apache License

@Test
public void testUseDefault() throws IOException {
    Codec codec = new TestCodec();
    Directory d = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, new WhitespaceAnalyzer(Lucene.VERSION));
    config.setCodec(codec);
    IndexWriter writer = new IndexWriter(d, config);
    writer.addDocument(Arrays.asList(new TextField("foo", "bar", Store.YES),
            new TextField(UidFieldMapper.NAME, "1234", Store.YES)));
    writer.commit();
    DirectoryReader reader = DirectoryReader.open(writer, false);
    List<AtomicReaderContext> leaves = reader.leaves();
    assertThat(leaves.size(), equalTo(1));
    AtomicReader ar = leaves.get(0).reader();
    Terms terms = ar.terms("foo");
    Terms uidTerms = ar.terms(UidFieldMapper.NAME);
    assertThat(terms.size(), equalTo(1l));
    assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
    assertThat(uidTerms, instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class));
    reader.close();
    writer.close();
    d.close();
}
From source file: org.elasticsearch.index.codec.postingformat.DefaultPostingsFormatTests.java
License: Apache License

@Test
public void testNoUIDField() throws IOException {
    Codec codec = new TestCodec();
    Directory d = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, new WhitespaceAnalyzer(Lucene.VERSION));
    config.setCodec(codec);
    IndexWriter writer = new IndexWriter(d, config);
    for (int i = 0; i < 100; i++) {
        writer.addDocument(Arrays.asList(new TextField("foo", "foo bar foo bar", Store.YES),
                new TextField("some_other_field", "1234", Store.YES)));
    }
    writer.forceMerge(1);
    writer.commit();
    DirectoryReader reader = DirectoryReader.open(writer, false);
    List<AtomicReaderContext> leaves = reader.leaves();
    assertThat(leaves.size(), equalTo(1));
    AtomicReader ar = leaves.get(0).reader();
    Terms terms = ar.terms("foo");
    Terms some_other_field = ar.terms("some_other_field");
    assertThat(terms.size(), equalTo(2l));
    assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
    assertThat(some_other_field, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
    TermsEnum iterator = terms.iterator(null);
    Set<String> expected = new HashSet<String>();
    expected.add("foo");
    expected.add("bar");
    while (iterator.next() != null) {
        expected.remove(iterator.term().utf8ToString());
    }
    assertThat(expected.size(), equalTo(0));
    reader.close();
    writer.close();
    d.close();
}