List of usage examples for org.apache.lucene.index IndexWriterConfig setCodec
public IndexWriterConfig setCodec(Codec codec)
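setCodec selects the Codec used to write new segments; it only takes effect when the IndexWriter is first created. Below is a minimal sketch of the typical call pattern, assuming Lucene 4.10.x (matching most examples on this page) and the SimpleTextCodec from the lucene-codecs module; the index path and class name are illustrative only.

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class SetCodecExample {
    public static void main(String[] args) throws Exception {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
        // Set the codec before constructing the IndexWriter; it applies to newly written segments.
        config.setCodec(new SimpleTextCodec());
        Directory dir = FSDirectory.open(new File("/tmp/codec-demo")); // illustrative path
        IndexWriter writer = new IndexWriter(dir, config);
        // ... add documents here ...
        writer.close();
        dir.close();
    }
}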
From source file: org.apache.solr.codecs.test.testMergeSegments.java
License: Apache License

public static void main(String[] args) {
    try {
        testUtil.initPropsONSQL();
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        ONSQLCodec codec = new ONSQLCodec();
        config.setCodec(codec);
        config.setUseCompoundFile(false);
        Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
        IndexWriter writer = new IndexWriter(luceneDir, config);
        writer.forceMerge(1);
        writer.close();
    } catch (Throwable te) {
        te.printStackTrace();
    }
}
From source file: org.apache.solr.codecs.test.testONSQLCodec.java
License: Apache License

public static void main(String[] args) {
    try {
        plaintextDir = assureDirectoryExists(new File(INDEX_ROOT_FOLDER));
        testUtil.initPropsONSQL();
        //----------- index documents -------
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
        // recreate the index on each execution
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        //config.setCodec(new SimpleTextCodec());
        ONSQLCodec codec = new ONSQLCodec();
        config.setCodec(codec);
        config.setUseCompoundFile(false);
        Directory luceneDir = FSDirectory.open(plaintextDir);
        IndexWriter writer = new IndexWriter(luceneDir, config);
        writer.addDocument(Arrays.asList(new TextField("title", "The title of my first document", Store.YES),
                new TextField("content", "The content of the first document", Store.YES),
                new IntField("intval", 111111, Store.YES),
                new LongField("longval", 1111111111L, Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The tAtle of the second document", Store.YES),
                new TextField("content", "The content of the second document", Store.YES),
                new IntField("intval", 222222, Store.YES),
                new LongField("longval", 222222222L, Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the third document", Store.YES),
                new TextField("content", "The content of the third document", Store.YES),
                new IntField("intval", 333333, Store.YES),
                new LongField("longval", 3333333333L, Store.YES)));
        writer.commit();
        writer.close();
        IndexReader reader = DirectoryReader.open(luceneDir);
        // now test for docs
        if (reader.numDocs() < 3)
            throw new IOException("amount of returned docs are less than indexed");
        else
            System.out.println("test passed");
        searchIndex("content", "third");
    } catch (Throwable te) {
        te.printStackTrace();
    }
}
From source file: org.apache.solr.codecs.test.testONSQLWrapperDirectory.java
License: Apache License

public static void main(String[] args) {
    try {
        testUtil.initPropsONSQL();
        //----------- index documents -------
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
        //Codec cd = new ONSQLCodec("omega");
        //Codec.setDefault(cd);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
        // recreate the index on each execution
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        //config.setCodec(new SimpleTextCodec());
        ONSQLCodec codec = new ONSQLCodec();
        config.setCodec(codec);
        config.setUseCompoundFile(false);
        Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
        String[] list = luceneDir.listAll();
        System.out.println("listall length=" + list.length);
        for (int i = 0; i < list.length; i++) {
            System.out.println(list[i]);
        }
        IndexWriter writer = new IndexWriter(luceneDir, config);
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the first document", Store.YES),
                new TextField("content", "The content of the first document", Store.YES),
                new TextField("global_bu_id", "1b", Store.YES),
                new TextField("omega_order_num", "1n", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The tAtle of the second document", Store.YES),
                new TextField("content", "The content of the second document", Store.YES),
                new TextField("global_bu_id", "1k", Store.YES),
                new TextField("omega_order_num", "2b", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the third document", Store.YES),
                new TextField("content", "The content of the third document", Store.YES),
                new TextField("global_bu_id", "2k", Store.YES),
                new TextField("omega_order_num", "3b", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the fourth document", Store.YES),
                new TextField("content", "The content of the fourth document", Store.YES),
                new TextField("global_bu_id", "2k", Store.YES),
                new TextField("omega_order_num", "4b", Store.YES)));
        //writer.commit();
        writer.close();
        /*
        IndexReader reader = DirectoryReader.open(luceneDir);
        // now test for docs
        if (reader.numDocs() != 3)
            throw new IOException("amount of returned docs are less than indexed");
        else
            System.out.println("test passed");
        */
        searchIndex("content", "second");
        System.out.println("---- now we delete docs for second document----");
        deleteDocs("content", "second");
        System.out.println("--- and repeat search-----");
        searchIndex("content", "second");
    } catch (Throwable te) {
        te.printStackTrace();
    }
}
From source file: org.apache.solr.codecs.test.testONSQLWrapperDirectory.java
License: Apache License

public static void deleteDocs(String searchField, String searchString) throws IOException, ParseException {
    System.out.println("deleting docs for '" + searchString + "'");
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1,
            new StandardAnalyzer(Version.LUCENE_4_10_1));
    config.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    ONSQLCodec codec = new ONSQLCodec();
    config.setCodec(codec);
    config.setUseCompoundFile(false);
    Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
    IndexWriter writer = new IndexWriter(luceneDir, config);
    QueryParser queryParser = new QueryParser(Version.LUCENE_4_10_1, searchField,
            new StandardAnalyzer(Version.LUCENE_4_10_1));
    Query query = queryParser.parse(searchString);
    writer.deleteDocuments(query);
    writer.commit();
    writer.close();
    luceneDir.close();
    System.out.println("docs were deleted");
}
From source file: org.apache.solr.codecs.test.testSimpleTextCodec.java
License: Apache License

public static void main(String[] args) {
    try {
        plaintextDir = assureDirectoryExists(new File(INDEX_ROOT_FOLDER, "plaintext"));
        //----------- index documents -------
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_48, analyzer);
        // recreate the index on each execution
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        config.setCodec(new SimpleTextCodec());
        config.setUseCompoundFile(false);
        Directory luceneDir = FSDirectory.open(plaintextDir);
        IndexWriter writer = new IndexWriter(luceneDir, config);
        writer.addDocument(Arrays.asList(new TextField("title", "The title of my first document", Store.YES),
                new TextField("content", "The content of the first document", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The tAtle of the second document", Store.YES),
                new TextField("content", "The content of the second document", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the third document", Store.YES),
                new TextField("content", "The content of the third document", Store.YES)));
        writer.commit();
        writer.close();
        IndexReader reader = DirectoryReader.open(luceneDir);
        // now test for docs
        if (reader.numDocs() != 3)
            throw new IOException("amount of returned docs are less than indexed");
        else
            System.out.println("test passed");
        searchIndex("content", "third");
    } catch (Throwable te) {
        te.printStackTrace();
    }
}
From source file: org.apache.solr.core.snapshots.SolrSnapshotManager.java
License: Apache License

/**
 * This method deletes index files of the {@linkplain IndexCommit} for the specified generation number.
 *
 * @param core The Solr core
 * @param dir The index directory storing the snapshot.
 * @throws IOException in case of I/O errors.
 */
private static void deleteSnapshotIndexFiles(SolrCore core, Directory dir, IndexDeletionPolicy delPolicy)
        throws IOException {
    IndexWriterConfig conf = core.getSolrConfig().indexConfig.toIndexWriterConfig(core);
    conf.setOpenMode(OpenMode.APPEND);
    conf.setMergePolicy(NoMergePolicy.INSTANCE); // Don't want to merge any commits here!
    conf.setIndexDeletionPolicy(delPolicy);
    conf.setCodec(core.getCodec());
    try (SolrIndexWriter iw = new SolrIndexWriter("SolrSnapshotCleaner", dir, conf)) {
        // Do nothing. The only purpose of opening index writer is to invoke the Lucene IndexDeletionPolicy#onInit
        // method so that we can cleanup the files associated with specified index commit.
        // Note the index writer creates a new commit during the close() operation (which is harmless).
    }
}
From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testRandom() throws Exception {
    Directory dir = newDirectory();

    final int NUM_TERMS = atLeast(20);
    final Set<BytesRef> terms = new HashSet<>();
    while (terms.size() < NUM_TERMS) {
        final String s = TestUtil.randomRealisticUnicodeString(random());
        //final String s = _TestUtil.randomSimpleString(random);
        if (s.length() > 0) {
            terms.add(new BytesRef(s));
        }
    }
    final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
    Arrays.sort(termsArray);

    final int NUM_DOCS = atLeast(100);

    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));

    // Sometimes swap in codec that impls ord():
    if (random().nextInt(10) == 7) {
        // Make sure terms index has ords:
        Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
        conf.setCodec(codec);
    }

    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);

    final int[][] idToOrds = new int[NUM_DOCS][];
    final Set<Integer> ordsForDocSet = new HashSet<>();

    for (int id = 0; id < NUM_DOCS; id++) {
        Document doc = new Document();

        doc.add(new LegacyIntField("id", id, Field.Store.YES));

        final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
        while (ordsForDocSet.size() < termCount) {
            ordsForDocSet.add(random().nextInt(termsArray.length));
        }
        final int[] ordsForDoc = new int[termCount];
        int upto = 0;
        if (VERBOSE) {
            System.out.println("TEST: doc id=" + id);
        }
        for (int ord : ordsForDocSet) {
            ordsForDoc[upto++] = ord;
            Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
            if (VERBOSE) {
                System.out.println("  f=" + termsArray[ord].utf8ToString());
            }
            doc.add(field);
        }
        ordsForDocSet.clear();
        Arrays.sort(ordsForDoc);
        idToOrds[id] = ordsForDoc;

        w.addDocument(doc);
    }

    final DirectoryReader r = w.getReader();
    w.close();

    if (VERBOSE) {
        System.out.println("TEST: reader=" + r);
    }

    for (LeafReaderContext ctx : r.leaves()) {
        if (VERBOSE) {
            System.out.println("\nTEST: sub=" + ctx.reader());
        }
        verify(ctx.reader(), idToOrds, termsArray, null);
    }

    // Also test top-level reader: its enum does not support
    // ord, so this forces the OrdWrapper to run:
    if (VERBOSE) {
        System.out.println("TEST: top reader");
    }
    LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(slowR);
    verify(slowR, idToOrds, termsArray, null);

    FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());

    r.close();
    dir.close();
}
From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testRandomWithPrefix() throws Exception {
    Directory dir = newDirectory();

    final Set<String> prefixes = new HashSet<>();
    final int numPrefix = TestUtil.nextInt(random(), 2, 7);
    if (VERBOSE) {
        System.out.println("TEST: use " + numPrefix + " prefixes");
    }
    while (prefixes.size() < numPrefix) {
        prefixes.add(TestUtil.randomRealisticUnicodeString(random()));
        //prefixes.add(_TestUtil.randomSimpleString(random));
    }
    final String[] prefixesArray = prefixes.toArray(new String[prefixes.size()]);

    final int NUM_TERMS = atLeast(20);
    final Set<BytesRef> terms = new HashSet<>();
    while (terms.size() < NUM_TERMS) {
        final String s = prefixesArray[random().nextInt(prefixesArray.length)]
                + TestUtil.randomRealisticUnicodeString(random());
        //final String s = prefixesArray[random.nextInt(prefixesArray.length)] + _TestUtil.randomSimpleString(random);
        if (s.length() > 0) {
            terms.add(new BytesRef(s));
        }
    }
    final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
    Arrays.sort(termsArray);

    final int NUM_DOCS = atLeast(100);

    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));

    // Sometimes swap in codec that impls ord():
    if (random().nextInt(10) == 7) {
        Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
        conf.setCodec(codec);
    }

    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);

    final int[][] idToOrds = new int[NUM_DOCS][];
    final Set<Integer> ordsForDocSet = new HashSet<>();

    for (int id = 0; id < NUM_DOCS; id++) {
        Document doc = new Document();

        doc.add(new LegacyIntField("id", id, Field.Store.YES));

        final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
        while (ordsForDocSet.size() < termCount) {
            ordsForDocSet.add(random().nextInt(termsArray.length));
        }
        final int[] ordsForDoc = new int[termCount];
        int upto = 0;
        if (VERBOSE) {
            System.out.println("TEST: doc id=" + id);
        }
        for (int ord : ordsForDocSet) {
            ordsForDoc[upto++] = ord;
            Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
            if (VERBOSE) {
                System.out.println("  f=" + termsArray[ord].utf8ToString());
            }
            doc.add(field);
        }
        ordsForDocSet.clear();
        Arrays.sort(ordsForDoc);
        idToOrds[id] = ordsForDoc;

        w.addDocument(doc);
    }

    final DirectoryReader r = w.getReader();
    w.close();

    if (VERBOSE) {
        System.out.println("TEST: reader=" + r);
    }

    LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(slowR);
    for (String prefix : prefixesArray) {

        final BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);

        final int[][] idToOrdsPrefix = new int[NUM_DOCS][];
        for (int id = 0; id < NUM_DOCS; id++) {
            final int[] docOrds = idToOrds[id];
            final List<Integer> newOrds = new ArrayList<>();
            for (int ord : idToOrds[id]) {
                if (StringHelper.startsWith(termsArray[ord], prefixRef)) {
                    newOrds.add(ord);
                }
            }
            final int[] newOrdsArray = new int[newOrds.size()];
            int upto = 0;
            for (int ord : newOrds) {
                newOrdsArray[upto++] = ord;
            }
            idToOrdsPrefix[id] = newOrdsArray;
        }

        for (LeafReaderContext ctx : r.leaves()) {
            if (VERBOSE) {
                System.out.println("\nTEST: sub=" + ctx.reader());
            }
            verify(ctx.reader(), idToOrdsPrefix, termsArray, prefixRef);
        }

        // Also test top-level reader: its enum does not support
        // ord, so this forces the OrdWrapper to run:
        if (VERBOSE) {
            System.out.println("TEST: top reader");
        }
        verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
    }

    FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());

    r.close();
    dir.close();
}
From source file: org.elasticsearch.index.codec.postingformat.DefaultPostingsFormatTests.java
License: Apache License

@Test
public void testUseDefault() throws IOException {
    Codec codec = new TestCodec();
    Directory d = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, new WhitespaceAnalyzer(Lucene.VERSION));
    config.setCodec(codec);
    IndexWriter writer = new IndexWriter(d, config);
    writer.addDocument(Arrays.asList(new TextField("foo", "bar", Store.YES),
            new TextField(UidFieldMapper.NAME, "1234", Store.YES)));
    writer.commit();
    DirectoryReader reader = DirectoryReader.open(writer, false);
    List<AtomicReaderContext> leaves = reader.leaves();
    assertThat(leaves.size(), equalTo(1));
    AtomicReader ar = leaves.get(0).reader();
    Terms terms = ar.terms("foo");
    Terms uidTerms = ar.terms(UidFieldMapper.NAME);
    assertThat(terms.size(), equalTo(1l));
    assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
    assertThat(uidTerms, instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class));
    reader.close();
    writer.close();
    d.close();
}
From source file: org.elasticsearch.index.codec.postingformat.DefaultPostingsFormatTests.java
License: Apache License

@Test
public void testNoUIDField() throws IOException {
    Codec codec = new TestCodec();
    Directory d = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, new WhitespaceAnalyzer(Lucene.VERSION));
    config.setCodec(codec);
    IndexWriter writer = new IndexWriter(d, config);
    for (int i = 0; i < 100; i++) {
        writer.addDocument(Arrays.asList(new TextField("foo", "foo bar foo bar", Store.YES),
                new TextField("some_other_field", "1234", Store.YES)));
    }
    writer.forceMerge(1);
    writer.commit();
    DirectoryReader reader = DirectoryReader.open(writer, false);
    List<AtomicReaderContext> leaves = reader.leaves();
    assertThat(leaves.size(), equalTo(1));
    AtomicReader ar = leaves.get(0).reader();
    Terms terms = ar.terms("foo");
    Terms some_other_field = ar.terms("some_other_field");
    assertThat(terms.size(), equalTo(2l));
    assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
    assertThat(some_other_field, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
    TermsEnum iterator = terms.iterator(null);
    Set<String> expected = new HashSet<String>();
    expected.add("foo");
    expected.add("bar");
    while (iterator.next() != null) {
        expected.remove(iterator.term().utf8ToString());
    }
    assertThat(expected.size(), equalTo(0));
    reader.close();
    writer.close();
    d.close();
}