Usage examples for org.apache.lucene.index.IndexWriter#forceMerge
public void forceMerge(int maxNumSegments) throws IOException
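forceMerge(maxNumSegments) rewrites the index down to at most maxNumSegments segments. It is a blocking, I/O-intensive call, so it is normally reserved for indexes that will no longer be updated. The following minimal sketch shows the pattern shared by the examples below, assuming a Lucene 5+ style API; the directory path, analyzer, and field name are illustrative and not taken from any of the source files:

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

// Minimal sketch: add a document, then merge the index down to one segment.
try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
     IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
  Document doc = new Document();
  doc.add(new TextField("body", "hello world", Field.Store.NO));
  writer.addDocument(doc);
  writer.forceMerge(1); // blocks until the merge completes; expensive on large indexes
}

Each example below follows this shape: build or copy an index, call forceMerge(1) to collapse it into a single segment, then close the writer.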
From source file: org.apache.blur.utils.TableShardCountCollapser.java
License: Apache License

public void collapseShardsTo(int newShardCount) throws IOException {
  if (!validateCount(newShardCount)) {
    throw new RuntimeException(
        "Count [" + newShardCount + "] is not valid, valid values are [" + getCollapsePossibilities() + "]");
  }
  Path[] paths = getPaths();
  int numberOfShardsToMergePerPass = paths.length / newShardCount;
  for (int i = 0; i < newShardCount; i++) {
    System.out.println("Base Index [" + paths[i] + "]");
    IndexWriterConfig lconf = new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer());
    lconf.setCodec(new Blur024Codec());
    HdfsDirectory dir = new HdfsDirectory(getConf(), paths[i]);
    IndexWriter indexWriter = new IndexWriter(dir, lconf);
    Directory[] dirs = new Directory[numberOfShardsToMergePerPass - 1];
    Path[] pathsToDelete = new Path[numberOfShardsToMergePerPass - 1];
    for (int p = 1; p < numberOfShardsToMergePerPass; p++) {
      Path pathToMerge = paths[i + p * newShardCount];
      System.out.println("Merge [" + pathToMerge + "]");
      dirs[p - 1] = new HdfsDirectory(getConf(), pathToMerge);
      pathsToDelete[p - 1] = pathToMerge;
    }
    indexWriter.addIndexes(dirs);
    // Causes a rewrite of the index; the symlinked files are merged/rewritten.
    indexWriter.forceMerge(1);
    indexWriter.close();
    FileSystem fileSystem = path.getFileSystem(getConf());
    for (Path p : pathsToDelete) {
      fileSystem.delete(p, true);
    }
  }
}
From source file: org.apache.jackrabbit.core.query.lucene.IndexMigration.java
License: Apache License

/**
 * Checks if the given <code>index</code> needs to be migrated.
 *
 * @param index the index to check and migrate if needed.
 * @param directoryManager the directory manager.
 * @param oldSeparatorChar the old separator char that needs to be replaced.
 * @throws IOException if an error occurs while migrating the index.
 */
public static void migrate(PersistentIndex index, DirectoryManager directoryManager, char oldSeparatorChar)
    throws IOException {
  Directory indexDir = index.getDirectory();
  log.debug("Checking {} ...", indexDir);
  ReadOnlyIndexReader reader = index.getReadOnlyIndexReader();
  try {
    if (IndexFormatVersion.getVersion(reader).getVersion() >= IndexFormatVersion.V3.getVersion()) {
      // index was created with Jackrabbit 1.5 or higher; no need for migration
      log.debug("IndexFormatVersion >= V3, no migration needed");
      return;
    }
    // assert: there is at least one node in the index, otherwise the
    // index format version would be at least V3
    TermEnum terms = reader.terms(new Term(FieldNames.PROPERTIES, ""));
    try {
      Term t = terms.term();
      if (t.text().indexOf(oldSeparatorChar) == -1) {
        log.debug("Index already migrated");
        return;
      }
    } finally {
      terms.close();
    }
  } finally {
    reader.release();
    index.releaseWriterAndReaders();
  }

  // if we get here then the index must be migrated
  log.debug("Index requires migration {}", indexDir);

  String migrationName = index.getName() + "_v36";
  if (directoryManager.hasDirectory(migrationName)) {
    directoryManager.delete(migrationName);
  }
  Directory migrationDir = directoryManager.getDirectory(migrationName);
  final IndexWriterConfig c = new IndexWriterConfig(Version.LUCENE_36, new JackrabbitAnalyzer());
  c.setMergePolicy(new UpgradeIndexMergePolicy(new LogByteSizeMergePolicy()));
  c.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
  try {
    IndexWriter writer = new IndexWriter(migrationDir, c);
    try {
      IndexReader r = new MigrationIndexReader(IndexReader.open(index.getDirectory()), oldSeparatorChar);
      try {
        writer.addIndexes(r);
        writer.forceMerge(1);
        writer.close();
      } finally {
        r.close();
      }
    } finally {
      writer.close();
    }
  } finally {
    migrationDir.close();
  }
  directoryManager.delete(index.getName());
  if (!directoryManager.rename(migrationName, index.getName())) {
    throw new IOException("failed to move migrated directory " + migrationDir);
  }
  log.info("Migrated " + index.getName());
}
From source file: org.apache.solr.codecs.test.testMergeSegments.java
License: Apache License

public static void main(String[] args) {
  try {
    testUtil.initPropsONSQL();
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    ONSQLCodec codec = new ONSQLCodec();
    config.setCodec(codec);
    config.setUseCompoundFile(false);
    Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
    IndexWriter writer = new IndexWriter(luceneDir, config);
    writer.forceMerge(1);
    writer.close();
  } catch (Throwable te) {
    te.printStackTrace();
  }
}
From source file: org.apache.solr.spelling.FileBasedSpellChecker.java
License: Apache License

private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) {
  try {
    IndexSchema schema = null == searcher ? core.getLatestSchema() : searcher.getSchema();
    // Get the field's analyzer
    if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) {
      FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
      // Do index-time analysis using the given fieldType's analyzer
      RAMDirectory ramDir = new RAMDirectory();

      LogMergePolicy mp = new LogByteSizeMergePolicy();
      mp.setMergeFactor(300);

      IndexWriter writer = new IndexWriter(ramDir,
          new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, fieldType.getAnalyzer())
              .setMaxBufferedDocs(150).setMergePolicy(mp)
              .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
          // TODO: if we enable this, codec gets angry since field won't exist in the schema
          // .setCodec(core.getCodec())
      );

      List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding);

      for (String s : lines) {
        Document d = new Document();
        d.add(new TextField(WORD_FIELD_NAME, s, Field.Store.NO));
        writer.addDocument(d);
      }
      writer.forceMerge(1);
      writer.close();

      dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir), WORD_FIELD_NAME, 0.0f);
    } else {
      // check if character encoding is defined
      if (characterEncoding == null) {
        dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation));
      } else {
        dictionary = new PlainTextDictionary(new InputStreamReader(
            core.getResourceLoader().openResource(sourceLocation), characterEncoding));
      }
    }
  } catch (IOException e) {
    log.error("Unable to load spellings", e);
  }
}
From source file: org.apache.solr.spelling.IndexBasedSpellCheckerTest.java
License: Apache License

@Test
public void testAlternateLocation() throws Exception {
  String[] ALT_DOCS = new String[] { "jumpin jack flash", "Sargent Peppers Lonely Hearts Club Band",
      "Born to Run", "Thunder Road", "Londons Burning", "A Horse with No Name", "Sweet Caroline" };
  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());
  File indexDir = new File(TEMP_DIR, "spellingIdx" + new Date().getTime());
  // create a standalone index
  File altIndexDir = new File(TEMP_DIR, "alternateIdx" + new Date().getTime());
  Directory dir = newFSDirectory(altIndexDir);
  IndexWriter iw = new IndexWriter(dir,
      new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
  for (int i = 0; i < ALT_DOCS.length; i++) {
    Document doc = new Document();
    doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES));
    iw.addDocument(doc);
  }
  iw.forceMerge(1);
  iw.close();
  dir.close();
  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
      dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
  RefCounted<SolrIndexSearcher> holder = core.getSearcher();
  SolrIndexSearcher searcher = holder.get();
  try {
    checker.build(core, searcher);
    IndexReader reader = searcher.getIndexReader();
    Collection<Token> tokens = queryConverter.convert("flesh");
    SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1,
        SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
    SpellingResult result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    // should be lowercased, b/c we are using a lowercasing analyzer
    Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("flesh is null and it shouldn't be", suggestions != null);
    assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
    Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
    assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash") == true);
    assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);
    // test something not in the spell checker
    spellOpts.tokens = queryConverter.convert("super");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions size should be 0", suggestions.size() == 0);
    spellOpts.tokens = queryConverter.convert("Caroline");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions is not null and it should be", suggestions == null);
  } finally {
    holder.decref();
  }
}
From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testNumericEncoded32() throws IOException {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

  Document doc = new Document();
  doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
  iw.addDocument(doc);

  doc = new Document();
  doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
  doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
  iw.addDocument(doc);

  iw.forceMerge(1);
  iw.close();

  DirectoryReader ir = DirectoryReader.open(dir);
  LeafReader ar = getOnlyLeafReader(ir);
  SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT32_TERM_PREFIX);
  assertEquals(2, v.getValueCount());

  assertEquals(0, v.nextDoc());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  assertEquals(1, v.nextDoc());
  assertEquals(0, v.nextOrd());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  BytesRef value = v.lookupOrd(0);
  assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));
  value = v.lookupOrd(1);
  assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));

  ir.close();
  dir.close();
}
From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testNumericEncoded64() throws IOException {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

  Document doc = new Document();
  doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
  iw.addDocument(doc);

  doc = new Document();
  doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
  doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
  iw.addDocument(doc);

  iw.forceMerge(1);
  iw.close();

  DirectoryReader ir = DirectoryReader.open(dir);
  LeafReader ar = getOnlyLeafReader(ir);
  SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT64_TERM_PREFIX);
  assertEquals(2, v.getValueCount());

  assertEquals(0, v.nextDoc());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  assertEquals(1, v.nextDoc());
  assertEquals(0, v.nextOrd());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  BytesRef value = v.lookupOrd(0);
  assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));
  value = v.lookupOrd(1);
  assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));

  ir.close();
  dir.close();
}
From source file: org.apache.solr.uninverting.TestDocTermOrds.java
License: Apache License

public void testActuallySingleValued() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwconfig = newIndexWriterConfig(null);
  iwconfig.setMergePolicy(newLogMergePolicy());
  IndexWriter iw = new IndexWriter(dir, iwconfig);

  Document doc = new Document();
  doc.add(new StringField("foo", "bar", Field.Store.NO));
  iw.addDocument(doc);

  doc = new Document();
  doc.add(new StringField("foo", "baz", Field.Store.NO));
  iw.addDocument(doc);

  doc = new Document();
  iw.addDocument(doc);

  doc = new Document();
  doc.add(new StringField("foo", "baz", Field.Store.NO));
  doc.add(new StringField("foo", "baz", Field.Store.NO));
  iw.addDocument(doc);

  iw.forceMerge(1);
  iw.close();

  DirectoryReader ir = DirectoryReader.open(dir);
  LeafReader ar = getOnlyLeafReader(ir);
  SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
  assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field
  assertEquals(2, v.getValueCount());

  assertEquals(0, v.nextDoc());
  assertEquals(0, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  assertEquals(1, v.nextDoc());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  assertEquals(3, v.nextDoc());
  assertEquals(1, v.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

  BytesRef value = v.lookupOrd(0);
  assertEquals("bar", value.utf8ToString());
  value = v.lookupOrd(1);
  assertEquals("baz", value.utf8ToString());

  ir.close();
  dir.close();
}
From source file: org.apache.solr.uninverting.TestFieldCache.java
License: Apache License

@BeforeClass
public static void beforeClass() throws Exception {
  NUM_DOCS = atLeast(500);
  NUM_ORDS = atLeast(2);
  directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory,
      new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy()));
  long theLong = Long.MAX_VALUE;
  double theDouble = Double.MAX_VALUE;
  int theInt = Integer.MAX_VALUE;
  float theFloat = Float.MAX_VALUE;
  unicodeStrings = new String[NUM_DOCS];
  multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
  if (VERBOSE) {
    System.out.println("TEST: setUp");
  }
  for (int i = 0; i < NUM_DOCS; i++) {
    Document doc = new Document();
    doc.add(new LongPoint("theLong", theLong--));
    doc.add(new DoublePoint("theDouble", theDouble--));
    doc.add(new IntPoint("theInt", theInt--));
    doc.add(new FloatPoint("theFloat", theFloat--));
    if (i % 2 == 0) {
      doc.add(new IntPoint("sparse", i));
    }
    if (i % 2 == 0) {
      doc.add(new IntPoint("numInt", i));
    }
    // sometimes skip the field:
    if (random().nextInt(40) != 17) {
      unicodeStrings[i] = generateString(i);
      doc.add(newStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
    }
    // sometimes skip the field:
    if (random().nextInt(10) != 8) {
      for (int j = 0; j < NUM_ORDS; j++) {
        String newValue = generateString(i);
        multiValued[i][j] = new BytesRef(newValue);
        doc.add(newStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
      }
      Arrays.sort(multiValued[i]);
    }
    writer.addDocument(doc);
  }
  writer.forceMerge(1); // this test relies on one segment and docid order
  IndexReader r = DirectoryReader.open(writer);
  assertEquals(1, r.leaves().size());
  reader = r.leaves().get(0).reader();
  TestUtil.checkReader(reader);
  writer.close();
}
From source file: org.apache.solr.uninverting.TestFieldCacheSort.java
License: Apache License

/**
 * Tests that we throw an exception when sorting on a multi-valued field,
 * since uninverting it creates a corrupt reader; use SORTED_SET instead.
 */
public void testMultiValuedField() throws IOException {
  Directory indexStore = newDirectory();
  IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig(new MockAnalyzer(random())));
  for (int i = 0; i < 5; i++) {
    Document doc = new Document();
    doc.add(new StringField("string", "a" + i, Field.Store.NO));
    doc.add(new StringField("string", "b" + i, Field.Store.NO));
    writer.addDocument(doc);
  }
  writer.forceMerge(1); // enforce one segment to have a higher unique term count in all cases
  writer.close();
  Sort sort = new Sort(new SortField("string", SortField.Type.STRING), SortField.FIELD_DOC);
  IndexReader reader = UninvertingReader.wrap(DirectoryReader.open(indexStore),
      Collections.singletonMap("string", Type.SORTED));
  IndexSearcher searcher = new IndexSearcher(reader);
  expectThrows(IllegalStateException.class, () -> {
    searcher.search(new MatchAllDocsQuery(), 500, sort);
  });
  reader.close();
  indexStore.close();
}
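As the comment in the last example notes, the supported way to sort on a multi-valued field is SORTED_SET doc values rather than uninverting a single-valued view. The following is a minimal sketch of that alternative, assuming a Lucene 6.x-era API to match these tests; RAMDirectory, the field name "string", and the document values are illustrative:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

Directory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
for (int i = 0; i < 5; i++) {
  Document doc = new Document();
  // index both values as SORTED_SET doc values instead of relying on uninversion
  doc.add(new SortedSetDocValuesField("string", new BytesRef("a" + i)));
  doc.add(new SortedSetDocValuesField("string", new BytesRef("b" + i)));
  writer.addDocument(doc);
}
writer.forceMerge(1);
writer.close();

IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
// SortedSetSortField sorts each document by its minimum value per document
// by default, so this search succeeds where the uninverting version throws.
Sort sort = new Sort(new SortedSetSortField("string", false), SortField.FIELD_DOC);
TopDocs hits = searcher.search(new MatchAllDocsQuery(), 500, sort);
reader.close();
dir.close();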