List of usage examples for org.apache.lucene.index.IndexWriterConfig#setUseCompoundFile
@Override public IndexWriterConfig setUseCompoundFile(boolean useCompoundFile)
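For orientation before the examples below (which mostly use the older 4.x constructors), here is a minimal self-contained sketch of the call against a Lucene 5+ API, where IndexWriterConfig no longer takes a Version argument; the class name and index path are illustrative only:

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SetUseCompoundFileExample {
    public static void main(String[] args) throws Exception {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        // false: keep each segment's files separate on disk
        // true (the default): pack newly flushed segments into a compound file
        config.setUseCompoundFile(false);
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            doc.add(new TextField("content", "hello compound files", Store.YES));
            writer.addDocument(doc);
            writer.commit();
        }
    }
}

Note that this setting only governs newly flushed segments; even with useCompoundFile enabled, the merge policy's noCFSRatio can still cause large merged segments to be written in non-compound format.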
From source file:org.apache.solr.codecs.test.testONSQLCodec.java
License:Apache License
public static void main(String[] args) {
    try {
        plaintextDir = assureDirectoryExists(new File(INDEX_ROOT_FOLDER));
        testUtil.initPropsONSQL();
        // ----------- index documents -------
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
        // recreate the index on each execution
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        //config.setCodec(new SimpleTextCodec());
        ONSQLCodec codec = new ONSQLCodec();
        config.setCodec(codec);
        config.setUseCompoundFile(false);
        Directory luceneDir = FSDirectory.open(plaintextDir);
        IndexWriter writer = new IndexWriter(luceneDir, config);
        writer.addDocument(Arrays.asList(new TextField("title", "The title of my first document", Store.YES),
                new TextField("content", "The content of the first document", Store.YES),
                new IntField("intval", 111111, Store.YES),
                new LongField("longval", 1111111111L, Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the second document", Store.YES),
                new TextField("content", "The content of the second document", Store.YES),
                new IntField("intval", 222222, Store.YES),
                new LongField("longval", 222222222L, Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the third document", Store.YES),
                new TextField("content", "The content of the third document", Store.YES),
                new IntField("intval", 333333, Store.YES),
                new LongField("longval", 3333333333L, Store.YES)));
        writer.commit();
        writer.close();
        // now verify the doc count
        IndexReader reader = DirectoryReader.open(luceneDir);
        if (reader.numDocs() < 3)
            throw new IOException("number of returned docs is less than the number indexed");
        else
            System.out.println("test passed");
        reader.close();
        searchIndex("content", "third");
    } catch (Throwable te) {
        te.printStackTrace();
    }
}
From source file:org.apache.solr.codecs.test.testONSQLWrapperDirectory.java
License:Apache License
public static void main(String[] args) {
    try {
        testUtil.initPropsONSQL();
        // ----------- index documents -------
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_1);
        //Codec cd = new ONSQLCodec("omega");
        //Codec.setDefault(cd);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1, analyzer);
        // create the index if it is missing, otherwise append to it
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        //config.setCodec(new SimpleTextCodec());
        ONSQLCodec codec = new ONSQLCodec();
        config.setCodec(codec);
        config.setUseCompoundFile(false);
        Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
        String[] list = luceneDir.listAll();
        System.out.println("listall length=" + list.length);
        for (int i = 0; i < list.length; i++) {
            System.out.println(list[i]);
        }
        IndexWriter writer = new IndexWriter(luceneDir, config);
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the first document", Store.YES),
                new TextField("content", "The content of the first document", Store.YES),
                new TextField("global_bu_id", "1b", Store.YES),
                new TextField("omega_order_num", "1n", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the second document", Store.YES),
                new TextField("content", "The content of the second document", Store.YES),
                new TextField("global_bu_id", "1k", Store.YES),
                new TextField("omega_order_num", "2b", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the third document", Store.YES),
                new TextField("content", "The content of the third document", Store.YES),
                new TextField("global_bu_id", "2k", Store.YES),
                new TextField("omega_order_num", "3b", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the fourth document", Store.YES),
                new TextField("content", "The content of the fourth document", Store.YES),
                new TextField("global_bu_id", "2k", Store.YES),
                new TextField("omega_order_num", "4b", Store.YES)));
        //writer.commit();
        writer.close();
        /*
        IndexReader reader = DirectoryReader.open(luceneDir);
        // now verify the doc count
        if (reader.numDocs() != 3)
            throw new IOException("number of returned docs does not match the number indexed");
        else
            System.out.println("test passed");
        */
        searchIndex("content", "second");
        System.out.println("---- now we delete docs for the second document ----");
        deleteDocs("content", "second");
        System.out.println("---- and repeat the search ----");
        searchIndex("content", "second");
    } catch (Throwable te) {
        te.printStackTrace();
    }
}
From source file:org.apache.solr.codecs.test.testONSQLWrapperDirectory.java
License:Apache License
public static void deleteDocs(String searchField, String searchString) throws IOException, ParseException {
    System.out.println("deleting docs for '" + searchString + "'");
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_1,
            new StandardAnalyzer(Version.LUCENE_4_10_1));
    config.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    ONSQLCodec codec = new ONSQLCodec();
    config.setCodec(codec);
    config.setUseCompoundFile(false);
    Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
    IndexWriter writer = new IndexWriter(luceneDir, config);
    QueryParser queryParser = new QueryParser(Version.LUCENE_4_10_1, searchField,
            new StandardAnalyzer(Version.LUCENE_4_10_1));
    Query query = queryParser.parse(searchString);
    writer.deleteDocuments(query);
    writer.commit();
    writer.close();
    luceneDir.close();
    System.out.println("docs were deleted");
}
From source file:org.apache.solr.codecs.test.testSimpleTextCodec.java
License:Apache License
public static void main(String[] args) {
    try {
        plaintextDir = assureDirectoryExists(new File(INDEX_ROOT_FOLDER, "plaintext"));
        // ----------- index documents -------
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_48, analyzer);
        // recreate the index on each execution
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        config.setCodec(new SimpleTextCodec());
        config.setUseCompoundFile(false);
        Directory luceneDir = FSDirectory.open(plaintextDir);
        IndexWriter writer = new IndexWriter(luceneDir, config);
        writer.addDocument(Arrays.asList(new TextField("title", "The title of my first document", Store.YES),
                new TextField("content", "The content of the first document", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the second document", Store.YES),
                new TextField("content", "The content of the second document", Store.YES)));
        writer.addDocument(Arrays.asList(new TextField("title", "The title of the third document", Store.YES),
                new TextField("content", "The content of the third document", Store.YES)));
        writer.commit();
        writer.close();
        // now verify the doc count
        IndexReader reader = DirectoryReader.open(luceneDir);
        if (reader.numDocs() != 3)
            throw new IOException("number of returned docs does not match the number indexed");
        else
            System.out.println("test passed");
        reader.close();
        searchIndex("content", "third");
    } catch (Throwable te) {
        te.printStackTrace();
    }
}
From source file:org.apache.solr.update.SolrIndexConfig.java
License:Apache License
public IndexWriterConfig toIndexWriterConfig(IndexSchema schema) {
    // so that we can update the analyzer on core reload, we pass null
    // for the default analyzer, and explicitly pass an analyzer on
    // appropriate calls to IndexWriter
    IndexWriterConfig iwc = new IndexWriterConfig(luceneVersion, null);
    if (maxBufferedDocs != -1)
        iwc.setMaxBufferedDocs(maxBufferedDocs);
    if (ramBufferSizeMB != -1)
        iwc.setRAMBufferSizeMB(ramBufferSizeMB);
    if (termIndexInterval != -1)
        iwc.setTermIndexInterval(termIndexInterval);
    if (writeLockTimeout != -1)
        iwc.setWriteLockTimeout(writeLockTimeout);
    iwc.setSimilarity(schema.getSimilarity());
    iwc.setMergePolicy(buildMergePolicy(schema));
    iwc.setMergeScheduler(buildMergeScheduler(schema));
    iwc.setInfoStream(infoStream);
    // do this after buildMergePolicy since the backcompat logic
    // there may modify the effective useCompoundFile
    iwc.setUseCompoundFile(getUseCompoundFile());
    if (maxIndexingThreads != -1) {
        iwc.setMaxThreadStates(maxIndexingThreads);
    }
    if (mergedSegmentWarmerInfo != null) {
        // TODO: add infostream -> normal logging system (there is an issue somewhere)
        IndexReaderWarmer warmer = schema.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className,
                IndexReaderWarmer.class, null, new Class[] { InfoStream.class },
                new Object[] { iwc.getInfoStream() });
        iwc.setMergedSegmentWarmer(warmer);
    }
    return iwc;
}
From source file:org.apdplat.word.lucene.ChineseWordAnalyzerTest.java
License:Open Source License
@Test
public void test3() {
    Analyzer analyzer = new ChineseWordAnalyzer();
    // NOTE: the original Chinese test sentences were lost to mojibake in this
    // listing and appear below as "?" placeholders; order and count are preserved
    List<String> sentences = new ArrayList<>();
    Collections.addAll(sentences, "??APDPlat???", "?", "??", "?", "??", "????", "?", "", "????", "?",
            "?", "??", "", "?,?", "???", "", "?", "????", "", "????", "??", "?", "?", "?", "????", "?",
            "?", "?", "?", "?", "???", "?", "?", "", "?", "??", "?", "", "", "?", "????",
            "word????????ysc");
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setUseCompoundFile(false);
    File index = new File("target/indexes");
    Utils.deleteDir(index);
    try (Directory directory = new SimpleFSDirectory(index.toPath());
            IndexWriter indexWriter = new IndexWriter(directory, config)) {
        for (String sentence : sentences) {
            Document doc = new Document();
            Field field = new TextField("text", sentence, Field.Store.YES);
            doc.add(field);
            indexWriter.addDocument(doc);
        }
        indexWriter.commit();
    } catch (Exception e) {
        e.printStackTrace();
        fail("" + e.getMessage());
    }
    try (Directory directory = new SimpleFSDirectory(index.toPath());
            DirectoryReader directoryReader = DirectoryReader.open(directory)) {
        IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
        QueryParser queryParser = new QueryParser("text", analyzer);
        Query query = queryParser.parse("text:??");
        TopDocs docs = indexSearcher.search(query, Integer.MAX_VALUE);
        assertEquals(2, docs.totalHits);
        assertEquals("word????????ysc", indexSearcher.doc(docs.scoreDocs[0].doc).get("text"));
        assertEquals("??APDPlat???", indexSearcher.doc(docs.scoreDocs[1].doc).get("text"));
        query = queryParser.parse("text:");
        docs = indexSearcher.search(query, Integer.MAX_VALUE);
        assertEquals(1, docs.totalHits);
        assertEquals("?", indexSearcher.doc(docs.scoreDocs[0].doc).get("text"));
    } catch (Exception e) {
        fail("?" + e.getMessage());
    }
}
From source file:org.elasticsearch.common.lucene.LuceneTests.java
License:Apache License
public void testFiles() throws IOException {
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setEnableVirusScanner(false);
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setMaxBufferedDocs(2);
    iwc.setUseCompoundFile(true);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    Set<String> files = new HashSet<>();
    for (String f : Lucene.files(Lucene.readSegmentInfos(dir))) {
        files.add(f);
    }
    final boolean simpleTextCFS = files.contains("_0.scf");
    assertTrue(files.toString(), files.contains("segments_1"));
    if (simpleTextCFS) {
        assertFalse(files.toString(), files.contains("_0.cfs"));
        assertFalse(files.toString(), files.contains("_0.cfe"));
    } else {
        assertTrue(files.toString(), files.contains("_0.cfs"));
        assertTrue(files.toString(), files.contains("_0.cfe"));
    }
    assertTrue(files.toString(), files.contains("_0.si"));
    doc = new Document();
    doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
    writer.addDocument(doc);
    writer.commit();
    files.clear();
    for (String f : Lucene.files(Lucene.readSegmentInfos(dir))) {
        files.add(f);
    }
    assertFalse(files.toString(), files.contains("segments_1"));
    assertTrue(files.toString(), files.contains("segments_2"));
    if (simpleTextCFS) {
        assertFalse(files.toString(), files.contains("_0.cfs"));
        assertFalse(files.toString(), files.contains("_0.cfe"));
    } else {
        assertTrue(files.toString(), files.contains("_0.cfs"));
        assertTrue(files.toString(), files.contains("_0.cfe"));
    }
    assertTrue(files.toString(), files.contains("_0.si"));
    if (simpleTextCFS) {
        assertFalse(files.toString(), files.contains("_1.cfs"));
        assertFalse(files.toString(), files.contains("_1.cfe"));
    } else {
        assertTrue(files.toString(), files.contains("_1.cfs"));
        assertTrue(files.toString(), files.contains("_1.cfe"));
    }
    assertTrue(files.toString(), files.contains("_1.si"));
    writer.close();
    dir.close();
}
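As the assertions above illustrate, with setUseCompoundFile(true) each flushed segment is packed into a _N.cfs data file plus a _N.cfe entries table (the SimpleText codec writes a single _N.scf instead), while the per-segment _N.si info file always remains outside the compound file.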
From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java
License:Apache License
private IndexWriter createWriter() throws IOException {
    try {
        boolean create = !Lucene.indexExists(store.directory());
        IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION,
                analysisService.defaultIndexAnalyzer());
        config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
        config.setIndexDeletionPolicy(deletionPolicy);
        config.setInfoStream(new LoggerInfoStream(indexSettings, shardId));
        config.setMergeScheduler(mergeScheduler.newMergeScheduler());
        MergePolicy mergePolicy = mergePolicyProvider.getMergePolicy();
        // Give us the opportunity to upgrade old segments while performing
        // background merges
        mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
        config.setMergePolicy(mergePolicy);
        config.setSimilarity(similarityService.similarity());
        config.setRAMBufferSizeMB(indexingBufferSize.mbFrac());
        config.setMaxThreadStates(indexConcurrency);
        config.setCodec(codecService.codec(codecName));
        /* We set this timeout to a highish value to work around
         * the default poll interval in the Lucene lock, which is
         * 1000ms by default. We might need to poll multiple times
         * here, but with a 1s poll this is only executed twice at most
         * in combination with the default write lock timeout. */
        config.setWriteLockTimeout(5000);
        config.setUseCompoundFile(this.compoundOnFlush);
        config.setCheckIntegrityAtMerge(checksumOnMerge);
        // Warm-up hook for newly merged segments. Warming up segments here is better since it will be
        // performed at the end of the merge operation and won't slow down _refresh
        config.setMergedSegmentWarmer(new IndexReaderWarmer() {
            @Override
            public void warm(AtomicReader reader) throws IOException {
                try {
                    assert isMergedSegment(reader);
                    if (warmer != null) {
                        final Engine.Searcher searcher = new SimpleSearcher("warmer",
                                new IndexSearcher(reader));
                        final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId,
                                searcher);
                        warmer.warmNewReaders(context);
                    }
                } catch (Throwable t) {
                    // Don't fail a merge if the warm-up failed
                    if (!closed) {
                        logger.warn("Warm-up failed", t);
                    }
                    if (t instanceof Error) {
                        // assertion/out-of-memory error, don't ignore those
                        throw (Error) t;
                    }
                }
            }
        });
        return new IndexWriter(store.directory(), config);
    } catch (LockObtainFailedException ex) {
        boolean isLocked = IndexWriter.isLocked(store.directory());
        logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked);
        throw ex;
    }
}
From source file:org.elasticsearch.index.engine.internal.InternalEngine.java
License:Apache License
private IndexWriter createWriter() throws IOException {
    try {
        // release locks when started
        if (IndexWriter.isLocked(store.directory())) {
            logger.warn("shard is locked, releasing lock");
            IndexWriter.unlock(store.directory());
        }
        boolean create = !Lucene.indexExists(store.directory());
        IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION,
                analysisService.defaultIndexAnalyzer());
        config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
        config.setIndexDeletionPolicy(deletionPolicy);
        config.setMergeScheduler(mergeScheduler.newMergeScheduler());
        MergePolicy mergePolicy = mergePolicyProvider.newMergePolicy();
        // Give us the opportunity to upgrade old segments while performing
        // background merges
        mergePolicy = new IndexUpgraderMergePolicy(mergePolicy);
        config.setMergePolicy(mergePolicy);
        config.setSimilarity(similarityService.similarity());
        config.setRAMBufferSizeMB(indexingBufferSize.mbFrac());
        config.setMaxThreadStates(indexConcurrency);
        config.setCodec(codecService.codec(codecName));
        /* We set this timeout to a highish value to work around
         * the default poll interval in the Lucene lock, which is
         * 1000ms by default. We might need to poll multiple times
         * here, but with a 1s poll this is only executed twice at most
         * in combination with the default write lock timeout. */
        config.setWriteLockTimeout(5000);
        config.setUseCompoundFile(this.compoundOnFlush);
        // Warm-up hook for newly merged segments. Warming up segments here is better since it will be
        // performed at the end of the merge operation and won't slow down _refresh
        config.setMergedSegmentWarmer(new IndexReaderWarmer() {
            @Override
            public void warm(AtomicReader reader) throws IOException {
                try {
                    assert isMergedSegment(reader);
                    final Engine.Searcher searcher = new SimpleSearcher("warmer", new IndexSearcher(reader));
                    final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId,
                            searcher);
                    if (warmer != null)
                        warmer.warm(context);
                } catch (Throwable t) {
                    // Don't fail a merge if the warm-up failed
                    if (!closed) {
                        logger.warn("Warm-up failed", t);
                    }
                    if (t instanceof Error) {
                        // assertion/out-of-memory error, don't ignore those
                        throw (Error) t;
                    }
                }
            }
        });
        return new IndexWriter(store.directory(), config);
    } catch (LockObtainFailedException ex) {
        boolean isLocked = IndexWriter.isLocked(store.directory());
        logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked);
        throw ex;
    }
}
From source file:org.elasticsearch.index.engine.InternalEngine.java
License:Apache License
private IndexWriter createWriter(boolean create) throws IOException {
    try {
        final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
        iwc.setCommitOnClose(false); // we by default don't commit on close
        iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
        iwc.setIndexDeletionPolicy(deletionPolicy);
        // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
        boolean verbose = false;
        try {
            verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
        } catch (Throwable ignore) {
        }
        iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
        iwc.setMergeScheduler(mergeScheduler);
        MergePolicy mergePolicy = config().getMergePolicy();
        // Give us the opportunity to upgrade old segments while performing
        // background merges
        mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
        iwc.setMergePolicy(mergePolicy);
        iwc.setSimilarity(engineConfig.getSimilarity());
        iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().mbFrac());
        iwc.setCodec(engineConfig.getCodec());
        /* We set this timeout to a highish value to work around
         * the default poll interval in the Lucene lock, which is
         * 1000ms by default. We might need to poll multiple times
         * here, but with a 1s poll this is only executed twice at most
         * in combination with the default write lock timeout. */
        iwc.setWriteLockTimeout(5000);
        iwc.setUseCompoundFile(this.engineConfig.isCompoundOnFlush());
        // Warm-up hook for newly merged segments. Warming up segments here is better since it will be
        // performed at the end of the merge operation and won't slow down _refresh
        iwc.setMergedSegmentWarmer(new IndexReaderWarmer() {
            @Override
            public void warm(LeafReader reader) throws IOException {
                try {
                    LeafReader esLeafReader = new ElasticsearchLeafReader(reader, shardId);
                    assert isMergedSegment(esLeafReader);
                    if (warmer != null) {
                        final Engine.Searcher searcher = new Searcher("warmer",
                                searcherFactory.newSearcher(esLeafReader, null));
                        final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId,
                                searcher);
                        warmer.warmNewReaders(context);
                    }
                } catch (Throwable t) {
                    // Don't fail a merge if the warm-up failed
                    if (isClosed.get() == false) {
                        logger.warn("Warm-up failed", t);
                    }
                    if (t instanceof Error) {
                        // assertion/out-of-memory error, don't ignore those
                        throw (Error) t;
                    }
                }
            }
        });
        return new IndexWriter(store.directory(), iwc);
    } catch (LockObtainFailedException ex) {
        boolean isLocked = IndexWriter.isLocked(store.directory());
        logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked);
        throw ex;
    }
}