Usage examples for org.apache.lucene.index.IndexWriterConfig#setCodec
public IndexWriterConfig setCodec(Codec codec)
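setCodec selects the Codec that IndexWriter uses to encode new segments, and it returns the same IndexWriterConfig so it can be chained with other setters. Before the project-specific examples below, here is a minimal, self-contained sketch. It is not taken from any of the sources that follow; it assumes a Lucene 6.2 classpath (to match the Lucene62Codec used in the CarbonData example), and the codec, analyzer, and class name are illustrative choices.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.lucene62.Lucene62Codec;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class SetCodecExample {
    public static void main(String[] args) throws Exception {
        Directory directory = new RAMDirectory();
        // setCodec returns this IndexWriterConfig, so calls can be chained
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer())
                .setCodec(new Lucene62Codec());
        try (IndexWriter writer = new IndexWriter(directory, config)) {
            // documents added here are written in the format defined by the codec
        }
    }
}

Note that the codec only affects how segments are written; a codec with the same name must also be resolvable (via SPI) when the index is later opened for reading.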
From source file:org.apache.blur.lucene.codec.Blur022CodecTest.java
License:Apache License
@Test
public void testLargeDocs() throws IOException {
    Random random = new Random();
    Iterable<? extends IndexableField> doc = getLargeDoc(random);
    RAMDirectory directory = new RAMDirectory();
    IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
    conf1.setCodec(new Blur022Codec());
    IndexWriter writer1 = new IndexWriter(directory, conf1);
    writer1.addDocument(doc);
    writer1.close();

    DirectoryReader reader1 = DirectoryReader.open(directory);
    int numDocs1 = reader1.numDocs();
    assertEquals(1, numDocs1);
    // for (int i = 0; i < numDocs1; i++) {
    //   System.out.println(reader1.document(i));
    // }

    IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
    conf2.setCodec(new Blur022Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
    IndexWriter writer2 = new IndexWriter(directory, conf2);
    writer2.addDocument(doc);
    writer2.close();

    DirectoryReader reader2 = DirectoryReader.open(directory);
    int numDocs2 = reader2.numDocs();
    assertEquals(2, numDocs2);

    for (int i = 0; i < 2; i++) {
        long t1 = System.nanoTime();
        Document document1 = reader1.document(0);
        long t2 = System.nanoTime();
        Document document2 = reader2.document(1);
        long t3 = System.nanoTime();
        System.out.println((t3 - t2) / 1000000.0);
        System.out.println((t2 - t1) / 1000000.0);
        System.out.println("doc1 " + document1.hashCode());
        System.out.println("doc2 " + document2.hashCode());
    }
    // for (int i = 0; i < numDocs2; i++) {
    //   System.out.println(reader2.document(i));
    // }
    // long fileLength = directory.fileLength("_0.fdt");

    for (String name : directory.listAll()) {
        if (name.endsWith(".fdt")) {
            System.out.println(name);
            System.out.println(directory.fileLength(name));
        }
    }
}
From source file:org.apache.blur.lucene.codec.Blur022CodecTest.java
License:Apache License
@Test
public void testSmallDocs() throws IOException {
    RAMDirectory directory = new RAMDirectory();
    IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
    conf1.setCodec(new Blur022Codec());
    Random random1 = new Random(1);
    IndexWriter writer1 = new IndexWriter(directory, conf1);
    for (int i = 0; i < 1000; i++) {
        writer1.addDocument(getSmallDoc(random1));
    }
    writer1.close();

    DirectoryReader reader1 = DirectoryReader.open(directory);
    int numDocs1 = reader1.numDocs();
    assertEquals(1000, numDocs1);
    // for (int i = 0; i < numDocs1; i++) {
    //   System.out.println(reader1.document(i));
    // }

    IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
    conf2.setCodec(new Blur022Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
    Random random2 = new Random(1);
    IndexWriter writer2 = new IndexWriter(directory, conf2);
    for (int i = 0; i < 1000; i++) {
        writer2.addDocument(getSmallDoc(random2));
    }
    writer2.close();

    DirectoryReader reader2 = DirectoryReader.open(directory);
    int numDocs2 = reader2.numDocs();
    assertEquals(2000, numDocs2);

    for (int i = 0; i < 2; i++) {
        long t1 = System.nanoTime();
        long hash1 = 0;
        long hash2 = 0;
        for (int d = 0; d < 1000; d++) {
            Document document1 = reader1.document(d);
            hash1 += document1.hashCode();
        }
        long t2 = System.nanoTime();
        for (int d = 0; d < 1000; d++) {
            Document document2 = reader2.document(d + 1000);
            hash2 += document2.hashCode();
        }
        long t3 = System.nanoTime();
        System.out.println((t3 - t2) / 1000000.0);
        System.out.println((t2 - t1) / 1000000.0);
        System.out.println("doc1 " + hash1);
        System.out.println("doc2 " + hash2);
    }
    // for (int i = 0; i < numDocs2; i++) {
    //   System.out.println(reader2.document(i));
    // }
    // long fileLength = directory.fileLength("_0.fdt");

    for (String name : directory.listAll()) {
        if (name.endsWith(".fdt")) {
            System.out.println(name);
            System.out.println(directory.fileLength(name));
        }
    }
}
From source file:org.apache.blur.lucene.codec.Blur024CodecTest.java
License:Apache License
@Test
public void testDocValuesFormat() throws IOException {
    RAMDirectory directory = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
    conf.setCodec(new Blur024Codec());
    IndexWriter writer = new IndexWriter(directory, conf);
    Document doc = new Document();
    doc.add(new StringField("f", "v", Store.YES));
    doc.add(new SortedDocValuesField("f", new BytesRef("v")));
    writer.addDocument(doc);
    writer.close();

    DirectoryReader reader = DirectoryReader.open(directory);
    AtomicReaderContext context = reader.leaves().get(0);
    AtomicReader atomicReader = context.reader();
    SortedDocValues sortedDocValues = atomicReader.getSortedDocValues("f");
    assertTrue(sortedDocValues.getClass().getName().startsWith(DiskDocValuesProducer.class.getName()));
    reader.close();
}
From source file:org.apache.blur.lucene.codec.Blur024CodecTest.java
License:Apache License
@Test
public void testLargeDocs() throws IOException {
    Random random = new Random();
    Iterable<? extends IndexableField> doc = getLargeDoc(random);
    RAMDirectory directory = new RAMDirectory();
    IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
    conf1.setCodec(new Blur024Codec());
    IndexWriter writer1 = new IndexWriter(directory, conf1);
    writer1.addDocument(doc);
    writer1.close();

    DirectoryReader reader1 = DirectoryReader.open(directory);
    int numDocs1 = reader1.numDocs();
    assertEquals(1, numDocs1);
    // for (int i = 0; i < numDocs1; i++) {
    //   System.out.println(reader1.document(i));
    // }

    IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
    conf2.setCodec(new Blur024Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
    IndexWriter writer2 = new IndexWriter(directory, conf2);
    writer2.addDocument(doc);
    writer2.close();

    DirectoryReader reader2 = DirectoryReader.open(directory);
    int numDocs2 = reader2.numDocs();
    assertEquals(2, numDocs2);

    for (int i = 0; i < 2; i++) {
        long t1 = System.nanoTime();
        Document document1 = reader1.document(0);
        long t2 = System.nanoTime();
        Document document2 = reader2.document(1);
        long t3 = System.nanoTime();
        System.out.println((t3 - t2) / 1000000.0);
        System.out.println((t2 - t1) / 1000000.0);
        System.out.println("doc1 " + document1.hashCode());
        System.out.println("doc2 " + document2.hashCode());
    }
    // for (int i = 0; i < numDocs2; i++) {
    //   System.out.println(reader2.document(i));
    // }
    // long fileLength = directory.fileLength("_0.fdt");

    for (String name : directory.listAll()) {
        if (name.endsWith(".fdt")) {
            System.out.println(name);
            System.out.println(directory.fileLength(name));
        }
    }
}
From source file:org.apache.blur.lucene.codec.Blur024CodecTest.java
License:Apache License
@Test
public void testSmallDocs() throws IOException {
    RAMDirectory directory = new RAMDirectory();
    IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
    conf1.setCodec(new Blur024Codec());
    Random random1 = new Random(1);
    IndexWriter writer1 = new IndexWriter(directory, conf1);
    for (int i = 0; i < 1000; i++) {
        writer1.addDocument(getSmallDoc(random1));
    }
    writer1.close();

    DirectoryReader reader1 = DirectoryReader.open(directory);
    int numDocs1 = reader1.numDocs();
    assertEquals(1000, numDocs1);
    // for (int i = 0; i < numDocs1; i++) {
    //   System.out.println(reader1.document(i));
    // }

    IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
    conf2.setCodec(new Blur024Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
    Random random2 = new Random(1);
    IndexWriter writer2 = new IndexWriter(directory, conf2);
    for (int i = 0; i < 1000; i++) {
        writer2.addDocument(getSmallDoc(random2));
    }
    writer2.close();

    DirectoryReader reader2 = DirectoryReader.open(directory);
    int numDocs2 = reader2.numDocs();
    assertEquals(2000, numDocs2);

    for (int i = 0; i < 2; i++) {
        long t1 = System.nanoTime();
        long hash1 = 0;
        long hash2 = 0;
        for (int d = 0; d < 1000; d++) {
            Document document1 = reader1.document(d);
            hash1 += document1.hashCode();
        }
        long t2 = System.nanoTime();
        for (int d = 0; d < 1000; d++) {
            Document document2 = reader2.document(d + 1000);
            hash2 += document2.hashCode();
        }
        long t3 = System.nanoTime();
        System.out.println((t3 - t2) / 1000000.0);
        System.out.println((t2 - t1) / 1000000.0);
        System.out.println("doc1 " + hash1);
        System.out.println("doc2 " + hash2);
    }
    // for (int i = 0; i < numDocs2; i++) {
    //   System.out.println(reader2.document(i));
    // }
    // long fileLength = directory.fileLength("_0.fdt");

    for (String name : directory.listAll()) {
        if (name.endsWith(".fdt")) {
            System.out.println(name);
            System.out.println(directory.fileLength(name));
        }
    }
}
From source file:org.apache.blur.utils.TableShardCountCollapser.java
License:Apache License
public void collapseShardsTo(int newShardCount) throws IOException {
    if (!validateCount(newShardCount)) {
        throw new RuntimeException("Count [" + newShardCount + "] is not valid, valid values are ["
            + getCollapsePossibilities() + "]");
    }
    Path[] paths = getPaths();
    int numberOfShardsToMergePerPass = paths.length / newShardCount;
    for (int i = 0; i < newShardCount; i++) {
        System.out.println("Base Index [" + paths[i] + "]");
        IndexWriterConfig lconf = new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer());
        lconf.setCodec(new Blur024Codec());
        HdfsDirectory dir = new HdfsDirectory(getConf(), paths[i]);
        IndexWriter indexWriter = new IndexWriter(dir, lconf);
        Directory[] dirs = new Directory[numberOfShardsToMergePerPass - 1];
        Path[] pathsToDelete = new Path[numberOfShardsToMergePerPass - 1];
        for (int p = 1; p < numberOfShardsToMergePerPass; p++) {
            Path pathToMerge = paths[i + p * newShardCount];
            System.out.println("Merge [" + pathToMerge + "]");
            dirs[p - 1] = new HdfsDirectory(getConf(), pathToMerge);
            pathsToDelete[p - 1] = pathToMerge;
        }
        indexWriter.addIndexes(dirs);
        // Causes a rewrite of the index; the symlinked files are merged/rewritten.
        indexWriter.forceMerge(1);
        indexWriter.close();
        FileSystem fileSystem = path.getFileSystem(getConf());
        for (Path p : pathsToDelete) {
            fileSystem.delete(p, true);
        }
    }
}
From source file:org.apache.carbondata.datamap.lucene.LuceneDataMapBuilder.java
License:Apache License
private IndexWriter createIndexWriter(String dataMapPath) throws IOException {
    Path indexPath = FileFactory.getPath(dataMapPath);
    FileSystem fs = FileFactory.getFileSystem(indexPath);
    // if the index path exists, delete it because we are
    // rebuilding the whole datamap for all segments
    if (fs.exists(indexPath)) {
        fs.delete(indexPath, true);
    }
    if (!fs.mkdirs(indexPath)) {
        LOGGER.error("Failed to create directory " + indexPath);
    }
    if (null == analyzer) {
        analyzer = new StandardAnalyzer();
    }
    // create an index writer backed by HDFS
    Directory indexDir = new HdfsDirectory(indexPath, FileFactory.getConfiguration());
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    if (CarbonProperties.getInstance()
        .getProperty(CarbonCommonConstants.CARBON_LUCENE_COMPRESSION_MODE,
            CarbonCommonConstants.CARBON_LUCENE_COMPRESSION_MODE_DEFAULT)
        .equalsIgnoreCase(CarbonCommonConstants.CARBON_LUCENE_COMPRESSION_MODE_DEFAULT)) {
        indexWriterConfig.setCodec(new Lucene62Codec(Lucene50StoredFieldsFormat.Mode.BEST_SPEED));
    } else {
        indexWriterConfig.setCodec(new Lucene62Codec(Lucene50StoredFieldsFormat.Mode.BEST_COMPRESSION));
    }
    // pass the configured IndexWriterConfig so the codec setting takes effect
    return new IndexWriter(indexDir, indexWriterConfig);
}
From source file:org.apache.carbondata.datamap.lucene.LuceneDataMapWriter.java
License:Apache License
/**
 * Start of new blocklet notification.
 */
public void onBlockletStart(int blockletId) throws IOException {
    if (null == analyzer) {
        if (CarbonProperties.getInstance()
            .getProperty(CarbonCommonConstants.CARBON_LUCENE_INDEX_STOP_WORDS,
                CarbonCommonConstants.CARBON_LUCENE_INDEX_STOP_WORDS_DEFAULT)
            .equalsIgnoreCase("true")) {
            analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET);
        } else {
            analyzer = new StandardAnalyzer();
        }
    }
    // save index data into RAM, write to disk after one page is finished
    ramDir = new RAMDirectory();
    ramIndexWriter = new IndexWriter(ramDir, new IndexWriterConfig(analyzer));
    if (indexWriter != null) {
        return;
    }
    // get the index path, put index data into the segment's path
    String dataMapPath;
    if (storeBlockletWise) {
        dataMapPath = this.dataMapPath + File.separator + blockletId;
    } else {
        dataMapPath = this.dataMapPath;
    }
    Path indexPath = FileFactory.getPath(dataMapPath);
    FileSystem fs = FileFactory.getFileSystem(indexPath);
    // if the index path does not exist, create it
    if (!fs.exists(indexPath)) {
        if (!fs.mkdirs(indexPath)) {
            throw new IOException("Failed to create directory " + dataMapPath);
        }
    }
    // the indexWriter closes the FileSystem on closing the writer, so use a new
    // configuration and disable the cache for the index writer; the FileSystem
    // will be closed on closing the writer
    Configuration conf = FileFactory.getConfiguration();
    conf.set("fs.hdfs.impl.disable.cache", "true");
    // create an index writer
    Directory indexDir = new HdfsDirectory(indexPath, conf);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    if (CarbonProperties.getInstance()
        .getProperty(CarbonCommonConstants.CARBON_LUCENE_COMPRESSION_MODE,
            CarbonCommonConstants.CARBON_LUCENE_COMPRESSION_MODE_DEFAULT)
        .equalsIgnoreCase(CarbonCommonConstants.CARBON_LUCENE_COMPRESSION_MODE_DEFAULT)) {
        indexWriterConfig.setCodec(speedCodec);
    } else {
        indexWriterConfig.setCodec(compressionCodec);
    }
    indexWriter = new IndexWriter(indexDir, indexWriterConfig);
}
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorContext.java
License:Apache License
static IndexWriterConfig getIndexWriterConfig(IndexDefinition definition, boolean remoteDir) {
    // FIXME: Hack needed to make Lucene work in an OSGi environment
    Thread thread = Thread.currentThread();
    ClassLoader loader = thread.getContextClassLoader();
    thread.setContextClassLoader(IndexWriterConfig.class.getClassLoader());
    try {
        Analyzer definitionAnalyzer = definition.getAnalyzer();
        Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
        analyzers.put(FieldNames.SPELLCHECK, new ShingleAnalyzerWrapper(LuceneIndexConstants.ANALYZER, 3));
        if (!definition.isSuggestAnalyzed()) {
            analyzers.put(FieldNames.SUGGEST, SuggestHelper.getAnalyzer());
        }
        Analyzer analyzer = new PerFieldAnalyzerWrapper(definitionAnalyzer, analyzers);
        IndexWriterConfig config = new IndexWriterConfig(VERSION, analyzer);
        if (remoteDir) {
            config.setMergeScheduler(new SerialMergeScheduler());
        }
        if (definition.getCodec() != null) {
            config.setCodec(definition.getCodec());
        }
        return config;
    } finally {
        thread.setContextClassLoader(loader);
    }
}
From source file:org.apache.solr.codecs.test.testDeleteDocs.java
License:Apache License
public static void main(String[] args) {
    try {
        plaintextDir = assureDirectoryExists(new File(INDEX_ROOT_FOLDER));
        // ----------- index documents -------
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_0);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_0, analyzer);
        // open or create the index; CREATE_OR_APPEND keeps existing documents
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        // config.setCodec(new SimpleTextCodec());
        Properties props = new Properties();
        FileInputStream fstream = new FileInputStream(
            "C:\\work\\search_engine\\codec\\solr410\\solr_codectest\\collection1\\conf\\kvstore.properties");
        props.load(fstream);
        fstream.close();
        ONSQLKVstoreHandler.getInstance().setKVStore("omega", props);
        ONSQLCodec codec = new ONSQLCodec();
        config.setCodec(codec);
        config.setUseCompoundFile(false);
        Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
        IndexWriter writer = new IndexWriter(luceneDir, config);
        QueryParser queryParser = new QueryParser(Version.LUCENE_4_10_0, "title", analyzer);
        String search_word = "fourth";
        Query query = queryParser.parse(search_word);
        writer.deleteDocuments(query);
        writer.commit();
        writer.close();
        searchIndex("title", search_word);
    } catch (Throwable te) {
        te.printStackTrace();
    }
}