List of usage examples for org.apache.lucene.index IndexWriterConfig setRAMBufferSizeMB
@Override public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB)
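Before the collected examples, here is a minimal, self-contained sketch of the call, written against Lucene 4.x to match most of the snippets below. The class name and index path are made up for illustration; setRAMBufferSizeMB sets the amount of RAM (in MB) that IndexWriter may buffer before flushing a new segment (the default is 16 MB).

    import java.io.File;
    import java.io.IOException;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class RamBufferExample {
        public static void main(String[] args) throws IOException {
            // Hypothetical index location; adjust to your environment.
            Directory dir = FSDirectory.open(new File("/tmp/example-index"));
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47,
                    new StandardAnalyzer(Version.LUCENE_47));
            // Flush to a new segment once buffered documents use roughly 64 MB of RAM
            // (the default is 16 MB); a larger buffer generally means fewer, larger flushes.
            config.setRAMBufferSizeMB(64.0);
            IndexWriter writer = new IndexWriter(dir, config);
            // ... add documents ...
            writer.close();
        }
    }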
From source file: edu.cmu.geolocator.io.GetWriter.java
License: Apache License
    public static IndexWriter getIndexWriter(String indexdirectory, double buffersize) throws IOException {
        Directory dir;
        if (OSUtil.isWindows())
            dir = FSDirectory.open(new File(indexdirectory));
        else
            dir = NIOFSDirectory.open(new File(indexdirectory));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        config.setRAMBufferSizeMB(buffersize);
        LogDocMergePolicy mergePolicy = new LogDocMergePolicy();
        mergePolicy.setMergeFactor(3);
        config.setMergePolicy(mergePolicy);
        IndexWriter writer = new IndexWriter(dir, config);
        return writer;
    }
From source file: edu.ehu.galan.lite.algorithms.ranked.supervised.tfidf.corpus.plain.PlainCorpusBuilder.java
License: Open Source License
    private void initializeIndex(String index) {
        final File docDir = new File(index);
        Date start = new Date();
        try {
            System.out.println("Indexing to directory '" + docDir.getAbsolutePath() + "'...");
            Directory dir = FSDirectory.open(new File(index));
            Analyzer analyzer = new AnalyzerSpanish(Version.LUCENE_40);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            // Optional: for better indexing performance, if you
            // are indexing many documents, increase the RAM
            // buffer. But if you do this, increase the max heap
            // size to the JVM (eg add -Xmx512m or -Xmx1g):
            //
            iwc.setRAMBufferSizeMB(1024.0);
            writer = new IndexWriter(dir, iwc);
            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here. This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        }
    }
From source file: edu.ehu.galan.lite.algorithms.ranked.supervised.tfidf.corpus.wikipedia.WikiCorpusBuilder.java
License: Open Source License
    private void initializeIndex(String index) {
        final File docDir = new File(index);
        Date start = new Date();
        try {
            System.out.println("Indexing to directory '" + docDir.getAbsolutePath() + "'...");
            Directory dir = FSDirectory.open(new File(index));
            Analyzer analyzer = new AnalyzerEnglish(Version.LUCENE_40);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            // Optional: for better indexing performance, if you
            // are indexing many documents, increase the RAM
            // buffer. But if you do this, increase the max heap
            // size to the JVM (eg add -Xmx512m or -Xmx1g):
            //
            iwc.setRAMBufferSizeMB(1024.0);
            writer = new IndexWriter(dir, iwc);
            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here. This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        }
    }
From source file: edu.rpi.tw.linkipedia.search.indexing.EntityIndexer.java
License: Open Source License
    public void createIndex() {
        try {
            Analyzer stdAnalyzer = DefaultAnalyzer.getAnalyzer();
            PayloadEncoder encoder = new FloatEncoder();
            EntropyAnalyzer entropyAnalyzer = new EntropyAnalyzer(encoder);
            Map<String, Analyzer> myAnalyzerMap = new HashMap<String, Analyzer>();
            myAnalyzerMap.put("related_object", entropyAnalyzer);
            myAnalyzerMap.put("label", entropyAnalyzer);
            myAnalyzerMap.put("defaultLabel", entropyAnalyzer);
            myAnalyzerMap.put("analyzedLabel", stdAnalyzer);
            PerFieldAnalyzerWrapper MyAnalyzer = new PerFieldAnalyzerWrapper(stdAnalyzer, myAnalyzerMap);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, MyAnalyzer);
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            iwc.setRAMBufferSizeMB(4096);
            iwc.setMaxThreadStates(36);
            iwc.setSimilarity(new MySimilarity());
            Directory dir = FSDirectory.open(new File(indexDirectory));
            IndexWriter writer = new IndexWriter(dir, iwc);
            System.out.println("Indexing to directory '" + indexDirectory + "'...");
            indexDocs(writer, new File(sourceDirectory));
            System.out.println("Optimizing...");
            writer.close();
            System.out.println("Finished Indexing");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
From source file: edu.rpi.tw.linkipedia.search.indexing.EntityIndexUpdater.java
License: Open Source License
    public void updateIndex() {
        try {
            Analyzer stdAnalyzer = DefaultAnalyzer.getAnalyzer();
            PayloadEncoder encoder = new FloatEncoder();
            EntropyAnalyzer entropyAnalyzer = new EntropyAnalyzer(encoder);
            Map<String, Analyzer> myAnalyzerMap = new HashMap<String, Analyzer>();
            myAnalyzerMap.put("related_object", entropyAnalyzer);
            myAnalyzerMap.put("label", entropyAnalyzer);
            myAnalyzerMap.put("defaultLabel", entropyAnalyzer);
            myAnalyzerMap.put("analyzedLabel", stdAnalyzer);
            PerFieldAnalyzerWrapper MyAnalyzer = new PerFieldAnalyzerWrapper(stdAnalyzer, myAnalyzerMap);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, MyAnalyzer);
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            iwc.setRAMBufferSizeMB(4096);
            iwc.setMaxThreadStates(36);
            iwc.setSimilarity(new MySimilarity());
            Directory dir = FSDirectory.open(new File(indexDirectory));
            IndexWriter writer = new IndexWriter(dir, iwc);
            System.out.println("Update directory '" + indexDirectory + "'...");
            indexDocs(writer, new File(sourceDirectory));
            System.out.println("Optimizing...");
            writer.close();
            System.out.println("Finished Updating");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
From source file: edu.udel.ece.infolab.btc.Indexing.java
License: Apache License
    /**
     * Create an index writer that uses a #TupleAnalyzer on the triples fields with
     * a tokenization of the URI's localname, and the default #WhitespaceAnalyzer
     * on the others.
     * @param dir
     * @return
     * @throws IOException
     */
    @SuppressWarnings("deprecation")
    private IndexWriter initializeIndexWriter(final Directory dir) throws IOException {
        final Analyzer defaultAnalyzer = new WhitespaceAnalyzer(Version.LUCENE_31);
        final Map<String, Analyzer> fieldAnalyzers = new HashMap<String, Analyzer>();
        final TupleAnalyzer tuple = new TupleAnalyzer(new StandardAnalyzer(Version.LUCENE_31));
        tuple.setURINormalisation(URINormalisation.LOCALNAME);
        fieldAnalyzers.put(OUTGOING_TRIPLE, tuple);
        fieldAnalyzers.put(INCOMING_TRIPLE, tuple);
        final IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31,
                new PerFieldAnalyzerWrapper(defaultAnalyzer, fieldAnalyzers));
        // Disable compound file
        ((LogMergePolicy) config.getMergePolicy()).setUseCompoundFile(false);
        // Increase merge factor to 20 - more adapted to batch creation
        ((LogMergePolicy) config.getMergePolicy()).setMergeFactor(20);
        config.setRAMBufferSizeMB(256);
        config.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        config.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        final IndexWriter writer = new IndexWriter(dir, config);
        writer.setMaxFieldLength(Integer.MAX_VALUE);
        return writer;
    }
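The example above pairs setRAMBufferSizeMB(256) with setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH), so segment flushes are driven by RAM usage alone rather than by a buffered-document count. A minimal sketch of that flush-by-RAM pattern, written here against Lucene 4.x for consistency with the other snippets (the analyzer and version are illustrative assumptions, not taken from this source file):

    // Same imports as the sketch near the top of this page (IndexWriterConfig,
    // StandardAnalyzer, Version). Disable the document-count flush trigger so
    // only the 256 MB RAM buffer decides when a new segment is written.
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47,
            new StandardAnalyzer(Version.LUCENE_47));
    config.setRAMBufferSizeMB(256.0);
    config.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    config.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);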
From source file: edu.utsa.sifter.Indexer.java
License: Apache License
    static IndexWriter getIndexWriter(final String path, final String stopwords, final SifterConfig conf)
            throws IOException {
        Directory dir = FSDirectory.open(new File(path));
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_44, getStopList(stopwords));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(conf.INDEXING_BUFFER_SIZE);
        iwc.setMaxThreadStates(conf.THREAD_POOL_SIZE);
        IndexWriter writer = new IndexWriter(dir, iwc);
        return writer;
    }
From source file: edu.utsa.sifter.som.MainSOM.java
License: Apache License
    IndexWriter createWriter(final File somIdx, final SifterConfig conf) throws CorruptIndexException, IOException {
        Directory dir = FSDirectory.open(somIdx);
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(conf.INDEXING_BUFFER_SIZE);
        IndexWriter writer = new IndexWriter(dir, iwc);
        return writer;
    }
From source file: edu.virginia.cs.index.AnswerIndexer.java
    /**
     * Creates the initial index files on disk
     *
     * @param indexPath
     * @return
     * @throws IOException
     */
    private static IndexWriter setupIndex(String indexPath) throws IOException {
        Analyzer analyzer = new SpecialAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        config.setRAMBufferSizeMB(2048.0);
        FSDirectory dir = FSDirectory.open(new File(indexPath));
        IndexWriter writer = new IndexWriter(dir, config);
        return writer;
    }
From source file: edu.virginia.cs.index.PostLinkIndexer.java
    /**
     * Creates the initial index files on disk
     *
     * @param indexPath
     * @return
     * @throws IOException
     */
    private static IndexWriter setupIndex(String indexPath) throws IOException {
        Analyzer analyzer = new SpecialAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        config.setRAMBufferSizeMB(2048.0);
        FSDirectory dir;
        IndexWriter writer = null;
        dir = FSDirectory.open(new File(indexPath));
        writer = new IndexWriter(dir, config);
        return writer;
    }