Usage examples for org.apache.lucene.index.IndexWriterConfig#setSimilarity
public IndexWriterConfig setSimilarity(Similarity similarity)
From source file:com.github.alvanson.xltsearch.IndexTask.java
License:Apache License
@Override protected Boolean call() { IndexWriter iwriter = null;/*from w w w. jav a 2 s.c o m*/ boolean result = false; updateMessage("started"); try { int count = 0; Docket docket; IndexWriterConfig iwconfig = new IndexWriterConfig(config.getVersion(), config.getAnalyzer()); iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); iwconfig.setSimilarity(config.getSimilarity()); iwriter = new IndexWriter(config.getDirectory(), iwconfig); while ((docket = inQueue.take()) != Docket.DONE) { count++; updateMessage(docket.relPath); switch (docket.status) { case PARSED: // index parsed file Document doc = new Document(); // store relative path ** must be indexed for updateDocument doc.add(new StringField(config.pathField, docket.relPath, Field.Store.YES)); // index content doc.add(new TextField(config.contentField, docket.content.toString(), Field.Store.NO)); // index standard metadata for (Map.Entry<String, Property> e : config.metadataFields.entrySet()) { for (String value : docket.metadata.getValues(e.getValue())) { doc.add(new TextField(e.getKey(), value, Field.Store.YES)); } } // store hashsum doc.add(new StringField(config.hashSumField, docket.hashSum, Field.Store.YES)); // add/update document iwriter.updateDocument(new Term(config.pathField, docket.relPath), doc); // fall through case PASS: break; case DELETE: iwriter.deleteDocuments(new Term(config.pathField, docket.relPath)); break; default: logger.error("Unexpected docket state while processing {}: {}", docket.relPath, docket.status.toString()); cancel(true); // cancel task } updateProgress(count, count + docket.workLeft); } // end of queue updateMessage("complete"); updateProgress(count, count + docket.workLeft); result = true; } catch (IOException ex) { updateMessage("I/O exception"); logger.error("I/O exception while writing to index", ex); } catch (InterruptedException ex) { if (isCancelled()) { updateMessage("cancelled"); } else { updateMessage("interrupted"); logger.error("Interrupted", ex); } } 
// close iwriter if (iwriter != null) { try { iwriter.close(); } catch (IOException ex) { logger.warn("I/O exception while closing index writer", ex); } } return result; }
From source file:com.jaeksoft.searchlib.index.WriterLocal.java
License:Open Source License
/**
 * Opens an {@link IndexWriter} on {@code indexDirectory}.
 *
 * @param create when {@code true} the writer is opened in
 *               {@code CREATE_OR_APPEND} mode, otherwise in {@code APPEND} mode
 * @return a new writer configured with a serial merge scheduler, the write-lock
 *         timeout from {@code indexConfig} and, if one is provided, the
 *         similarity from {@code indexConfig}
 */
private final IndexWriter open(boolean create)
        throws CorruptIndexException, LockObtainFailedException, IOException, SearchLibException {
    final OpenMode openMode;
    if (create) {
        openMode = OpenMode.CREATE_OR_APPEND;
    } else {
        openMode = OpenMode.APPEND;
    }

    // No analyzer is supplied to the writer configuration.
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, null);
    writerConfig.setOpenMode(openMode);
    writerConfig.setMergeScheduler(new SerialMergeScheduler());
    writerConfig.setWriteLockTimeout(indexConfig.getWriteLockTimeout());

    // Only override the similarity when the index configuration supplies one.
    Similarity customSimilarity = indexConfig.getNewSimilarityInstance();
    if (customSimilarity != null) {
        writerConfig.setSimilarity(customSimilarity);
    }

    Logging.debug("WriteLocal open " + indexDirectory.getDirectory());
    return new IndexWriter(indexDirectory.getDirectory(), writerConfig);
}
From source file:com.jaeksoft.searchlib.index.WriterLucene.java
License:Open Source License
/**
 * Opens an {@link IndexWriter} on {@code indexDirectory}.
 *
 * @param create when {@code true} the writer is opened in
 *               {@code CREATE_OR_APPEND} mode, otherwise in {@code APPEND} mode
 * @return a new writer configured with a serial merge scheduler and, if one is
 *         provided, the similarity from {@code indexConfig}
 */
private final IndexWriter open(boolean create)
        throws CorruptIndexException, LockObtainFailedException, IOException, SearchLibException {
    final OpenMode openMode;
    if (create) {
        openMode = OpenMode.CREATE_OR_APPEND;
    } else {
        openMode = OpenMode.APPEND;
    }

    // No analyzer is supplied to the writer configuration.
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, null);
    writerConfig.setOpenMode(openMode);
    writerConfig.setMergeScheduler(new SerialMergeScheduler());

    // Only override the similarity when the index configuration supplies one.
    Similarity customSimilarity = indexConfig.getNewSimilarityInstance();
    if (customSimilarity != null) {
        writerConfig.setSimilarity(customSimilarity);
    }

    // NOTE(review): the message text says "WriteLocal"; kept unchanged - confirm whether that is intended.
    Logging.debug("WriteLocal open " + indexDirectory.getDirectory());
    return new IndexWriter(indexDirectory.getDirectory(), writerConfig);
}
From source file:com.nearinfinity.blur.mapreduce.BlurReducer.java
License:Apache License
/**
 * Creates the {@code _writer} used by this reducer.
 *
 * <p>Both {@code _directory} and {@code _analyzer} are passed through
 * {@code nullCheck} first. The writer uses {@code FairSimilarity}, the RAM
 * buffer size reported by {@code _blurTask}, and has compound files switched
 * off on its merge policy.
 *
 * @param context the reducer context (not used by this method)
 * @throws IOException if the index writer cannot be created
 */
protected void setupWriter(Context context) throws IOException {
    nullCheck(_directory);
    nullCheck(_analyzer);

    IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION, _analyzer);
    writerConfig.setSimilarity(new FairSimilarity());
    writerConfig.setRAMBufferSizeMB(_blurTask.getRamBufferSizeMB());

    // The configured merge policy is cast to TieredMergePolicy; any other type fails with a ClassCastException.
    ((TieredMergePolicy) writerConfig.getMergePolicy()).setUseCompoundFile(false);

    _writer = new IndexWriter(_directory, writerConfig);
}
From source file:com.qwazr.search.index.IndexInstance.java
License:Apache License
/** * @param schema/* w w w. ja v a 2 s . c om*/ * @param indexDirectory * @return */ final static IndexInstance newInstance(SchemaInstance schema, File indexDirectory, IndexSettingsDefinition settings) throws ServerException, IOException, ReflectiveOperationException, InterruptedException { UpdatableAnalyzer indexAnalyzer = null; UpdatableAnalyzer queryAnalyzer = null; IndexWriter indexWriter = null; Directory dataDirectory = null; try { if (!indexDirectory.exists()) indexDirectory.mkdir(); if (!indexDirectory.isDirectory()) throw new IOException( "This name is not valid. No directory exists for this location: " + indexDirectory); FileSet fileSet = new FileSet(indexDirectory); //Loading the settings if (settings == null) { settings = fileSet.settingsFile.exists() ? JsonMapper.MAPPER.readValue(fileSet.settingsFile, IndexSettingsDefinition.class) : IndexSettingsDefinition.EMPTY; } else { JsonMapper.MAPPER.writeValue(fileSet.settingsFile, settings); } //Loading the fields File fieldMapFile = new File(indexDirectory, FIELDS_FILE); LinkedHashMap<String, FieldDefinition> fieldMap = fieldMapFile.exists() ? JsonMapper.MAPPER.readValue(fieldMapFile, FieldDefinition.MapStringFieldTypeRef) : new LinkedHashMap<>(); //Loading the fields File analyzerMapFile = new File(indexDirectory, ANALYZERS_FILE); LinkedHashMap<String, AnalyzerDefinition> analyzerMap = analyzerMapFile.exists() ? 
JsonMapper.MAPPER.readValue(analyzerMapFile, AnalyzerDefinition.MapStringAnalyzerTypeRef) : new LinkedHashMap<>(); AnalyzerContext context = new AnalyzerContext(analyzerMap, fieldMap); indexAnalyzer = new UpdatableAnalyzer(context, context.indexAnalyzerMap); queryAnalyzer = new UpdatableAnalyzer(context, context.queryAnalyzerMap); // Open and lock the data directory dataDirectory = FSDirectory.open(fileSet.dataDirectory.toPath()); // Set IndexWriterConfig indexWriterConfig = new IndexWriterConfig(indexAnalyzer); if (settings != null && settings.similarity_class != null) indexWriterConfig.setSimilarity(IndexUtils.findSimilarity(settings.similarity_class)); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); SnapshotDeletionPolicy snapshotDeletionPolicy = new SnapshotDeletionPolicy( indexWriterConfig.getIndexDeletionPolicy()); indexWriterConfig.setIndexDeletionPolicy(snapshotDeletionPolicy); indexWriter = new IndexWriter(dataDirectory, indexWriterConfig); if (indexWriter.hasUncommittedChanges()) indexWriter.commit(); // Finally we build the SearchSearcherManger SearcherManager searcherManager = new SearcherManager(indexWriter, true, null); return new IndexInstance(schema, dataDirectory, settings, analyzerMap, fieldMap, fileSet, indexWriter, searcherManager, queryAnalyzer); } catch (IOException | ServerException | ReflectiveOperationException | InterruptedException e) { // We failed in opening the index. We close everything we can if (queryAnalyzer != null) IOUtils.closeQuietly(queryAnalyzer); if (indexAnalyzer != null) IOUtils.closeQuietly(indexAnalyzer); if (indexWriter != null) IOUtils.closeQuietly(indexWriter); if (dataDirectory != null) IOUtils.closeQuietly(dataDirectory); throw e; } }
From source file:cs571.proj1.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;//from www . j ava2 s . c om boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } else if ("-tfidf".equals(args[i])) { tfidf = true; } else if ("-bm25".equals(args[i])) { bm25 = true; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (tfidf) iwc.setSimilarity(new TFIDF()); if (bm25) iwc.setSimilarity(new BM25()); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. 
But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); System.out.println("Total # of Docs Indexed: " + numOfDocuments); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java
License:Open Source License
public LuceneIndexHandler(Configuration aConfiguration, AnalyzerCache aAnalyzerCache, ExecutorPool aExecutorPool, PreviewProcessor aPreviewProcessor) throws IOException { previewProcessor = aPreviewProcessor; configuration = aConfiguration;//from w w w . ja v a 2 s. c om analyzerCache = aAnalyzerCache; executorPool = aExecutorPool; contentFieldType = new FieldType(); contentFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); contentFieldType.setStored(true); contentFieldType.setTokenized(true); contentFieldType.setStoreTermVectorOffsets(true); contentFieldType.setStoreTermVectorPayloads(true); contentFieldType.setStoreTermVectorPositions(true); contentFieldType.setStoreTermVectors(true); analyzer = analyzerCache.getAnalyzer(); File theIndexDirectory = new File(aConfiguration.getConfigDirectory(), "index"); theIndexDirectory.mkdirs(); Directory theIndexFSDirectory = new NRTCachingDirectory(FSDirectory.open(theIndexDirectory.toPath()), 100, 100); IndexWriterConfig theConfig = new IndexWriterConfig(analyzer); theConfig.setSimilarity(new CustomSimilarity()); indexWriter = new IndexWriter(theIndexFSDirectory, theConfig); searcherManager = new SearcherManager(indexWriter, true, new SearcherFactory()); commitThread = new Thread("Lucene Commit Thread") { @Override public void run() { while (!isInterrupted()) { if (indexWriter.hasUncommittedChanges()) { try { indexWriter.commit(); } catch (IOException e) { throw new RuntimeException(e); } } try { Thread.sleep(2000); } catch (InterruptedException e) { // Do nothing here } } } }; commitThread.start(); facetsConfig = new FacetsConfig(); }
From source file:de.walware.statet.r.internal.core.rhelp.index.REnvIndexWriter.java
License:Open Source License
/**
 * Builds the writer configuration used for the help index.
 *
 * <p>The maximum number of thread states is the number of available
 * processors minus three, clamped to the range 2..8. The per-thread RAM hard
 * limit is set to 512 MB.
 *
 * @return a new configuration using {@code WRITE_ANALYZER} and {@code SIMILARITY}
 */
private IndexWriterConfig createWriterConfig() {
    final int spareProcessors = Runtime.getRuntime().availableProcessors() - 3;
    final int threadStates = Math.min(Math.max(2, spareProcessors), 8);

    final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LATEST, WRITE_ANALYZER);
    writerConfig.setSimilarity(SIMILARITY);
    writerConfig.setMaxThreadStates(threadStates);
    writerConfig.setRAMPerThreadHardLimitMB(512);
    return writerConfig;
}
From source file:edu.rpi.tw.linkipedia.search.indexing.EntityIndexer.java
License:Open Source License
/**
 * Builds a fresh index in {@code indexDirectory} from the contents of
 * {@code sourceDirectory}.
 *
 * <p>The writer is opened in {@code CREATE} mode with a per-field analyzer:
 * the {@code related_object}, {@code label} and {@code defaultLabel} fields use
 * an {@code EntropyAnalyzer}, while {@code analyzedLabel} and all remaining fields
 * use the default analyzer. Any exception is caught and its stack trace printed.
 */
public void createIndex() {
    try {
        Analyzer defaultAnalyzer = DefaultAnalyzer.getAnalyzer();
        PayloadEncoder payloadEncoder = new FloatEncoder();
        EntropyAnalyzer entropyAnalyzer = new EntropyAnalyzer(payloadEncoder);

        Map<String, Analyzer> analyzersByField = new HashMap<String, Analyzer>();
        for (String entropyField : new String[] { "related_object", "label", "defaultLabel" }) {
            analyzersByField.put(entropyField, entropyAnalyzer);
        }
        analyzersByField.put("analyzedLabel", defaultAnalyzer);
        PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer,
                analyzersByField);

        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_47, perFieldAnalyzer);
        writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        writerConfig.setRAMBufferSizeMB(4096);
        writerConfig.setMaxThreadStates(36);
        writerConfig.setSimilarity(new MySimilarity());

        Directory targetDirectory = FSDirectory.open(new File(indexDirectory));
        IndexWriter indexWriter = new IndexWriter(targetDirectory, writerConfig);

        System.out.println("Indexing to directory '" + indexDirectory + "'...");
        indexDocs(indexWriter, new File(sourceDirectory));

        // No explicit merge call is made here; the writer is simply closed.
        System.out.println("Optimizing...");
        indexWriter.close();
        System.out.println("Finished Indexing");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:edu.rpi.tw.linkipedia.search.indexing.EntityIndexUpdater.java
License:Open Source License
public void updateIndex() { try {// w ww . jav a2 s .c om Analyzer stdAnalyzer = DefaultAnalyzer.getAnalyzer(); PayloadEncoder encoder = new FloatEncoder(); EntropyAnalyzer entropyAnalyzer = new EntropyAnalyzer(encoder); Map<String, Analyzer> myAnalyzerMap = new HashMap<String, Analyzer>(); myAnalyzerMap.put("related_object", entropyAnalyzer); myAnalyzerMap.put("label", entropyAnalyzer); myAnalyzerMap.put("defaultLabel", entropyAnalyzer); myAnalyzerMap.put("analyzedLabel", stdAnalyzer); PerFieldAnalyzerWrapper MyAnalyzer = new PerFieldAnalyzerWrapper(stdAnalyzer, myAnalyzerMap); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, MyAnalyzer); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); iwc.setRAMBufferSizeMB(4096); iwc.setMaxThreadStates(36); iwc.setSimilarity(new MySimilarity()); Directory dir = FSDirectory.open(new File(indexDirectory)); IndexWriter writer = new IndexWriter(dir, iwc); System.out.println("Update directory '" + indexDirectory + "'..."); indexDocs(writer, new File(sourceDirectory)); System.out.println("Optimizing..."); writer.close(); System.out.println("Finished Updating"); } catch (Exception e) { e.printStackTrace(); } }