List of usage examples for org.apache.lucene.index IndexWriterConfig setSimilarity
public IndexWriterConfig setSimilarity(Similarity similarity)
From source file:sh.isaac.provider.query.lucene.LuceneIndexer.java
License:Apache License
/**
 * Instantiates a new lucene indexer.
 *
 * Wires up the on-disk index folder, the Lucene writer/searcher machinery
 * (near-real-time reopen), and a commit listener that re-indexes semantic
 * chronologies as they are committed.
 *
 * @param indexName the index name (also used as the on-disk folder name)
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected LuceneIndexer(String indexName) throws IOException {
    try {
        this.indexName = indexName;
        // Index writes run on the shared IO executor; a single dedicated thread
        // watches the returned futures for failures.
        this.luceneWriterService = LookupService.getService(WorkExecutors.class).getIOExecutor();
        this.luceneWriterFutureCheckerService = Executors.newFixedThreadPool(1,
                new NamedThreadFactory(indexName + " Lucene future checker", false));

        final Path searchFolder = LookupService.getService(ConfigurationService.class).getSearchFolderPath();
        final File luceneRootFolder = new File(searchFolder.toFile(), DEFAULT_LUCENE_FOLDER);

        luceneRootFolder.mkdirs();
        this.indexFolder = new File(luceneRootFolder, indexName);

        // Record whether a pre-existing, populated index was found BEFORE
        // (re)creating the folder below.
        if (!this.indexFolder.exists()) {
            this.databaseValidity = DatabaseValidity.MISSING_DIRECTORY;
            LOG.info("Index folder missing: " + this.indexFolder.getAbsolutePath());
        } else if (this.indexFolder.list().length > 0) {
            this.databaseValidity = DatabaseValidity.POPULATED_DIRECTORY;
        }

        this.indexFolder.mkdirs();
        LOG.info("Index: " + this.indexFolder.getAbsolutePath());

        // MMapDirectory (rather than SimpleFSDirectory) lets the OS manage caching of
        // the index files directly - and, more importantly, removes a huge bottleneck
        // for any operation doing multi-threaded reads of the index (like the SOLOR
        // rules processing).
        final MMapDirectory indexDirectory = new MMapDirectory(this.indexFolder.toPath());
        final IndexWriterConfig config = new IndexWriterConfig(new PerFieldAnalyzer());

        config.setRAMBufferSizeMB(256);

        final MergePolicy mergePolicy = new LogByteSizeMergePolicy();

        config.setMergePolicy(mergePolicy);
        // Similarity tuned for the short text fragments this index stores.
        config.setSimilarity(new ShortTextSimilarity());
        this.indexWriter = new IndexWriter(indexDirectory, config);

        final boolean applyAllDeletes = false;
        final boolean writeAllDeletes = false;

        this.searcherManager = new SearcherManager(indexWriter, applyAllDeletes, writeAllDeletes, null);

        // Create the ControlledRealTimeReopenThread that reopens the index periodically,
        // taking into account the changes made to the index. The index is refreshed
        // every 60s when nobody is waiting, and every 100 millis whenever someone is
        // waiting (see search method).
        // (see http://lucene.apache.org/core/4_3_0/core/org/apache/lucene/search/NRTManagerReopenThread.html)
        this.reopenThread = new ControlledRealTimeReopenThread<>(this.indexWriter, this.searcherManager, 60.00, 0.1);
        this.startThread();

        // Register for commits:
        LOG.info("Registering indexer " + indexName + " for commits");
        this.changeListenerRef = new ChronologyChangeListener() {
            @Override
            public void handleCommit(CommitRecord commitRecord) {
                // DB-build mode is resolved lazily on the first commit and cached.
                if (LuceneIndexer.this.dbBuildMode == null) {
                    LuceneIndexer.this.dbBuildMode = Get.configurationService().inDBBuildMode();
                }

                if (LuceneIndexer.this.dbBuildMode) {
                    LOG.debug("Ignore commit due to db build mode");
                    return;
                }

                final int size = commitRecord.getSemanticNidsInCommit().size();

                // Only log the individual nids for small commits.
                if (size < 100) {
                    LOG.info("submitting semantic elements " + commitRecord.getSemanticNidsInCommit().toString()
                            + " to indexer " + getIndexerName() + " due to commit");
                } else {
                    LOG.info("submitting " + size + " semantic elements to indexer " + getIndexerName()
                            + " due to commit");
                }

                commitRecord.getSemanticNidsInCommit().stream().forEach(sememeId -> {
                    final SemanticChronology sc = Get.assemblageService().getSemanticChronology(sememeId);

                    index(sc);
                });
                LOG.info("Completed index of " + size + " semantics for " + getIndexerName());
            }

            @Override
            public void handleChange(SemanticChronology sc) {
                // noop - indexing happens on commit, not on change
            }

            @Override
            public void handleChange(ConceptChronology cc) {
                // noop - this indexer only reacts to semantic commits
            }

            @Override
            public UUID getListenerUuid() {
                return UuidT5Generator.get(getIndexerName());
            }
        };
        Get.commitService().addChangeListener(this.changeListenerRef);
    } catch (final IOException e) {
        LookupService.getService(SystemStatusService.class).notifyServiceConfigurationFailure(indexName, e);
        throw e;
    }
}
From source file:Util.Index_Handler.java
/**
 * Creates (or appends to) the Lucene index of entity abstracts.
 *
 * Reads pipe-delimited lines ("entity|abstract") from {@code abstract_clean.txt}
 * and indexes one document per line into the {@code Abstract_Index} directory.
 *
 * Fix over the original: the {@code Directory}, {@code IndexWriter} and
 * {@code Scanner} are now opened with try-with-resources, so they are closed
 * even when an {@link IOException} is thrown mid-run (the original leaked all
 * three on failure).
 *
 * @return "done" on success, otherwise an error message describing the failure
 */
public static String createAbstract_Index() {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
    try (Directory dir = FSDirectory.open(new File("Abstract_Index"))) {
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_46, analyzer);
        conf.setSimilarity(new CustomSimilarity());
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        try (IndexWriter writer = new IndexWriter(dir, conf);
                Scanner scan = new Scanner(Paths.get("abstract_clean.txt"))) {
            while (scan.hasNextLine()) {
                // Expected line format: entity|abstract
                String[] line = scan.nextLine().split("\\|");
                Document doc = new Document();
                doc.add(new Field("entity", line[0], TextField.TYPE_STORED));
                doc.add(new Field("abstract", line[1], TextField.TYPE_STORED));
                writer.addDocument(doc);
            }
            // Two-phase commit, as in the original; writer.close() follows via
            // try-with-resources.
            writer.prepareCommit();
            writer.commit();
        }
    } catch (IOException ex) {
        System.out.println("Failed to creat Index for Abstracts:" + ex.getMessage());
        return "Failed to creat Index for Abstracts:" + ex.getMessage();
    }
    return "done";
}
From source file:Util.Index_Handler.java
/** * Creates a block index . Is necessary for blockjoinquerys. * @return status of operation//from w ww .j a v a2 s.c o m */ public static String createBlockIndex() { try { Scanner scan = new Scanner(Paths.get("combined.txt")); Scanner scan2 = new Scanner(Paths.get("entity_anchors.txt")); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46); Directory dir = FSDirectory.open(new File("Entity_Index")); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_46, analyzer); conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); conf.setSimilarity(new CustomSimilarity()); IndexWriter writer = new IndexWriter(dir, conf); String[] line;//=scan.nextLine().split(" "); ArrayList<Document> group = new ArrayList<>(); Map<String, String> anchorMap = new HashMap<>(); Document doc; String[] anchors; //load anchors into ram while (scan2.hasNext()) { line = scan2.nextLine().split("\\|"); anchorMap.put(line[0], line[1]); } line = scan.nextLine().split("\\|"); String previous = line[0]; anchors = line[2].split(";"); for (String s : anchors) { doc = new Document(); doc.add(new Field("anchorN", s, TextField.TYPE_STORED)); group.add(doc); } while (scan.hasNext()) { line = scan.nextLine().split("\\|"); if (line.length == 0) { System.out.println("found end"); doc = new Document(); doc.add(new Field("entity", line[0], TextField.TYPE_STORED)); doc.add(new Field("anchor", anchorDeli(anchorMap.get(line[0])), TextField.TYPE_STORED)); doc.add(new Field("titel", delimeterString(getEntity(line[0])), TextField.TYPE_STORED)); //this is the marker for the parent field. // it needs to be a stringfield so only an exact match will hit // and does not need to be in the index since you dont search this field. 
doc.add(new Field("type", "Parent", StringField.TYPE_NOT_STORED)); group.add(doc); writer.addDocuments(group); group.clear(); break; } if (!line[0].equals(previous)) { doc = new Document(); doc.add(new Field("entity", line[0], TextField.TYPE_STORED)); doc.add(new Field("anchor", anchorDeli(anchorMap.get(line[0])), TextField.TYPE_STORED)); doc.add(new Field("title", delimeterString(getEntity(line[0])), TextField.TYPE_STORED)); //this is the marker for the parent field. // it needs to be a stringfield so only an exact match will hit // and does not need to be in the index since you dont search this field. doc.add(new Field("type", "Parent", StringField.TYPE_NOT_STORED)); group.add(doc); writer.addDocuments(group); group.clear(); previous = line[0]; } anchors = line[2].split(";"); for (String s : anchors) { doc = new Document(); doc.add(new Field("anchorN", s, TextField.TYPE_STORED)); group.add(doc); } } writer.prepareCommit(); writer.commit(); writer.close(); } catch (IOException ex) { System.out.println("Creating Blockindex failed :" + ex.getMessage()); return "Creating Blockindex failed :" + ex.getMessage(); } return "done"; }