Java tutorial
/** * Copyright (C) 2001-2019 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.search; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.Collection; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicBoolean; import java.util.logging.Level; import org.apache.commons.io.FileUtils; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import com.rapidminer.search.event.GlobalSearchManagerListener; import com.rapidminer.search.event.GlobalSearchRegistryEvent; import com.rapidminer.tools.FileSystemService; import com.rapidminer.tools.LogService; /** * Takes care of preparing the Global Search index. * * @author Marco Boeck * @since 8.1 */ public enum GlobalSearchIndexer { INSTANCE; private Path indexDirectoryPath; private final ExecutorService pool = Executors.newFixedThreadPool(2); private volatile boolean setupError = false; private final AtomicBoolean initialized = new AtomicBoolean(false); private IndexWriter indexWriter; /** this listener makes sure that updates to each GlobalSearchable are reflected in the index */ private final GlobalSearchManagerListener searchManagerListener = new GlobalSearchManagerListener() { @Override public void documentsAdded(final String categoryId, final Collection<Document> addedDocuments) { GlobalSearchCategory category = GlobalSearchRegistry.INSTANCE.getSearchCategoryById(categoryId); if (category != null) { pool.submit(() -> addDocuments(category, addedDocuments)); } } @Override public void documentsUpdated(final String categoryId, final Collection<Document> updatedDocuments) { GlobalSearchCategory category = GlobalSearchRegistry.INSTANCE.getSearchCategoryById(categoryId); if (category != null) { pool.submit(() -> updateDocuments(category, updatedDocuments)); } } @Override public void documentsRemoved(final String categoryId, final Collection<Document> removedDocuments) { GlobalSearchCategory category = GlobalSearchRegistry.INSTANCE.getSearchCategoryById(categoryId); if (category != null) { pool.submit(() -> removeDocuments(category, removedDocuments)); } } }; /** * Sets up the search indexer instance. Registers a listener to the {@link GlobalSearchRegistry} to be able to add/remove * documents to/from the index in case search categories are (un)registered. */ GlobalSearchIndexer() { try { indexDirectoryPath = FileSystemService.getUserRapidMinerDir().toPath() .resolve(FileSystemService.RAPIDMINER_INTERNAL_CACHE_SEARCH_FULL); // set up of Lucene is done in initialize() } catch (Exception e) { setupError = true; LogService.getRoot().log(Level.SEVERE, "com.rapidminer.global_search.searchindexer.setup_failed", e); } } /** * Initializes the {@link GlobalSearchIndexer}. Calling multiple times has no effect. */ public void initialize() { if (!initialized.get()) { if (setupError) { // should not happen at this point, but better be safe return; } // create the single index writer try { indexWriter = createIndexWriter(); } catch (Exception e) { // could not open the index. Try deleting the cache LogService.getRoot().log(Level.INFO, "com.rapidminer.global_search.searchindexer.setup_self_fix_start", e); boolean fixed = fixIndexCacheFolder(); if (fixed) { LogService.getRoot().log(Level.INFO, "com.rapidminer.global_search.searchindexer.setup_self_fix_success"); } else { // nothing worked, cannot setup Global Search setupError = true; LogService.getRoot().log(Level.SEVERE, "com.rapidminer.global_search.searchindexer.setup_failed", e); return; } } // add registry listener. If there is a setup error, we do not even get here GlobalSearchRegistry.INSTANCE .addEventListener((GlobalSearchRegistryEvent e, GlobalSearchCategory category) -> { // new registrations after initial indexing has been started/done if (e.getEventType() == GlobalSearchRegistryEvent.RegistrationEvent.SEARCH_CATEGORY_REGISTERED) { category.getManager().getSearchManagerEventHandler() .addEventListener(searchManagerListener); } else if (e .getEventType() == GlobalSearchRegistryEvent.RegistrationEvent.SEARCH_CATEGORY_UNREGISTERED) { category.getManager().getSearchManagerEventHandler() .removeEventListener(searchManagerListener); removeCategory(category); } }); initialized.set(true); } } /** * Returns whether the GlobalSearchIndexer was setup successfully. * * @return {@code true} if everything is ready; {@code false} otherwise */ public boolean isInitialized() { return !setupError && initialized.get(); } /** * Removes all documents of a search category from the index. * * @param category * the category for which all documents should be removed */ private void removeCategory(final GlobalSearchCategory category) { try { Term categoryToDeleteTerm = new Term(GlobalSearchUtilities.FIELD_CATEGORY, category.getCategoryId()); indexWriter.deleteDocuments(categoryToDeleteTerm); } catch (IOException e) { LogService.getRoot().log(Level.WARNING, "com.rapidminer.global_search.searchindexer.remove_failed", new Object[] { category.getCategoryId(), e.getMessage() }); } } /** * Add the given documents to the index. * * @param category * the origin of the search documents * @param documents * the documents to add to the index */ private void addDocuments(final GlobalSearchCategory category, final Collection<Document> documents) { for (Document doc : documents) { // make sure doc has necessary fields if (!isDocValid(category.getCategoryId(), doc)) { continue; } // store category id to make searching only for specific categories possible doc.add(GlobalSearchUtilities.INSTANCE.createFieldForIdentifiers(GlobalSearchUtilities.FIELD_CATEGORY, category.getCategoryId())); doc.add(GlobalSearchUtilities.INSTANCE.createFieldForIdentifiers( GlobalSearchHandler.FIELD_INTERNAL_UNIQUE_ID, createInternalId(category.getCategoryId(), doc))); } try { indexWriter.addDocuments(documents); } catch (Exception e) { LogService.getRoot().log(Level.WARNING, "com.rapidminer.global_search.searchindexer.add_failed", new Object[] { category.getCategoryId(), e.getMessage() }); } } /** * Updates the documents for the given search category. Call this method if new documents/updated documents should * be made available to the Global Search. * * @param category * the search category. Must already be registered to the {@link GlobalSearchRegistry}. * @param documents * the documents to update on the index * @throws IllegalStateException * if the search category is not registered to the {@link GlobalSearchRegistry} */ private void updateDocuments(final GlobalSearchCategory category, final Collection<Document> documents) { for (Document doc : documents) { // make sure doc has necessary fields if (!isDocValid(category.getCategoryId(), doc)) { continue; } // store category id to make searching only for specific categories possible doc.add(GlobalSearchUtilities.INSTANCE.createFieldForIdentifiers(GlobalSearchUtilities.FIELD_CATEGORY, category.getCategoryId())); doc.add(GlobalSearchUtilities.INSTANCE.createFieldForIdentifiers( GlobalSearchHandler.FIELD_INTERNAL_UNIQUE_ID, createInternalId(category.getCategoryId(), doc))); IndexableField field = doc.getField(GlobalSearchHandler.FIELD_INTERNAL_UNIQUE_ID); Term termToUpdate = new Term(field.name(), field.stringValue()); try { indexWriter.updateDocument(termToUpdate, doc); } catch (Exception e) { LogService.getRoot().log(Level.WARNING, "com.rapidminer.global_search.searchindexer.update_failed", new Object[] { category.getCategoryId(), e.getMessage() }); } } } /** * Removes all documents for the given search category from the index. * * @param category * the origin of the search documents * @param documents * the documents to remove from the index */ private void removeDocuments(final GlobalSearchCategory category, final Collection<Document> documents) { Term[] termsToDelete = new Term[documents.size()]; int index = 0; // these docs will likely not have the internal unique id set -> set it if needed for (Document doc : documents) { IndexableField field = doc.getField(GlobalSearchHandler.FIELD_INTERNAL_UNIQUE_ID); if (field != null) { termsToDelete[index++] = new Term(GlobalSearchHandler.FIELD_INTERNAL_UNIQUE_ID, field.stringValue()); } else { termsToDelete[index++] = new Term(GlobalSearchHandler.FIELD_INTERNAL_UNIQUE_ID, createInternalId(category.getCategoryId(), doc)); } } try { indexWriter.deleteDocuments(termsToDelete); } catch (Exception e) { LogService.getRoot().log(Level.WARNING, "com.rapidminer.global_search.searchindexer.remove_failed", new Object[] { category.getCategoryId(), e.getMessage() }); } } /** * Create the internal, application-unique id. * * @param categoryId * the category id of the document * @param document * the document for which to generate the id * @return the unique id */ private String createInternalId(final String categoryId, final Document document) { String uniqueId = document.getField(GlobalSearchUtilities.FIELD_UNIQUE_ID).stringValue(); return categoryId + "_" + uniqueId; } /** * Checks if the given {@link Document} adheres to the standards the Global Search needs. Logs if it is not. * * @param doc * the document to check * @return {@code true} if the document is considered valid; {@code false} otherwise */ private boolean isDocValid(final String categoryId, final Document doc) { if (doc.getField(GlobalSearchUtilities.FIELD_UNIQUE_ID) == null) { LogService.getRoot().log(Level.WARNING, "com.rapidminer.global_search.searchindexer.discarded_document_missing_field", new Object[] { categoryId, GlobalSearchUtilities.FIELD_UNIQUE_ID }); return false; } if (doc.getField(GlobalSearchUtilities.FIELD_NAME) == null) { LogService.getRoot().log(Level.WARNING, "com.rapidminer.global_search.searchindexer.discarded_document_missing_field", new Object[] { categoryId, GlobalSearchUtilities.FIELD_NAME }); return false; } return true; } /** * Tries to fix the index folder by deleting it and all its content. * @return {@code true} if the fix was successful, {@code false} otherwise */ private boolean fixIndexCacheFolder() { if (indexDirectoryPath != null && Files.exists(indexDirectoryPath)) { try { if (Files.isDirectory(indexDirectoryPath)) { FileUtils.deleteDirectory(indexDirectoryPath.toFile()); } else { // this should not happen, but in case it does, delete the file Files.delete(indexDirectoryPath); } // a fix was applied, now try if we can use the index indexWriter = createIndexWriter(); // all good, we fixed it! return true; } catch (Exception e) { LogService.getRoot().log(Level.SEVERE, "com.rapidminer.global_search.searchindexer.setup_self_fix_failed", e); } } return false; } /** * Creates an instance of {@link IndexWriter}. * * @return the writer, never {@code null} * @throws IOException * if something goes wrong */ private IndexWriter createIndexWriter() throws IOException { Directory dir = FSDirectory.open(indexDirectoryPath); IndexWriterConfig config = new IndexWriterConfig(GlobalSearchUtilities.ANALYZER); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); return new IndexWriter(dir, config); } /** * Creates an instance of {@link IndexReader}. * * @return the reader, never {@code null} * @throws IOException * if something goes wrong */ protected IndexReader createIndexReader() throws IOException { return DirectoryReader.open(indexWriter, true, false); } }