Example usage for org.apache.lucene.index IndexWriterConfig setSimilarity

List of usage examples for org.apache.lucene.index IndexWriterConfig setSimilarity

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriterConfig setSimilarity.

Prototype

public IndexWriterConfig setSimilarity(Similarity similarity) 

Source Link

Document

Expert: set the Similarity implementation used by this IndexWriter.

Usage

From source file:sh.isaac.provider.query.lucene.LuceneIndexer.java

License:Apache License

/**
 * Instantiates a new lucene indexer: creates/opens the on-disk index under
 * the configured search folder, wires up an NRT (near-real-time) searcher
 * with a periodic reopen thread, and registers a commit listener that
 * re-indexes every semantic chronology touched by a commit.
 *
 * @param indexName the index name; also used as the on-disk subfolder name
 *                  and in the thread/service names
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected LuceneIndexer(String indexName) throws IOException {
    try {
        this.indexName = indexName;
        // Writes go through the shared IO executor; a second single-thread
        // pool is used only to await/check the futures of those writes.
        this.luceneWriterService = LookupService.getService(WorkExecutors.class).getIOExecutor();
        this.luceneWriterFutureCheckerService = Executors.newFixedThreadPool(1,
                new NamedThreadFactory(indexName + " Lucene future checker", false));

        final Path searchFolder = LookupService.getService(ConfigurationService.class).getSearchFolderPath();
        final File luceneRootFolder = new File(searchFolder.toFile(), DEFAULT_LUCENE_FOLDER);

        luceneRootFolder.mkdirs();
        this.indexFolder = new File(luceneRootFolder, indexName);

        // Record the pre-existing state of the index directory before we
        // create it, so callers can tell a fresh index from a populated one.
        if (!this.indexFolder.exists()) {
            this.databaseValidity = DatabaseValidity.MISSING_DIRECTORY;
            LOG.info("Index folder missing: " + this.indexFolder.getAbsolutePath());
        } else if (this.indexFolder.list().length > 0) {
            this.databaseValidity = DatabaseValidity.POPULATED_DIRECTORY;
        }

        this.indexFolder.mkdirs();
        LOG.info("Index: " + this.indexFolder.getAbsolutePath());

        final MMapDirectory indexDirectory = new MMapDirectory(this.indexFolder.toPath()); // switch over to MMapDirectory - in theory - this gives us back some
        // room on the JDK stack, letting the OS directly manage the caching of the index files - and more importantly, gives us a huge
        // performance boost during any operation that tries to do multi-threaded reads of the index (like the SOLOR rules processing) because
        // the default value of SimpleFSDirectory is a huge bottleneck.

        final IndexWriterConfig config = new IndexWriterConfig(new PerFieldAnalyzer());

        // Larger RAM buffer: fewer segment flushes during bulk indexing.
        config.setRAMBufferSizeMB(256);

        final MergePolicy mergePolicy = new LogByteSizeMergePolicy();

        config.setMergePolicy(mergePolicy);
        config.setSimilarity(new ShortTextSimilarity());

        this.indexWriter = new IndexWriter(indexDirectory, config);

        // Searchers opened from the writer do NOT apply deletes - presumably a
        // deliberate speed/visibility trade-off; TODO confirm intent.
        final boolean applyAllDeletes = false;
        final boolean writeAllDeletes = false;

        this.searcherManager = new SearcherManager(indexWriter, applyAllDeletes, writeAllDeletes, null);

        // [3]: Create the ControlledRealTimeReopenThread that reopens the index periodically taking into
        // account the changes made to the index and tracked by the TrackingIndexWriter instance.
        // The index is refreshed every 60 s when nobody is waiting
        // and every 100 ms whenever someone is waiting (see search method)
        // (see http://lucene.apache.org/core/4_3_0/core/org/apache/lucene/search/NRTManagerReopenThread.html)
        this.reopenThread = new ControlledRealTimeReopenThread<>(this.indexWriter, this.searcherManager, 60.00,
                0.1);
        this.startThread();

        // Register for commits: every semantic nid in a commit record is
        // looked up and re-indexed, unless the system is in DB build mode.
        LOG.info("Registering indexer " + indexName + " for commits");
        this.changeListenerRef = new ChronologyChangeListener() {
            @Override
            public void handleCommit(CommitRecord commitRecord) {
                // Lazily resolve (and then cache) the build-mode flag.
                if (LuceneIndexer.this.dbBuildMode == null) {
                    LuceneIndexer.this.dbBuildMode = Get.configurationService().inDBBuildMode();
                }

                if (LuceneIndexer.this.dbBuildMode) {
                    LOG.debug("Ignore commit due to db build mode");
                    return;
                }

                final int size = commitRecord.getSemanticNidsInCommit().size();

                // Only log the full nid list for small commits.
                if (size < 100) {
                    LOG.info("submitting semantic elements " + commitRecord.getSemanticNidsInCommit().toString()
                            + " to indexer " + getIndexerName() + " due to commit");
                } else {
                    LOG.info("submitting " + size + " semantic elements to indexer " + getIndexerName()
                            + " due to commit");
                }

                commitRecord.getSemanticNidsInCommit().stream().forEach(sememeId -> {
                    final SemanticChronology sc = Get.assemblageService().getSemanticChronology(sememeId);

                    index(sc);
                });
                LOG.info("Completed index of " + size + " semantics for " + getIndexerName());
            }

            @Override
            public void handleChange(SemanticChronology sc) {
                // noop - indexing happens on commit, not on change
            }

            @Override
            public void handleChange(ConceptChronology cc) {
                // noop - indexing happens on commit, not on change
            }

            @Override
            public UUID getListenerUuid() {
                // Stable UUID derived from the indexer name (type-5, name-based).
                return UuidT5Generator.get(getIndexerName());
            }
        };
        Get.commitService().addChangeListener(this.changeListenerRef);
    } catch (final IOException e) {
        // Surface the failure to the system status service before rethrowing.
        LookupService.getService(SystemStatusService.class).notifyServiceConfigurationFailure(indexName, e);
        throw e;
    }
}

From source file:Util.Index_Handler.java

/**
 * Creates an index for the abstracts by reading "abstract_clean.txt"
 * (one {@code entity|abstract} pair per line) and adding one document
 * per line to the "Abstract_Index" directory.
 *
 * @return "done" on success, otherwise an error message describing the failure
 */
public static String createAbstract_Index() {
    try {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_46, analyzer);
        conf.setSimilarity(new CustomSimilarity());
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        // try-with-resources: the original leaked the writer, directory and
        // scanner whenever any call between open and close threw (and
        // scan.close() was unreachable if writer.close() threw).
        try (Directory dir = FSDirectory.open(new File("Abstract_Index"));
                IndexWriter writer = new IndexWriter(dir, conf);
                Scanner scan = new Scanner(Paths.get("abstract_clean.txt"))) {
            while (scan.hasNextLine()) {
                String[] line = scan.nextLine().split("\\|");
                if (line.length < 2) {
                    // Malformed line (no '|' separator) - skip instead of dying
                    // with an uncaught ArrayIndexOutOfBoundsException.
                    continue;
                }
                Document doc = new Document();
                doc.add(new Field("entity", line[0], TextField.TYPE_STORED));
                doc.add(new Field("abstract", line[1], TextField.TYPE_STORED));
                writer.addDocument(doc);
            }
            writer.prepareCommit();
            writer.commit();
        }
    } catch (IOException ex) {
        System.out.println("Failed to creat Index for Abstracts:" + ex.getMessage());
        return "Failed to creat Index for Abstracts:" + ex.getMessage();
    }
    return "done";
}

From source file:Util.Index_Handler.java

/**
 * Creates a block index as required by Lucene block-join queries: for each
 * entity in "combined.txt" ({@code entity|...|anchor1;anchor2;...} per line,
 * sorted so lines of one entity are contiguous), all anchor child documents
 * are added first, followed by one parent document marked type="Parent".
 * Anchor texts for the parents are preloaded from "entity_anchors.txt".
 *
 * @return "done" on success, otherwise an error message describing the failure
 */
public static String createBlockIndex() {
    // try-with-resources: the original leaked both scanners, the directory
    // and the writer whenever anything threw before the close() calls.
    try (Scanner scan = new Scanner(Paths.get("combined.txt"));
            Scanner scan2 = new Scanner(Paths.get("entity_anchors.txt"))) {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_46, analyzer);
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        conf.setSimilarity(new CustomSimilarity());
        try (Directory dir = FSDirectory.open(new File("Entity_Index"));
                IndexWriter writer = new IndexWriter(dir, conf)) {
            // Load all entity -> anchor-text mappings into RAM up front.
            Map<String, String> anchorMap = new HashMap<>();
            while (scan2.hasNext()) {
                String[] line = scan2.nextLine().split("\\|");
                anchorMap.put(line[0], line[1]);
            }

            ArrayList<Document> group = new ArrayList<>();
            String[] line = scan.nextLine().split("\\|");
            String previous = line[0];
            addAnchorChildren(group, line[2]);

            while (scan.hasNext()) {
                String raw = scan.nextLine();
                // BUG FIX: the original tested line.length == 0 AFTER split(),
                // which can never be true ("".split("\\|") yields [""]), so the
                // end marker was never detected. Test the raw line instead.
                if (raw.isEmpty()) {
                    System.out.println("found end");
                    group.add(buildParentDoc(previous, anchorMap));
                    writer.addDocuments(group);
                    group.clear();
                    break;
                }
                line = raw.split("\\|");
                if (!line[0].equals(previous)) {
                    // Entity changed: flush the finished group. BUG FIX: the
                    // parent document must describe the entity whose children
                    // were accumulated (previous); the original built it from
                    // line[0], attaching the NEXT entity's parent to every group.
                    group.add(buildParentDoc(previous, anchorMap));
                    writer.addDocuments(group);
                    group.clear();
                    previous = line[0];
                }
                addAnchorChildren(group, line[2]);
            }
            // BUG FIX: flush the trailing group when the input does not end
            // with an empty-line marker; the original silently dropped it.
            if (!group.isEmpty()) {
                group.add(buildParentDoc(previous, anchorMap));
                writer.addDocuments(group);
            }
            writer.prepareCommit();
            writer.commit();
        }
    } catch (IOException ex) {
        System.out.println("Creating Blockindex failed :" + ex.getMessage());
        return "Creating Blockindex failed :" + ex.getMessage();
    }
    return "done";
}

/** Adds one child document per ';'-separated anchor string to the group. */
private static void addAnchorChildren(ArrayList<Document> group, String anchorField) {
    for (String s : anchorField.split(";")) {
        Document doc = new Document();
        doc.add(new Field("anchorN", s, TextField.TYPE_STORED));
        group.add(doc);
    }
}

/**
 * Builds the parent document for an entity. The "type" field is the
 * block-join parent marker: a StringField so only an exact match will hit,
 * and not stored since this field is never searched or retrieved.
 * NOTE(review): anchorMap.get(entity) may be null for entities missing from
 * entity_anchors.txt - confirm anchorDeli() tolerates null.
 */
private static Document buildParentDoc(String entity, Map<String, String> anchorMap) {
    Document doc = new Document();
    doc.add(new Field("entity", entity, TextField.TYPE_STORED));
    doc.add(new Field("anchor", anchorDeli(anchorMap.get(entity)), TextField.TYPE_STORED));
    // BUG FIX: the dead end-marker branch wrote this field as "titel",
    // inconsistent with "title" used on the live path; unified to "title".
    doc.add(new Field("title", delimeterString(getEntity(entity)), TextField.TYPE_STORED));
    doc.add(new Field("type", "Parent", StringField.TYPE_NOT_STORED));
    return doc;
}