Example usage for org.apache.lucene.index IndexWriterConfig setSimilarity

List of usage examples for org.apache.lucene.index IndexWriterConfig setSimilarity

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriterConfig setSimilarity.

Prototype

public IndexWriterConfig setSimilarity(Similarity similarity) 

Source Link

Document

Expert: set the Similarity implementation used by this IndexWriter.

Usage

From source file:sh.isaac.provider.query.lucene.LuceneIndexer.java

License:Apache License

/**
 * Instantiates a new lucene indexer: creates/opens the on-disk index under
 * the configured search folder, wires up an NRT (near-real-time) searcher
 * with a periodic reopen thread, and registers a commit listener that
 * re-indexes every semantic chronology touched by a commit.
 *
 * @param indexName the index name; also used as the on-disk subfolder name
 *                  and in the thread/service names
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected LuceneIndexer(String indexName) throws IOException {
    try {
        this.indexName = indexName;
        // Writes go through the shared IO executor; a second single-thread
        // pool is used only to await/check the futures of those writes.
        this.luceneWriterService = LookupService.getService(WorkExecutors.class).getIOExecutor();
        this.luceneWriterFutureCheckerService = Executors.newFixedThreadPool(1,
                new NamedThreadFactory(indexName + " Lucene future checker", false));

        final Path searchFolder = LookupService.getService(ConfigurationService.class).getSearchFolderPath();
        final File luceneRootFolder = new File(searchFolder.toFile(), DEFAULT_LUCENE_FOLDER);

        luceneRootFolder.mkdirs();
        this.indexFolder = new File(luceneRootFolder, indexName);

        // Record the pre-existing state of the index directory before we
        // create it, so callers can tell a fresh index from a populated one.
        if (!this.indexFolder.exists()) {
            this.databaseValidity = DatabaseValidity.MISSING_DIRECTORY;
            LOG.info("Index folder missing: " + this.indexFolder.getAbsolutePath());
        } else if (this.indexFolder.list().length > 0) {
            this.databaseValidity = DatabaseValidity.POPULATED_DIRECTORY;
        }

        this.indexFolder.mkdirs();
        LOG.info("Index: " + this.indexFolder.getAbsolutePath());

        final MMapDirectory indexDirectory = new MMapDirectory(this.indexFolder.toPath()); // switch over to MMapDirectory - in theory - this gives us back some
        // room on the JDK stack, letting the OS directly manage the caching of the index files - and more importantly, gives us a huge
        // performance boost during any operation that tries to do multi-threaded reads of the index (like the SOLOR rules processing) because
        // the default value of SimpleFSDirectory is a huge bottleneck.

        final IndexWriterConfig config = new IndexWriterConfig(new PerFieldAnalyzer());

        // Larger RAM buffer: fewer segment flushes during bulk indexing.
        config.setRAMBufferSizeMB(256);

        final MergePolicy mergePolicy = new LogByteSizeMergePolicy();

        config.setMergePolicy(mergePolicy);
        config.setSimilarity(new ShortTextSimilarity());

        this.indexWriter = new IndexWriter(indexDirectory, config);

        // Searchers opened from the writer do NOT apply deletes - presumably a
        // deliberate speed/visibility trade-off; TODO confirm intent.
        final boolean applyAllDeletes = false;
        final boolean writeAllDeletes = false;

        this.searcherManager = new SearcherManager(indexWriter, applyAllDeletes, writeAllDeletes, null);

        // [3]: Create the ControlledRealTimeReopenThread that reopens the index periodically taking into
        // account the changes made to the index and tracked by the TrackingIndexWriter instance.
        // The index is refreshed every 60 s when nobody is waiting
        // and every 100 ms whenever someone is waiting (see search method)
        // (see http://lucene.apache.org/core/4_3_0/core/org/apache/lucene/search/NRTManagerReopenThread.html)
        this.reopenThread = new ControlledRealTimeReopenThread<>(this.indexWriter, this.searcherManager, 60.00,
                0.1);
        this.startThread();

        // Register for commits: every semantic nid in a commit record is
        // looked up and re-indexed, unless the system is in DB build mode.
        LOG.info("Registering indexer " + indexName + " for commits");
        this.changeListenerRef = new ChronologyChangeListener() {
            @Override
            public void handleCommit(CommitRecord commitRecord) {
                // Lazily resolve (and then cache) the build-mode flag.
                if (LuceneIndexer.this.dbBuildMode == null) {
                    LuceneIndexer.this.dbBuildMode = Get.configurationService().inDBBuildMode();
                }

                if (LuceneIndexer.this.dbBuildMode) {
                    LOG.debug("Ignore commit due to db build mode");
                    return;
                }

                final int size = commitRecord.getSemanticNidsInCommit().size();

                // Only log the full nid list for small commits.
                if (size < 100) {
                    LOG.info("submitting semantic elements " + commitRecord.getSemanticNidsInCommit().toString()
                            + " to indexer " + getIndexerName() + " due to commit");
                } else {
                    LOG.info("submitting " + size + " semantic elements to indexer " + getIndexerName()
                            + " due to commit");
                }

                commitRecord.getSemanticNidsInCommit().stream().forEach(sememeId -> {
                    final SemanticChronology sc = Get.assemblageService().getSemanticChronology(sememeId);

                    index(sc);
                });
                LOG.info("Completed index of " + size + " semantics for " + getIndexerName());
            }

            @Override
            public void handleChange(SemanticChronology sc) {
                // noop - indexing happens on commit, not on change
            }

            @Override
            public void handleChange(ConceptChronology cc) {
                // noop - indexing happens on commit, not on change
            }

            @Override
            public UUID getListenerUuid() {
                // Stable UUID derived from the indexer name (type-5, name-based).
                return UuidT5Generator.get(getIndexerName());
            }
        };
        Get.commitService().addChangeListener(this.changeListenerRef);
    } catch (final IOException e) {
        // Surface the failure to the system status service before rethrowing.
        LookupService.getService(SystemStatusService.class).notifyServiceConfigurationFailure(indexName, e);
        throw e;
    }
}

From source file:Util.Index_Handler.java

/**
 * Creates an index for the abstracts by reading "abstract_clean.txt"
 * (one {@code entity|abstract} pair per line) and adding one document
 * per line to the "Abstract_Index" directory.
 *
 * @return "done" on success, otherwise an error message describing the failure
 */
public static String createAbstract_Index() {
    try {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_46, analyzer);
        conf.setSimilarity(new CustomSimilarity());
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        // try-with-resources: the original leaked the writer, directory and
        // scanner whenever any call between open and close threw (and
        // scan.close() was unreachable if writer.close() threw).
        try (Directory dir = FSDirectory.open(new File("Abstract_Index"));
                IndexWriter writer = new IndexWriter(dir, conf);
                Scanner scan = new Scanner(Paths.get("abstract_clean.txt"))) {
            while (scan.hasNextLine()) {
                String[] line = scan.nextLine().split("\\|");
                if (line.length < 2) {
                    // Malformed line (no '|' separator) - skip instead of dying
                    // with an uncaught ArrayIndexOutOfBoundsException.
                    continue;
                }
                Document doc = new Document();
                doc.add(new Field("entity", line[0], TextField.TYPE_STORED));
                doc.add(new Field("abstract", line[1], TextField.TYPE_STORED));
                writer.addDocument(doc);
            }
            writer.prepareCommit();
            writer.commit();
        }
    } catch (IOException ex) {
        System.out.println("Failed to creat Index for Abstracts:" + ex.getMessage());
        return "Failed to creat Index for Abstracts:" + ex.getMessage();
    }
    return "done";
}

From source file:Util.Index_Handler.java

/**
 * Creates a block index as required by Lucene block-join queries: for each
 * entity in "combined.txt" ({@code entity|...|anchor1;anchor2;...} per line,
 * sorted so lines of one entity are contiguous), all anchor child documents
 * are added first, followed by one parent document marked type="Parent".
 * Anchor texts for the parents are preloaded from "entity_anchors.txt".
 *
 * @return "done" on success, otherwise an error message describing the failure
 */
public static String createBlockIndex() {
    // try-with-resources: the original leaked both scanners, the directory
    // and the writer whenever anything threw before the close() calls.
    try (Scanner scan = new Scanner(Paths.get("combined.txt"));
            Scanner scan2 = new Scanner(Paths.get("entity_anchors.txt"))) {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_46, analyzer);
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        conf.setSimilarity(new CustomSimilarity());
        try (Directory dir = FSDirectory.open(new File("Entity_Index"));
                IndexWriter writer = new IndexWriter(dir, conf)) {
            // Load all entity -> anchor-text mappings into RAM up front.
            Map<String, String> anchorMap = new HashMap<>();
            while (scan2.hasNext()) {
                String[] line = scan2.nextLine().split("\\|");
                anchorMap.put(line[0], line[1]);
            }

            ArrayList<Document> group = new ArrayList<>();
            String[] line = scan.nextLine().split("\\|");
            String previous = line[0];
            addAnchorChildren(group, line[2]);

            while (scan.hasNext()) {
                String raw = scan.nextLine();
                // BUG FIX: the original tested line.length == 0 AFTER split(),
                // which can never be true ("".split("\\|") yields [""]), so the
                // end marker was never detected. Test the raw line instead.
                if (raw.isEmpty()) {
                    System.out.println("found end");
                    group.add(buildParentDoc(previous, anchorMap));
                    writer.addDocuments(group);
                    group.clear();
                    break;
                }
                line = raw.split("\\|");
                if (!line[0].equals(previous)) {
                    // Entity changed: flush the finished group. BUG FIX: the
                    // parent document must describe the entity whose children
                    // were accumulated (previous); the original built it from
                    // line[0], attaching the NEXT entity's parent to every group.
                    group.add(buildParentDoc(previous, anchorMap));
                    writer.addDocuments(group);
                    group.clear();
                    previous = line[0];
                }
                addAnchorChildren(group, line[2]);
            }
            // BUG FIX: flush the trailing group when the input does not end
            // with an empty-line marker; the original silently dropped it.
            if (!group.isEmpty()) {
                group.add(buildParentDoc(previous, anchorMap));
                writer.addDocuments(group);
            }
            writer.prepareCommit();
            writer.commit();
        }
    } catch (IOException ex) {
        System.out.println("Creating Blockindex failed :" + ex.getMessage());
        return "Creating Blockindex failed :" + ex.getMessage();
    }
    return "done";
}

/** Adds one child document per ';'-separated anchor string to the group. */
private static void addAnchorChildren(ArrayList<Document> group, String anchorField) {
    for (String s : anchorField.split(";")) {
        Document doc = new Document();
        doc.add(new Field("anchorN", s, TextField.TYPE_STORED));
        group.add(doc);
    }
}

/**
 * Builds the parent document for an entity. The "type" field is the
 * block-join parent marker: a StringField so only an exact match will hit,
 * and not stored since this field is never searched or retrieved.
 * NOTE(review): anchorMap.get(entity) may be null for entities missing from
 * entity_anchors.txt - confirm anchorDeli() tolerates null.
 */
private static Document buildParentDoc(String entity, Map<String, String> anchorMap) {
    Document doc = new Document();
    doc.add(new Field("entity", entity, TextField.TYPE_STORED));
    doc.add(new Field("anchor", anchorDeli(anchorMap.get(entity)), TextField.TYPE_STORED));
    // BUG FIX: the dead end-marker branch wrote this field as "titel",
    // inconsistent with "title" used on the live path; unified to "title".
    doc.add(new Field("title", delimeterString(getEntity(entity)), TextField.TYPE_STORED));
    doc.add(new Field("type", "Parent", StringField.TYPE_NOT_STORED));
    return doc;
}