Example usage for org.apache.lucene.index IndexWriter setLiveCommitData

List of usage examples for org.apache.lucene.index IndexWriter setLiveCommitData

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter setLiveCommitData.

Prototype

public final synchronized void setLiveCommitData(Iterable<Map.Entry<String, String>> commitUserData) 

Source Link

Document

Sets the iterator to provide the commit user data map at commit time.

Usage

From source file:org.elasticsearch.index.engine.EngineDiskUtils.java

License:Apache License

private static void updateCommitData(IndexWriter writer, Map<String, String> keysToUpdate) throws IOException {
    final Map<String, String> userData = getUserData(writer);
    userData.putAll(keysToUpdate);//from  www.ja v a  2 s .  c  o  m
    writer.setLiveCommitData(userData.entrySet());
    writer.commit();
}

From source file:perf.Indexer.java

License:Apache License

private static void _main(String[] clArgs) throws Exception {

    Args args = new Args(clArgs);

    // EG: -facets Date -facets characterCount ...
    FacetsConfig facetsConfig = new FacetsConfig();
    facetsConfig.setHierarchical("Date", true);
    final Set<String> facetFields = new HashSet<String>();
    if (args.hasArg("-facets")) {
        for (String arg : args.getStrings("-facets")) {
            facetFields.add(arg);/*from  www.  ja v a 2  s.com*/
        }
    }

    final String dirImpl = args.getString("-dirImpl");
    final String dirPath = args.getString("-indexPath") + "/index";

    final Directory dir;
    OpenDirectory od = OpenDirectory.get(dirImpl);

    dir = od.open(Paths.get(dirPath));

    final String analyzer = args.getString("-analyzer");
    final Analyzer a;
    if (analyzer.equals("EnglishAnalyzer")) {
        a = new EnglishAnalyzer();
    } else if (analyzer.equals("StandardAnalyzer")) {
        a = new StandardAnalyzer();
    } else if (analyzer.equals("StandardAnalyzerNoStopWords")) {
        a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
    } else if (analyzer.equals("ShingleStandardAnalyzer")) {
        a = new ShingleAnalyzerWrapper(new StandardAnalyzer(), 2, 2);
    } else if (analyzer.equals("ShingleStandardAnalyzerNoStopWords")) {
        a = new ShingleAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET), 2, 2);
    } else {
        throw new RuntimeException("unknown analyzer " + analyzer);
    }

    final String lineFile = args.getString("-lineDocsFile");

    // -1 means all docs in the line file:
    final int docCountLimit = args.getInt("-docCountLimit");
    final int numThreads = args.getInt("-threadCount");

    final boolean doForceMerge = args.getFlag("-forceMerge");
    final boolean verbose = args.getFlag("-verbose");

    String indexSortField = null;
    SortField.Type indexSortType = null;

    if (args.hasArg("-indexSort")) {
        indexSortField = args.getString("-indexSort");

        int i = indexSortField.indexOf(':');
        if (i == -1) {
            throw new IllegalArgumentException(
                    "-indexSort should have form field:type; got: " + indexSortField);
        }
        String typeString = indexSortField.substring(i + 1, indexSortField.length());
        if (typeString.equals("long")) {
            indexSortType = SortField.Type.LONG;
        } else if (typeString.equals("string")) {
            indexSortType = SortField.Type.STRING;
        } else {
            throw new IllegalArgumentException("-indexSort can only handle 'long' sort; got: " + typeString);
        }
        indexSortField = indexSortField.substring(0, i);
    } else {
        indexSortType = null;
    }

    final double ramBufferSizeMB = args.getDouble("-ramBufferMB");
    final int maxBufferedDocs = args.getInt("-maxBufferedDocs");

    final String defaultPostingsFormat = args.getString("-postingsFormat");
    final boolean doDeletions = args.getFlag("-deletions");
    final boolean printDPS = args.getFlag("-printDPS");
    final boolean waitForMerges = args.getFlag("-waitForMerges");
    final boolean waitForCommit = args.getFlag("-waitForCommit");
    final String mergePolicy = args.getString("-mergePolicy");
    final Mode mode;
    final boolean doUpdate = args.getFlag("-update");
    if (doUpdate) {
        mode = Mode.UPDATE;
    } else {
        mode = Mode.valueOf(args.getString("-mode", "add").toUpperCase(Locale.ROOT));
    }
    int randomDocIDMax;
    if (mode == Mode.UPDATE) {
        randomDocIDMax = args.getInt("-randomDocIDMax");
    } else {
        randomDocIDMax = -1;
    }
    final String idFieldPostingsFormat = args.getString("-idFieldPostingsFormat");
    final boolean addGroupingFields = args.getFlag("-grouping");
    final boolean useCFS = args.getFlag("-cfs");
    final boolean storeBody = args.getFlag("-store");
    final boolean tvsBody = args.getFlag("-tvs");
    final boolean bodyPostingsOffsets = args.getFlag("-bodyPostingsOffsets");
    final int maxConcurrentMerges = args.getInt("-maxConcurrentMerges");
    final boolean addDVFields = args.getFlag("-dvfields");
    final boolean doRandomCommit = args.getFlag("-randomCommit");
    final boolean useCMS = args.getFlag("-useCMS");
    final boolean disableIOThrottle = args.getFlag("-disableIOThrottle");

    if (waitForCommit == false && waitForMerges) {
        throw new RuntimeException("pass -waitForCommit if you pass -waitForMerges");
    }

    if (waitForCommit == false && doForceMerge) {
        throw new RuntimeException("pass -waitForCommit if you pass -forceMerge");
    }

    if (waitForCommit == false && doDeletions) {
        throw new RuntimeException("pass -waitForCommit if you pass -deletions");
    }

    if (useCMS == false && disableIOThrottle) {
        throw new RuntimeException("-disableIOThrottle only makes sense with -useCMS");
    }

    final double nrtEverySec;
    if (args.hasArg("-nrtEverySec")) {
        nrtEverySec = args.getDouble("-nrtEverySec");
    } else {
        nrtEverySec = -1.0;
    }

    // True to start back at the beginning if we run out of
    // docs from the line file source:
    final boolean repeatDocs = args.getFlag("-repeatDocs");

    final String facetDVFormatName;
    if (facetFields.isEmpty()) {
        facetDVFormatName = "Lucene54";
    } else {
        facetDVFormatName = args.getString("-facetDVFormat");
    }

    if (addGroupingFields && docCountLimit == -1) {
        a.close();
        throw new RuntimeException("cannot add grouping fields unless docCount is set");
    }

    args.check();

    System.out.println("Dir: " + dirImpl);
    System.out.println("Index path: " + dirPath);
    System.out.println("Analyzer: " + analyzer);
    System.out.println("Line file: " + lineFile);
    System.out.println("Doc count limit: " + (docCountLimit == -1 ? "all docs" : "" + docCountLimit));
    System.out.println("Threads: " + numThreads);
    System.out.println("Force merge: " + (doForceMerge ? "yes" : "no"));
    System.out.println("Verbose: " + (verbose ? "yes" : "no"));
    System.out.println("RAM Buffer MB: " + ramBufferSizeMB);
    System.out.println("Max buffered docs: " + maxBufferedDocs);
    System.out.println("Default postings format: " + defaultPostingsFormat);
    System.out.println("Do deletions: " + (doDeletions ? "yes" : "no"));
    System.out.println("Wait for merges: " + (waitForMerges ? "yes" : "no"));
    System.out.println("Wait for commit: " + (waitForCommit ? "yes" : "no"));
    System.out.println("IO throttle: " + (disableIOThrottle ? "no" : "yes"));
    System.out.println("Merge policy: " + mergePolicy);
    System.out.println("Mode: " + mode);
    if (mode == Mode.UPDATE) {
        System.out.println("DocIDMax: " + randomDocIDMax);
    }
    System.out.println("ID field postings format: " + idFieldPostingsFormat);
    System.out.println("Add grouping fields: " + (addGroupingFields ? "yes" : "no"));
    System.out.println("Compound file format: " + (useCFS ? "yes" : "no"));
    System.out.println("Store body field: " + (storeBody ? "yes" : "no"));
    System.out.println("Term vectors for body field: " + (tvsBody ? "yes" : "no"));
    System.out.println("Facet DV Format: " + facetDVFormatName);
    System.out.println("Facet fields: " + facetFields);
    System.out.println("Body postings offsets: " + (bodyPostingsOffsets ? "yes" : "no"));
    System.out.println("Max concurrent merges: " + maxConcurrentMerges);
    System.out.println("Add DocValues fields: " + addDVFields);
    System.out.println("Use ConcurrentMergeScheduler: " + useCMS);
    if (nrtEverySec > 0.0) {
        System.out.println("Open & close NRT reader every: " + nrtEverySec + " sec");
    } else {
        System.out.println("Open & close NRT reader every: never");
    }
    System.out.println("Repeat docs: " + repeatDocs);

    if (verbose) {
        InfoStream.setDefault(new PrintStreamInfoStream(System.out));
    }

    final IndexWriterConfig iwc = new IndexWriterConfig(a);

    if (indexSortField != null) {
        iwc.setIndexSort(new Sort(new SortField(indexSortField, indexSortType)));
    }

    if (mode == Mode.UPDATE) {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    }

    iwc.setMaxBufferedDocs(maxBufferedDocs);
    iwc.setRAMBufferSizeMB(ramBufferSizeMB);

    // So flushed segments do/don't use CFS:
    iwc.setUseCompoundFile(useCFS);

    final AtomicBoolean indexingFailed = new AtomicBoolean();

    iwc.setMergeScheduler(getMergeScheduler(indexingFailed, useCMS, maxConcurrentMerges, disableIOThrottle));
    iwc.setMergePolicy(getMergePolicy(mergePolicy, useCFS));

    // Keep all commit points:
    if (doDeletions || doForceMerge) {
        iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE);
    }

    final Codec codec = new Lucene62Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return PostingsFormat.forName(field.equals("id") ? idFieldPostingsFormat : defaultPostingsFormat);
        }

        private final DocValuesFormat facetsDVFormat = DocValuesFormat.forName(facetDVFormatName);
        //private final DocValuesFormat lucene42DVFormat = DocValuesFormat.forName("Lucene42");
        //private final DocValuesFormat diskDVFormat = DocValuesFormat.forName("Disk");
        //        private final DocValuesFormat lucene45DVFormat = DocValuesFormat.forName("Lucene45");
        private final DocValuesFormat directDVFormat = DocValuesFormat.forName("Direct");

        @Override
        public DocValuesFormat getDocValuesFormatForField(String field) {
            if (facetFields.contains(field) || field.equals("$facets")) {
                return facetsDVFormat;
                //} else if (field.equals("$facets_sorted_doc_values")) {
                //return diskDVFormat;
            } else {
                // Use default DVFormat for all else:
                // System.out.println("DV: field=" + field + " format=" + super.getDocValuesFormatForField(field));
                return super.getDocValuesFormatForField(field);
            }
        }
    };

    iwc.setCodec(codec);

    System.out.println("IW config=" + iwc);

    IndexWriter w = new IndexWriter(dir, iwc);

    System.out.println("Index has " + w.maxDoc() + " docs");

    final TaxonomyWriter taxoWriter;
    if (facetFields.isEmpty() == false) {
        taxoWriter = new DirectoryTaxonomyWriter(od.open(Paths.get(args.getString("-indexPath"), "facets")),
                IndexWriterConfig.OpenMode.CREATE);
    } else {
        taxoWriter = null;
    }

    // Fixed seed so group field values are always consistent:
    final Random random = new Random(17);

    LineFileDocs lineFileDocs = new LineFileDocs(lineFile, repeatDocs, storeBody, tvsBody, bodyPostingsOffsets,
            false, taxoWriter, facetFields, facetsConfig, addDVFields);

    float docsPerSecPerThread = -1f;
    //float docsPerSecPerThread = 100f;

    IndexThreads threads = new IndexThreads(random, w, indexingFailed, lineFileDocs, numThreads, docCountLimit,
            addGroupingFields, printDPS, mode, docsPerSecPerThread, null, nrtEverySec, randomDocIDMax);

    System.out.println("\nIndexer: start");
    final long t0 = System.currentTimeMillis();

    threads.start();

    while (!threads.done() && indexingFailed.get() == false) {
        Thread.sleep(100);

        // Commits once per minute on average:
        if (doRandomCommit && random.nextInt(600) == 17) {
            System.out.println("Indexer: now commit");
            long commitStartNS = System.nanoTime();
            w.commit();
            System.out.println(String.format(Locale.ROOT, "Indexer: commit took %.1f msec",
                    (System.nanoTime() - commitStartNS) / 1000000.));
        }
    }

    threads.stop();

    final long t1 = System.currentTimeMillis();
    System.out.println("\nIndexer: indexing done (" + (t1 - t0) + " msec); total " + w.maxDoc() + " docs");
    // if we update we can not tell how many docs
    if (threads.failed.get()) {
        throw new RuntimeException("exceptions during indexing");
    }
    if (mode != Mode.UPDATE && docCountLimit != -1 && w.maxDoc() != docCountLimit) {
        throw new RuntimeException("w.maxDoc()=" + w.maxDoc() + " but expected " + docCountLimit);
    }

    final Map<String, String> commitData = new HashMap<String, String>();

    if (waitForMerges) {
        w.close();
        IndexWriterConfig iwc2 = new IndexWriterConfig(a);
        iwc2.setMergeScheduler(
                getMergeScheduler(indexingFailed, useCMS, maxConcurrentMerges, disableIOThrottle));
        iwc2.setMergePolicy(getMergePolicy(mergePolicy, useCFS));
        iwc2.setCodec(codec);
        iwc2.setUseCompoundFile(useCFS);
        iwc2.setMaxBufferedDocs(maxBufferedDocs);
        iwc2.setRAMBufferSizeMB(ramBufferSizeMB);
        if (indexSortField != null) {
            iwc2.setIndexSort(new Sort(new SortField(indexSortField, indexSortType)));
        }

        w = new IndexWriter(dir, iwc2);
        long t2 = System.currentTimeMillis();
        System.out.println("\nIndexer: waitForMerges done (" + (t2 - t1) + " msec)");
    }

    if (waitForCommit) {
        commitData.put("userData", "multi");
        w.setLiveCommitData(commitData.entrySet());
        long t2 = System.currentTimeMillis();
        w.commit();
        long t3 = System.currentTimeMillis();
        System.out.println("\nIndexer: commit multi (took " + (t3 - t2) + " msec)");
    } else {
        w.rollback();
        w = null;
    }

    if (doForceMerge) {
        long forceMergeStartMSec = System.currentTimeMillis();
        w.forceMerge(1);
        long forceMergeEndMSec = System.currentTimeMillis();
        System.out.println(
                "\nIndexer: force merge done (took " + (forceMergeEndMSec - forceMergeStartMSec) + " msec)");

        commitData.put("userData", "single");
        w.setLiveCommitData(commitData.entrySet());
        w.commit();
        final long t5 = System.currentTimeMillis();
        System.out.println("\nIndexer: commit single done (took " + (t5 - forceMergeEndMSec) + " msec)");
    }

    if (doDeletions) {
        final long t5 = System.currentTimeMillis();
        // Randomly delete 5% of the docs
        final Set<Integer> deleted = new HashSet<Integer>();
        final int maxDoc = w.maxDoc();
        final int toDeleteCount = (int) (maxDoc * 0.05);
        System.out.println("\nIndexer: delete " + toDeleteCount + " docs");
        while (deleted.size() < toDeleteCount) {
            final int id = random.nextInt(maxDoc);
            if (!deleted.contains(id)) {
                deleted.add(id);
                w.deleteDocuments(new Term("id", LineFileDocs.intToID(id)));
            }
        }
        final long t6 = System.currentTimeMillis();
        System.out.println("\nIndexer: deletes done (took " + (t6 - t5) + " msec)");

        commitData.put("userData", doForceMerge ? "delsingle" : "delmulti");
        w.setLiveCommitData(commitData.entrySet());
        w.commit();
        final long t7 = System.currentTimeMillis();
        System.out.println("\nIndexer: commit delmulti done (took " + (t7 - t6) + " msec)");

        if (doUpdate || w.numDocs() != maxDoc - toDeleteCount) {
            throw new RuntimeException(
                    "count mismatch: w.numDocs()=" + w.numDocs() + " but expected " + (maxDoc - toDeleteCount));
        }
    }

    if (taxoWriter != null) {
        System.out.println("Taxonomy has " + taxoWriter.getSize() + " ords");
        taxoWriter.commit();
        taxoWriter.close();
    }

    final long tCloseStart = System.currentTimeMillis();
    if (w != null) {
        w.close();
        w = null;
    }
    if (waitForCommit) {
        System.out.println("\nIndexer: at close: " + SegmentInfos.readLatestCommit(dir));
        System.out.println("\nIndexer: close took " + (System.currentTimeMillis() - tCloseStart) + " msec");
    }

    dir.close();
    final long tFinal = System.currentTimeMillis();
    System.out.println("\nIndexer: net bytes indexed " + threads.getBytesIndexed());

    final long indexingTime;
    if (waitForCommit) {
        indexingTime = tFinal - t0;
        System.out.println("\nIndexer: finished (" + indexingTime + " msec)");
    } else {
        indexingTime = t1 - t0;
        System.out.println("\nIndexer: finished (" + indexingTime + " msec), excluding commit");
    }
    System.out.println(
            "\nIndexer: " + (threads.getBytesIndexed() / 1024. / 1024. / 1024. / (indexingTime / 3600000.))
                    + " GB/hour plain text");
}