Example usage for org.apache.lucene.index IndexWriterConfig setInfoStream

List of usage examples for org.apache.lucene.index IndexWriterConfig setInfoStream

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriterConfig#setInfoStream.

Prototype

public IndexWriterConfig setInfoStream(PrintStream printStream) 

Source Link

Document

Convenience method that uses PrintStreamInfoStream.

Usage

From source file:org.elasticsearch.index.engine.InternalEngine.java

License:Apache License

/**
 * Creates the Lucene {@link IndexWriter} backing this engine, wiring in the
 * engine's analyzer, deletion policy, merge machinery, codec, similarity and
 * a merged-segment warmer.
 *
 * @param create whether to create a fresh index ({@code CREATE}) or append to
 *               an existing one ({@code APPEND})
 * @return a configured writer over {@code store.directory()}
 * @throws IOException if writer creation fails; a
 *         {@code LockObtainFailedException} is first logged together with the
 *         directory's current lock state, then rethrown
 */
private IndexWriter createWriter(boolean create) throws IOException {
    try {
        final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
        iwc.setCommitOnClose(false); // we by default don't commit on close
        iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
        iwc.setIndexDeletionPolicy(deletionPolicy);
        // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
        boolean verbose = false;
        try {
            verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
        } catch (Throwable ignore) {
            // best-effort: e.g. a SecurityException reading the system property
            // must not prevent writer creation
        }
        iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
        iwc.setMergeScheduler(mergeScheduler);
        MergePolicy mergePolicy = config().getMergePolicy();
        // Give us the opportunity to upgrade old segments while performing
        // background merges
        mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
        iwc.setMergePolicy(mergePolicy);
        iwc.setSimilarity(engineConfig.getSimilarity());
        iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().mbFrac());
        iwc.setCodec(engineConfig.getCodec());
        /* We set this timeout to a highish value to work around
         * the default poll interval in the Lucene lock that is
         * 1000ms by default. We might need to poll multiple times
         * here but with 1s poll this is only executed twice at most
         * in combination with the default writelock timeout*/
        iwc.setWriteLockTimeout(5000);
        iwc.setUseCompoundFile(this.engineConfig.isCompoundOnFlush());
        // Warm-up hook for newly-merged segments. Warming up segments here is better since it will be performed at the end
        // of the merge operation and won't slow down _refresh
        iwc.setMergedSegmentWarmer(new IndexReaderWarmer() {
            @Override
            public void warm(LeafReader reader) throws IOException {
                try {
                    LeafReader esLeafReader = new ElasticsearchLeafReader(reader, shardId);
                    assert isMergedSegment(esLeafReader);
                    if (warmer != null) {
                        final Engine.Searcher searcher = new Searcher("warmer",
                                searcherFactory.newSearcher(esLeafReader, null));
                        final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId,
                                searcher);
                        warmer.warmNewReaders(context);
                    }
                } catch (Throwable t) {
                    // Don't fail a merge if the warm-up failed
                    if (isClosed.get() == false) {
                        logger.warn("Warm-up failed", t);
                    }
                    if (t instanceof Error) {
                        // assertion/out-of-memory error, don't ignore those
                        throw (Error) t;
                    }
                }
            }
        });
        return new IndexWriter(store.directory(), iwc);
    } catch (LockObtainFailedException ex) {
        // Surface whether the directory is still locked to aid debugging stale-lock failures.
        boolean isLocked = IndexWriter.isLocked(store.directory());
        logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked);
        throw ex;
    }
}

From source file:org.eu.bitzone.Leia.java

License:Apache License

/**
 * Optimizes the index on a background thread: force-merges segments down to
 * {@code segnum} (or just expunges deleted docs), then reopens the index and
 * reports the size/time delta in the dialog's status widgets.
 *
 * @param dialog the Thinlet dialog holding the option widgets ("optCompound",
 *               "optExpunge", "optKeepAll", "optLastCommit", "tii", "segnum")
 *               and the status/message areas
 */
public void optimize(final Object dialog) {
    final Thread t = new Thread() {

        @Override
        public void run() {
            IndexWriter iw = null;
            // Disable the buttons while the (potentially long) merge runs.
            final Object optimizeButton = find(dialog, "optimizeButton");
            setBoolean(optimizeButton, "enabled", false);
            final Object closeButton = find(dialog, "closeButton");
            setBoolean(closeButton, "enabled", false);
            final Object msg = find(dialog, "msg");
            final Object stat = find(dialog, "stat");
            setString(stat, "text", "Running ...");
            // Writer that renders IndexWriter's info stream into the dialog's message panel.
            final PanelPrintWriter ppw = new PanelPrintWriter(Leia.this, msg);
            // Read the user's choices from the dialog widgets.
            final boolean useCompound = getBoolean(find(dialog, "optCompound"), "selected");
            final boolean expunge = getBoolean(find(dialog, "optExpunge"), "selected");
            final boolean keep = getBoolean(find(dialog, "optKeepAll"), "selected");
            final boolean useLast = getBoolean(find(dialog, "optLastCommit"), "selected");
            final Object tiiSpin = find(dialog, "tii");
            final Object segnumSpin = find(dialog, "segnum");
            final int tii = Integer.parseInt(getString(tiiSpin, "text"));
            final int segnum = Integer.parseInt(getString(segnumSpin, "text"));
            try {
                // Release any open searcher/readers before opening a writer on the index.
                if (is != null) {
                    is = null;
                }
                if (ir != null) {
                    ir.close();
                }
                if (ar != null) {
                    ar.close();
                }
                IndexDeletionPolicy policy;
                if (keep) {
                    policy = new KeepAllIndexDeletionPolicy();
                } else {
                    policy = new KeepLastIndexDeletionPolicy();
                }
                final IndexWriterConfig cfg = new IndexWriterConfig(LV, new WhitespaceAnalyzer(LV));
                if (!useLast) {
                    // Start from the commit point the reader was opened on instead of the latest one.
                    final IndexCommit ic = ((DirectoryReader) ir).getIndexCommit();
                    if (ic != null) {
                        cfg.setIndexCommit(ic);
                    }
                }
                cfg.setIndexDeletionPolicy(policy);
                cfg.setTermIndexInterval(tii);
                final MergePolicy p = cfg.getMergePolicy();
                cfg.setUseCompoundFile(useCompound);
                if (useCompound) {
                    // Force newly merged segments into the compound format regardless of size.
                    p.setNoCFSRatio(1.0);
                }
                cfg.setInfoStream(ppw);
                iw = new IndexWriter(dir, cfg);
                final long startSize = Util.calcTotalFileSize(pName, dir);
                final long startTime = System.currentTimeMillis();
                if (expunge) {
                    iw.forceMergeDeletes();
                } else {
                    if (segnum > 1) {
                        iw.forceMerge(segnum, true);
                    } else {
                        iw.forceMerge(1, true);
                    }
                }
                iw.commit();
                final long endTime = System.currentTimeMillis();
                final long endSize = Util.calcTotalFileSize(pName, dir);
                // Report how much the merge grew or shrank the index, and how long it took.
                final long deltaSize = startSize - endSize;
                final String sign = deltaSize < 0 ? " Increased " : " Reduced ";
                final String sizeMsg = sign + Util.normalizeSize(Math.abs(deltaSize))
                        + Util.normalizeUnit(Math.abs(deltaSize));
                final String timeMsg = String.valueOf(endTime - startTime) + " ms";
                showStatus(sizeMsg + " in " + timeMsg);
                iw.close();
                setString(stat, "text", "Finished OK.");
            } catch (final Exception e) {
                e.printStackTrace(ppw);
                setString(stat, "text", "ERROR - aborted.");
                errorMsg("ERROR optimizing: " + e.toString());
                if (iw != null) {
                    try {
                        iw.close();
                    } catch (final Exception e1) {
                        // best-effort close after a failure; the original error was already reported
                    }
                }
            } finally {
                // NOTE(review): only closeButton is re-enabled here; optimizeButton stays
                // disabled after a run -- confirm this is intended.
                setBoolean(closeButton, "enabled", true);
            }
            // Reopen the index so the UI reflects the post-merge state.
            try {
                actionReopen();
                is = new IndexSearcher(ir);
                // add dialog again
                add(dialog);
            } catch (final Exception e) {
                e.printStackTrace(ppw);
                errorMsg("ERROR reopening after optimize:\n" + e.getMessage());
            }
        }
    };
    t.start();
}

From source file:org.getopt.luke.Luke.java

License:Apache License

/**
 * Optimizes the index on a background thread: force-merges segments down to
 * {@code segnum} (or just expunges deleted docs), then reopens the index and
 * reports the size/time delta in the dialog's status widgets.
 *
 * @param dialog the Thinlet dialog holding the option widgets ("optCompound",
 *               "optExpunge", "optKeepAll", "optLastCommit", "tii", "segnum")
 *               and the status/message areas
 */
public void optimize(final Object dialog) {
    Thread t = new Thread() {
        public void run() {
            IndexWriter iw = null;
            // Disable the buttons while the (potentially long) merge runs.
            Object optimizeButton = find(dialog, "optimizeButton");
            setBoolean(optimizeButton, "enabled", false);
            Object closeButton = find(dialog, "closeButton");
            setBoolean(closeButton, "enabled", false);
            Object msg = find(dialog, "msg");
            Object stat = find(dialog, "stat");
            setString(stat, "text", "Running ...");
            // Writer that renders IndexWriter's info stream into the dialog's message panel.
            PanelPrintWriter ppw = new PanelPrintWriter(Luke.this, msg);
            // Read the user's choices from the dialog widgets.
            boolean useCompound = getBoolean(find(dialog, "optCompound"), "selected");
            boolean expunge = getBoolean(find(dialog, "optExpunge"), "selected");
            boolean keep = getBoolean(find(dialog, "optKeepAll"), "selected");
            boolean useLast = getBoolean(find(dialog, "optLastCommit"), "selected");
            Object tiiSpin = find(dialog, "tii");
            Object segnumSpin = find(dialog, "segnum");
            int tii = Integer.parseInt(getString(tiiSpin, "text"));
            int segnum = Integer.parseInt(getString(segnumSpin, "text"));
            try {
                // Release any open searcher/readers before opening a writer on the index.
                if (is != null)
                    is = null;
                if (ir != null)
                    ir.close();
                if (ar != null)
                    ar.close();
                IndexDeletionPolicy policy;
                if (keep) {
                    policy = new KeepAllIndexDeletionPolicy();
                } else {
                    policy = new KeepLastIndexDeletionPolicy();
                }
                IndexWriterConfig cfg = new IndexWriterConfig(LV, new WhitespaceAnalyzer(LV));
                if (!useLast) {
                    // Start from the commit point the reader was opened on instead of the latest one.
                    IndexCommit ic = ((DirectoryReader) ir).getIndexCommit();
                    if (ic != null) {
                        cfg.setIndexCommit(ic);
                    }
                }
                cfg.setIndexDeletionPolicy(policy);
                cfg.setTermIndexInterval(tii);
                cfg.setUseCompoundFile(useCompound);
                cfg.setInfoStream(ppw);
                iw = new IndexWriter(dir, cfg);
                long startSize = Util.calcTotalFileSize(pName, dir);
                long startTime = System.currentTimeMillis();
                if (expunge) {
                    iw.forceMergeDeletes();
                } else {
                    if (segnum > 1) {
                        iw.forceMerge(segnum, true);
                    } else {
                        iw.forceMerge(1, true);
                    }
                }
                iw.commit();
                long endTime = System.currentTimeMillis();
                long endSize = Util.calcTotalFileSize(pName, dir);
                // Report how much the merge grew or shrank the index, and how long it took.
                long deltaSize = startSize - endSize;
                String sign = deltaSize < 0 ? " Increased " : " Reduced ";
                String sizeMsg = sign + Util.normalizeSize(Math.abs(deltaSize))
                        + Util.normalizeUnit(Math.abs(deltaSize));
                String timeMsg = String.valueOf(endTime - startTime) + " ms";
                showStatus(sizeMsg + " in " + timeMsg);
                iw.close();
                setString(stat, "text", "Finished OK.");
            } catch (Exception e) {
                e.printStackTrace(ppw);
                setString(stat, "text", "ERROR - aborted.");
                errorMsg("ERROR optimizing: " + e.toString());
                if (iw != null)
                    try {
                        iw.close();
                    } catch (Exception e1) {
                        // best-effort close after a failure; the original error was already reported
                    }
            } finally {
                // NOTE(review): only closeButton is re-enabled here; optimizeButton stays
                // disabled after a run -- confirm this is intended.
                setBoolean(closeButton, "enabled", true);
            }
            // Reopen the index so the UI reflects the post-merge state.
            try {
                actionReopen();
                is = new IndexSearcher(ir);
                // add dialog again
                add(dialog);
            } catch (Exception e) {
                e.printStackTrace(ppw);
                errorMsg("ERROR reopening after optimize:\n" + e.getMessage());
            }
        }
    };
    t.start();
}

From source file:org.hibernate.search.test.util.logging.LoggerInfoStreamTest.java

License:LGPL

@Test
public void testEnableInfoStream() throws Exception {
    // Route Lucene's IndexWriter info stream through the logging bridge under test.
    LoggerInfoStream stream = new LoggerInfoStream();

    RAMDirectory dir = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
    config.setInfoStream(stream);

    // Index a single document so the writer emits some "IW:" trace output.
    IndexWriter writer = new IndexWriter(dir, config);
    Document document = new Document();
    document.add(new StringField("f1", "value1", Field.Store.YES));
    writer.addDocument(document);
    writer.commit();
    writer.close();

    // The info-stream logger category must have received at least one message.
    List<LoggingEvent> events = testAppender
            .searchByLoggerAndMessage(LogCategory.INFOSTREAM_LOGGER_CATEGORY.getName(), "IW:");

    assertFalse(events.isEmpty());
}

From source file:perf.AutoPrefixPerf.java

License:Apache License

/**
 * Benchmark driver: indexes a file of longs (one per line) either as a
 * NumericField (when {@code precStep != 0}) or as fixed-width binary terms
 * with auto-prefix settings, then loads up to 200 "min max" range queries
 * from a second file and times them over 10 iterations.
 *
 * Usage: numbersFile queriesFile indexPath precStep [minTermsInPrefix maxTermsInPrefix]
 */
public static void main(String[] args) throws Exception {
    String numbersFile = args[0];
    String queriesFile = args[1];
    Path indexPath = Paths.get(args[2]);

    // precStep == 0 selects the binary-term encoding; otherwise NumericField is used.
    int precStep = Integer.parseInt(args[3]);
    boolean useNumericField = (precStep != 0);
    int maxTermsInPrefix;
    int minTermsInPrefix;
    if (useNumericField == false) {
        minTermsInPrefix = Integer.parseInt(args[4]);
        maxTermsInPrefix = Integer.parseInt(args[5]);
    } else {
        minTermsInPrefix = 0;
        maxTermsInPrefix = 0;
    }

    // Reusable 8-byte buffer holding the binary encoding of each long.
    BytesRefBuilder binaryToken = new BytesRefBuilder();
    binaryToken.grow(8);
    binaryToken.setLength(8);

    Directory dir = FSDirectory.open(indexPath);
    // NOTE(review): FSDirectory.open may create indexPath, in which case this
    // existence check always passes -- confirm the "skip indexing" branch is reachable.
    if (Files.notExists(indexPath) == false) {
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setMaxBufferedDocs(30000);
        iwc.setRAMBufferSizeMB(-1);
        iwc.setMergePolicy(new LogDocMergePolicy());

        final PostingsFormat pf;

        if (useNumericField) {
            // Disable auto-prefix when testing NumericField!
            if (minTermsInPrefix != 0) {
                throw new IllegalArgumentException("only precStep or minTermsInPrefix should be non-zero");
            }
            pf = new Lucene50PostingsFormat(25, 48, 0, 0);
        } else {
            /*
            if (minTermsInPrefix == 0) {
              throw new IllegalArgumentException("one of precStep or minTermsInPrefix must be non-zero");
            }
            */
            pf = new Lucene50PostingsFormat(25, 48, minTermsInPrefix, maxTermsInPrefix);
            //pf = new Lucene50PostingsFormat(25, 48, minTermsInPrefix, Integer.MAX_VALUE);
        }

        // Apply the chosen postings format to every field.
        iwc.setCodec(new Lucene53Codec() {
            @Override
            public PostingsFormat getPostingsFormatForField(String field) {
                return pf;
            }
        });

        iwc.setInfoStream(new PrintStreamInfoStream(System.out));
        iwc.setMergeScheduler(new SerialMergeScheduler());

        //TieredMergePolicy tmp = (TieredMergePolicy) iwc.getMergePolicy();
        //tmp.setFloorSegmentMB(.1);
        //ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwc.getMergeScheduler();
        // More concurrency (for SSD)
        //cms.setMaxMergesAndThreads(5, 3);
        final IndexWriter w = new IndexWriter(dir, iwc);

        // A single reused Document/Field pair; only the value changes per input line.
        Document doc = new Document();
        Field field;
        if (useNumericField) {
            FieldType longFieldType = new FieldType(LongField.TYPE_NOT_STORED);
            longFieldType.setNumericPrecisionStep(precStep);
            longFieldType.freeze();
            field = new LongField("number", 0L, longFieldType);
            doc.add(field);
        } else {
            FieldType longFieldType = new FieldType(TextField.TYPE_NOT_STORED);
            longFieldType.setIndexOptions(IndexOptions.DOCS_ONLY);
            longFieldType.setOmitNorms(true);
            longFieldType.setIndexRanges(true);
            longFieldType.freeze();
            field = new Field("number", new BinaryTokenStream(binaryToken.get()), longFieldType);
            doc.add(field);
        }

        long startMS = System.currentTimeMillis();

        // 64K buffer:
        InputStream is = new FileInputStream(numbersFile);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16);

        int count = 0;
        while (true) {
            String line = reader.readLine();
            if (line == null) {
                break;
            }
            Long v = Long.parseLong(line.trim());
            if (useNumericField) {
                field.setLongValue(v);
            } else {
                //NumericUtils.longToPrefixCoded(v, 0, binaryToken);
                // Rewrite the shared buffer in place; the field's token stream reads from it.
                longToBytes(v, binaryToken);
                //if (bytesToLong(binaryToken.get()) != v) {
                //  throw new RuntimeException("wrong long: v=" + v + " vs " + bytesToLong(binaryToken.get()));
                //}
            }
            w.addDocument(doc);
            count++;
            if (count % 200000 == 0) {
                long ms = System.currentTimeMillis();
                System.out.println("Indexed " + count + ": " + ((ms - startMS) / 1000.0) + " sec");
            }
        }
        reader.close();

        System.out.println(
                "Final Indexed " + count + ": " + ((System.currentTimeMillis() - startMS) / 1000.0) + " sec");

        // nocommit just to make debugging easier:
        //System.out.println("Optimize...");
        //w.forceMerge(1);

        System.out.println("Close...");
        w.close();
        System.out.println("After close: " + ((System.currentTimeMillis() - startMS) / 1000.0) + " sec");

        // Print CheckIndex:
        ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
        CheckIndex checker = new CheckIndex(dir);
        checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), true);
        CheckIndex.Status status = checker.checkIndex();
        System.out.println("Done CheckIndex:");
        System.out.println(bos.toString(IOUtils.UTF_8));
        if (status.clean == false) {
            throw new IllegalStateException("CheckIndex failed");
        }

        // Sum up the on-disk size of all committed segments.
        SegmentInfos infos = new SegmentInfos();
        infos.read(dir);

        long totBytes = 0;
        for (SegmentCommitInfo info : infos) {
            totBytes += info.sizeInBytes();
        }
        System.out.println("\nTotal index size: " + totBytes + " bytes");
    } else {
        System.out.println("Skip indexing: index already exists");
    }

    // Load up to 200 "min max" range queries from the queries file.
    List<Query> queries = new ArrayList<>();
    InputStream is = new FileInputStream(queriesFile);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16);
    while (true) {
        String line = reader.readLine();
        if (line == null) {
            break;
        }
        String[] numbers = line.trim().split(" ");
        if (numbers.length != 2) {
            throw new IllegalArgumentException("could not parse query line: " + line);
        }
        long minValue = Long.parseLong(numbers[0]);
        long maxValue = Long.parseLong(numbers[1]);
        if (useNumericField) {
            queries.add(NumericRangeQuery.newLongRange("number", precStep, minValue, maxValue, true, true));
        } else {
            longToBytes(minValue, binaryToken);
            BytesRef minTerm = binaryToken.toBytesRef();
            longToBytes(maxValue, binaryToken);
            BytesRef maxTerm = binaryToken.toBytesRef();
            queries.add(new TermRangeQuery("number", minTerm, maxTerm, true, true));
        }

        if (queries.size() == 200) {
            break;
        }
    }

    DirectoryReader r = DirectoryReader.open(dir);
    IndexSearcher s = new IndexSearcher(r);
    s.setQueryCache(null); // don't bench the cache

    printQueryTerms((MultiTermQuery) queries.get(0), s);

    // Run the query set 10 times and keep the best wall-clock time; the hash
    // accumulator keeps the JIT from optimizing the searches away.
    long bestMS = Long.MAX_VALUE;
    for (int iter = 0; iter < 10; iter++) {
        long startMS = System.currentTimeMillis();
        long totalHits = 0;
        long hash = 0;
        for (Query query : queries) {
            TopDocs hits = s.search(query, 10);
            totalHits += hits.totalHits;
            hash = hash * 31 + hits.totalHits;
        }
        long ms = System.currentTimeMillis() - startMS;
        System.out.println("iter " + iter + ": " + ms + " msec; totalHits=" + totalHits + " hash=" + hash);
        if (ms < bestMS) {
            System.out.println("  **");
            bestMS = ms;
        }
    }

    /*
    long t0 = System.currentTimeMillis();
    long bytesUsed = 0;
    for(int i=0;i<1000;i++) {
      for(AtomicReaderContext ctx : r.leaves()) {
        bytesUsed += ((SegmentReader) ctx.reader()).ramBytesUsed();
      }
    }
    System.out.println((System.currentTimeMillis() - t0) + " msec for 1000 ramBytesUsed: " + (bytesUsed / 1000));
    */

    r.close();
    dir.close();
}

From source file:perf.IndexAndSearchOpenStreetMaps.java

License:Apache License

/**
 * Builds the lat/lon benchmark index, one directory per {@code NUM_PARTS},
 * reading CSV lines ("id,lat,lon") from a single shared reader with
 * {@code NUM_THREADS} indexing threads per part.
 *
 * @param fast           use the high-throughput setup (4 threads, 1 GB RAM
 *                       buffer) instead of the reproducible serial setup
 * @param doForceMerge   force-merge each part to one segment after commit
 * @param doDistanceSort also add doc values so distance sorting is possible
 *                       (LatLonPoint branch only)
 */
private static void createIndex(boolean fast, boolean doForceMerge, boolean doDistanceSort)
        throws IOException, InterruptedException {

    // Fail fast on malformed input rather than silently replacing characters.
    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

    int BUFFER_SIZE = 1 << 16; // 64K
    InputStream is;
    if (SMALL) {
        is = Files.newInputStream(Paths.get(DATA_LOCATION, "latlon.subsetPlusAllLondon.txt"));
    } else {
        is = Files.newInputStream(Paths.get(DATA_LOCATION, "latlon.txt"));
    }
    BufferedReader reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE);

    int NUM_THREADS;
    if (fast) {
        NUM_THREADS = 4;
    } else {
        NUM_THREADS = 1;
    }

    // Lines are handed to worker threads in chunks of this size.
    int CHUNK = 10000;

    long t0 = System.nanoTime();
    AtomicLong totalCount = new AtomicLong();

    for (int part = 0; part < NUM_PARTS; part++) {
        Directory dir = FSDirectory.open(Paths.get(getName(part, doDistanceSort)));

        IndexWriterConfig iwc = new IndexWriterConfig(null);
        iwc.setCodec(getCodec(fast));
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        if (fast) {
            ((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergedSegmentMB(Double.POSITIVE_INFINITY);
            iwc.setRAMBufferSizeMB(1024);
        } else {
            // Deterministic segment geometry for reproducible runs.
            iwc.setMaxBufferedDocs(109630);
            iwc.setMergePolicy(new LogDocMergePolicy());
            iwc.setMergeScheduler(new SerialMergeScheduler());
        }
        iwc.setInfoStream(new PrintStreamInfoStream(System.out));
        IndexWriter w = new IndexWriter(dir, iwc);

        Thread[] threads = new Thread[NUM_THREADS];
        AtomicBoolean finished = new AtomicBoolean();
        Object lock = new Object();

        final int finalPart = part;

        for (int t = 0; t < NUM_THREADS; t++) {
            threads[t] = new Thread() {
                @Override
                public void run() {
                    String[] lines = new String[CHUNK];
                    int chunkCount = 0;
                    while (finished.get() == false) {
                        try {
                            int count = CHUNK;
                            // The shared reader is only touched under the lock;
                            // parsing and indexing happen outside it.
                            synchronized (lock) {
                                for (int i = 0; i < CHUNK; i++) {
                                    String line = reader.readLine();
                                    if (line == null) {
                                        count = i;
                                        finished.set(true);
                                        break;
                                    }
                                    lines[i] = line;
                                }
                                // Cap part 0 at ~2 billion docs.
                                if (finalPart == 0 && totalCount.get() + count >= 2000000000) {
                                    finished.set(true);
                                }
                            }

                            for (int i = 0; i < count; i++) {
                                String[] parts = lines[i].split(",");
                                //long id = Long.parseLong(parts[0]);
                                double lat = Double.parseDouble(parts[1]);
                                double lon = Double.parseDouble(parts[2]);
                                Document doc = new Document();
                                if (useGeoPoint) {
                                    doc.add(new GeoPointField("point", lat, lon, Field.Store.NO));
                                } else if (useGeo3D || useGeo3DLarge) {
                                    doc.add(new Geo3DPoint("point", lat, lon));
                                } else {
                                    doc.add(new LatLonPoint("point", lat, lon));
                                    if (doDistanceSort) {
                                        doc.add(new LatLonDocValuesField("point", lat, lon));
                                    }
                                }
                                w.addDocument(doc);
                                long x = totalCount.incrementAndGet();
                                if (x % 1000000 == 0) {
                                    System.out.println(x + "...");
                                }
                            }
                            chunkCount++;
                            // Disabled early-exit hook for debugging runs.
                            if (false && SMALL == false && chunkCount == 20000) {
                                System.out.println("NOW BREAK EARLY");
                                break;
                            }
                        } catch (IOException ioe) {
                            throw new RuntimeException(ioe);
                        }
                    }
                }
            };
            threads[t].start();
        }

        for (Thread thread : threads) {
            thread.join();
        }

        System.out.println("Part " + part + " is done: w.maxDoc()=" + w.maxDoc());
        w.commit();
        System.out.println("done commit");
        long t1 = System.nanoTime();
        System.out.println(((t1 - t0) / 1000000000.0) + " sec to index part " + part);
        if (doForceMerge) {
            w.forceMerge(1);
            long t2 = System.nanoTime();
            System.out.println(((t2 - t1) / 1000000000.0) + " sec to force merge part " + part);
        }
        w.close();
    }

    //System.out.println(totalCount.get() + " total docs");
    //System.out.println("Force merge...");
    //w.forceMerge(1);
    //long t2 = System.nanoTime();
    //System.out.println(((t2-t1)/1000000000.0) + " sec to force merge");

    //w.close();
    //long t3 = System.nanoTime();
    //System.out.println(((t3-t2)/1000000000.0) + " sec to close");
    //System.out.println(((t3-t2)/1000000000.0) + " sec to close");
}

From source file:perf.IndexGeoNames.java

License:Apache License

/**
 * Standalone benchmark: indexes a tab-separated GeoNames dump into a Lucene
 * index using multiple threads, printing throughput as it goes.
 *
 * <p>Args: {@code geoNamesFile indexPath numThreads precStep}.
 *
 * <p>While the worker threads index, the main thread repeatedly reopens an
 * NRT reader (every 500 ms, 100 times) to exercise near-real-time reopen
 * cost during concurrent indexing.
 *
 * @throws Exception on any I/O or parse failure (benchmark tool: fail fast)
 */
public static void main(String[] args) throws Exception {
    String geoNamesFile = args[0];
    File indexPath = new File(args[1]);
    int numThreads = Integer.parseInt(args[2]);
    int precStep = Integer.parseInt(args[3]);
    // Refuse to clobber an existing index.
    if (indexPath.exists()) {
        throw new IllegalArgumentException("please remove indexPath \"" + indexPath + "\" before running");
    }

    Directory dir = FSDirectory.open(indexPath);
    //IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    //iwc.setRAMBufferSizeMB(350);
    // Verbose merge/flush diagnostics to stdout.
    iwc.setInfoStream(new PrintStreamInfoStream(System.out));
    if (normal == false) {
        // Benchmark mode: large RAM buffer, no background merges.
        iwc.setRAMBufferSizeMB(1024);
        iwc.setMergePolicy(NoMergePolicy.INSTANCE);
        //iwc.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
    } else {
        // 5/5 segments:
        iwc.setMaxBufferedDocs(157234);
        iwc.setRAMBufferSizeMB(-1);
    }
    //((ConcurrentMergeScheduler) iwc.getMergeScheduler()).setMaxMergesAndThreads(3, 1);
    final IndexWriter w = new IndexWriter(dir, iwc);

    final Field.Store store = Field.Store.NO;

    // Numeric field types all share the configured precision step.
    final FieldType doubleFieldType = new FieldType(
            store == Field.Store.NO ? DoubleField.TYPE_NOT_STORED : DoubleField.TYPE_STORED);
    doubleFieldType.setNumericPrecisionStep(precStep);
    doubleFieldType.freeze();

    final FieldType longFieldType = new FieldType(
            store == Field.Store.NO ? LongField.TYPE_NOT_STORED : LongField.TYPE_STORED);
    longFieldType.setNumericPrecisionStep(precStep);
    longFieldType.freeze();

    final FieldType intFieldType = new FieldType(
            store == Field.Store.NO ? IntField.TYPE_NOT_STORED : IntField.TYPE_STORED);
    intFieldType.setNumericPrecisionStep(precStep);
    intFieldType.freeze();

    // 64K buffer; the single reader is shared by all worker threads.
    InputStream is = new FileInputStream(geoNamesFile);
    final BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16);
    final AtomicInteger docsIndexed = new AtomicInteger();

    final long startMS = System.currentTimeMillis();
    Thread[] threads = new Thread[numThreads];

    // With reuse it's ~ 38% faster (41.8 sec vs 67.0 sec):
    final boolean reuseDocAndFields = false;

    for (int i = 0; i < numThreads; i++) {
        threads[i] = new Thread() {
            @Override
            public void run() {
                ParsePosition datePos = new ParsePosition(0);
                SimpleDateFormat dateParser = new SimpleDateFormat("yyyy-MM-dd", Locale.US);

                if (reuseDocAndFields) {
                    // Fast path: build one Document up front and mutate its
                    // field values for every input line.
                    Document doc = new Document();
                    IntField geoNameID = new IntField("geoNameID", 0, intFieldType);
                    doc.add(geoNameID);
                    TextField nameField = new TextField("name", "", store);
                    doc.add(nameField);
                    TextField asciiNameField = new TextField("asciiName", "", store);
                    doc.add(asciiNameField);
                    TextField alternateNameField = new TextField("alternateNames", "", store);
                    doc.add(alternateNameField);
                    StringField featureClassField = new StringField("featureClass", "", store);
                    doc.add(featureClassField);
                    StringField featureCodeField = new StringField("featureCode", "", store);
                    doc.add(featureCodeField);
                    StringField countryCodeField = new StringField("countryCode", "", store);
                    doc.add(countryCodeField);
                    StringField cc2Field = new StringField("cc2", "", store);
                    doc.add(cc2Field);
                    StringField admin1Field = new StringField("admin1", "", store);
                    doc.add(admin1Field);
                    StringField admin2Field = new StringField("admin2", "", store);
                    doc.add(admin2Field);
                    StringField admin3Field = new StringField("admin3", "", store);
                    doc.add(admin3Field);
                    StringField admin4Field = new StringField("admin4", "", store);
                    doc.add(admin4Field);
                    StringField tzField = new StringField("timezone", "", store);
                    doc.add(tzField);

                    while (true) {
                        try {

                            // Curiously BufferedReader.readLine seems to be thread-safe...
                            String line = reader.readLine();
                            if (line == null) {
                                break;
                            }
                            String[] values = line.split("\t");

                            geoNameID.setIntValue(Integer.parseInt(values[0]));
                            nameField.setStringValue(values[1]);
                            asciiNameField.setStringValue(values[2]);
                            alternateNameField.setStringValue(values[3]);

                            /*
                            if (values[4].isEmpty() == false) {
                              double v = Double.parseDouble(values[4]);
                              doc.add(new DoubleField("latitude", v, doubleFieldType));
                              doc.add(new DoubleDocValuesField("latitude", v));
                            }
                            if (values[5].isEmpty() == false) {
                              double v = Double.parseDouble(values[5]);
                              doc.add(new DoubleField("longitude", v, doubleFieldType));
                              doc.add(new DoubleDocValuesField("longitude", v));
                            }
                            */

                            featureClassField.setStringValue(values[6]);
                            featureCodeField.setStringValue(values[7]);
                            countryCodeField.setStringValue(values[8]);
                            cc2Field.setStringValue(values[9]);
                            admin1Field.setStringValue(values[10]);
                            admin2Field.setStringValue(values[11]);
                            admin3Field.setStringValue(values[12]);
                            admin4Field.setStringValue(values[13]);

                            /*
                            if (values[14].isEmpty() == false) {
                              long v = Long.parseLong(values[14]);
                              doc.add(new LongField("population", v, longFieldType));
                              doc.add(new NumericDocValuesField("population", v));
                            }
                            if (values[15].isEmpty() == false) {
                              long v = Long.parseLong(values[15]);
                              doc.add(new LongField("elevation", v, longFieldType));
                              doc.add(new NumericDocValuesField("elevation", v));
                            }
                            if (values[16].isEmpty() == false) {
                              doc.add(new IntField("dem", Integer.parseInt(values[16]), intFieldType));
                            }
                            */

                            tzField.setStringValue(values[17]);
                            /*
                            if (values[18].isEmpty() == false) {
                              datePos.setIndex(0);
                              Date date = dateParser.parse(values[18], datePos);
                              doc.add(new LongField("modified", date.getTime(), longFieldType));
                            }
                            */
                            w.addDocument(doc);
                            int count = docsIndexed.incrementAndGet();
                            if (count % 200000 == 0) {
                                long ms = System.currentTimeMillis();
                                System.out.println(count + ": " + ((ms - startMS) / 1000.0) + " sec");
                            }
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }
                } else {
                    // Default path: allocate a fresh Document per input line.
                    while (true) {
                        try {

                            // Curiously BufferedReader.readLine seems to be thread-safe...
                            String line = reader.readLine();
                            if (line == null) {
                                break;
                            }
                            String[] values = line.split("\t");

                            Document doc = new Document();

                            doc.add(new IntField("geoNameID", Integer.parseInt(values[0]), intFieldType));
                            doc.add(new TextField("name", values[1], store));
                            doc.add(new TextField("asciiName", values[2], store));
                            doc.add(new TextField("alternateNames", values[3], store));

                            if (values[4].isEmpty() == false) {
                                double v = Double.parseDouble(values[4]);
                                doc.add(new DoubleField("latitude", v, doubleFieldType));
                                doc.add(new DoubleDocValuesField("latitude", v));
                            }
                            if (values[5].isEmpty() == false) {
                                double v = Double.parseDouble(values[5]);
                                doc.add(new DoubleField("longitude", v, doubleFieldType));
                                doc.add(new DoubleDocValuesField("longitude", v));
                            }

                            doc.add(new StringField("featureClass", values[6], store));
                            doc.add(new StringField("featureCode", values[7], store));
                            doc.add(new StringField("countryCode", values[8], store));
                            doc.add(new StringField("cc2", values[9], store));
                            doc.add(new StringField("admin1Code", values[10], store));
                            doc.add(new StringField("admin2Code", values[11], store));
                            doc.add(new StringField("admin3Code", values[12], store));
                            doc.add(new StringField("admin4Code", values[13], store));

                            if (values[14].isEmpty() == false) {
                                long v = Long.parseLong(values[14]);
                                doc.add(new LongField("population", v, longFieldType));
                                doc.add(new NumericDocValuesField("population", v));
                            }
                            if (values[15].isEmpty() == false) {
                                long v = Long.parseLong(values[15]);
                                doc.add(new LongField("elevation", v, longFieldType));
                                doc.add(new NumericDocValuesField("elevation", v));
                            }
                            if (values[16].isEmpty() == false) {
                                doc.add(new IntField("dem", Integer.parseInt(values[16]), intFieldType));
                            }

                            doc.add(new StringField("timezone", values[17], store));

                            if (values[18].isEmpty() == false) {
                                datePos.setIndex(0);
                                Date date = dateParser.parse(values[18], datePos);
                                doc.add(new LongField("modified", date.getTime(), longFieldType));
                            }
                            w.addDocument(doc);
                            int count = docsIndexed.incrementAndGet();
                            if (count % 200000 == 0) {
                                long ms = System.currentTimeMillis();
                                System.out.println(count + ": " + ((ms - startMS) / 1000.0) + " sec");
                            }
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }
                }
            }
        };
        threads[i].start();
    }
    // Repeatedly reopen an NRT reader while indexing runs (~50 s total),
    // to measure near-real-time reopen cost under concurrent writes.
    DirectoryReader r = DirectoryReader.open(w, true);
    for (int i = 0; i < 100; i++) {
        DirectoryReader r2 = DirectoryReader.openIfChanged(r);
        if (r2 != null) {
            // openIfChanged returns a NEW reader; the old one must be closed.
            r.close();
            r = r2;
        }
        Thread.sleep(500);
    }
    if (r != null) {
        r.close();
        r = null;
    }
    for (int i = 0; i < numThreads; i++) {
        threads[i].join();
    }
    // FIX: the shared reader (and its underlying FileInputStream) was never
    // closed; close it now that all worker threads have finished reading.
    reader.close();
    long ms = System.currentTimeMillis();
    System.out.println(docsIndexed + ": " + ((ms - startMS) / 1000.0) + " sec");
    //System.out.println("tot conflicts: " + BytesRefHash.totConflict);
    //w.shutdown(normal);
    w.close();
    dir.close();
}

From source file: stroom.index.server.IndexShardWriterImpl.java

License: Apache License

/**
 * Opens (or, when {@code create} is true, creates) the Lucene index writer
 * for this shard.
 *
 * <p>Refuses to open shards that are deleted or whose Lucene version differs
 * from the current one. On success the shard status is set to OPEN and the
 * committed document count is recorded.
 *
 * @param create true to create a brand new index directory; false to open an
 *               existing one for appending
 * @return true if the writer was opened successfully, false otherwise
 */
private synchronized boolean doOpen(final boolean create) {
    boolean success = false;

    try {
        // Never open deleted index shards.
        if (IndexShardStatus.DELETED.equals(indexShard.getStatus())) {
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Shard is deleted " + indexShard);
            }
            return false;
        }

        // Don't open old index shards for writing.
        final Version currentVersion = LuceneVersionUtil
                .getLuceneVersion(LuceneVersionUtil.getCurrentVersion());
        final Version shardVersion = LuceneVersionUtil.getLuceneVersion(indexShard.getIndexVersion());
        if (!shardVersion.equals(currentVersion)) {
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Shard version is different to current version " + indexShard);
            }
            return false;
        }

        final long startMs = System.currentTimeMillis();
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Opening " + indexShard);
        }

        if (create) {
            // Make sure the index directory does not exist. If one does
            // then throw an exception as we don't want to overwrite an index.
            if (Files.isDirectory(dir)) {
                // This is a workaround for lingering .nfs files: ignore hidden
                // files when deciding whether an index already exists.
                // FIX 1: Files.list() returns a Stream that must be closed, so
                // it is wrapped in try-with-resources to avoid leaking a
                // directory handle.
                try (final java.util.stream.Stream<Path> files = Files.list(dir)) {
                    files.forEach(file -> {
                        // FIX 2: Path.startsWith(String) compares whole path
                        // elements, not string prefixes, so ".nfsXXXX" files were
                        // never matched and wrongly triggered the exception below.
                        // Compare the file NAME as a string instead.
                        if (Files.isDirectory(file) || !file.getFileName().toString().startsWith(".")) {
                            throw new IndexException("Attempting to create a new index in \""
                                    + dir.toAbsolutePath().toString() + "\" but one already exists.");
                        }
                    });
                }
            } else {
                // Try and make all required directories.
                try {
                    Files.createDirectories(dir);
                } catch (final IOException e) {
                    throw new IndexException("Unable to create directories for new index in \""
                            + dir.toAbsolutePath().toString() + "\"");
                }
            }
        }

        // Create lucene directory object.
        directory = new NIOFSDirectory(dir, SimpleFSLockFactory.INSTANCE);

        analyzerWrapper.setVersion(shardVersion);
        final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzerWrapper);

        // In debug mode we do extra trace in LUCENE and we also count
        // certain logging info like merge and flush counts, so you can get
        // this later using the trace method.
        if (LOGGER.isDebugEnabled()) {
            loggerPrintStream = new LoggerPrintStream(LOGGER);
            for (final String term : LOG_WATCH_TERMS.values()) {
                loggerPrintStream.addWatchTerm(term);
            }
            indexWriterConfig.setInfoStream(loggerPrintStream);
        }

        // IndexWriter to use for adding data to the index.
        indexWriter = new IndexWriter(directory, indexWriterConfig);

        final LiveIndexWriterConfig liveIndexWriterConfig = indexWriter.getConfig();
        liveIndexWriterConfig.setRAMBufferSizeMB(ramBufferSizeMB);

        // TODO : We might still want to write separate segments I'm not
        // sure on pros/cons?
        liveIndexWriterConfig.setUseCompoundFile(false);
        liveIndexWriterConfig.setMaxBufferedDocs(Integer.MAX_VALUE);

        // Check the number of committed docs in this shard.
        documentCount.set(indexWriter.numDocs());
        lastDocumentCount = documentCount.get();
        if (create) {
            if (lastDocumentCount != 0) {
                LOGGER.error("Index should be new but already contains docs: " + lastDocumentCount);
            }
        } else if (indexShard.getDocumentCount() != lastDocumentCount) {
            LOGGER.error("Mismatch document count.  Index says " + lastDocumentCount + " DB says "
                    + indexShard.getDocumentCount());
        }

        // We have opened the index so update the DB object.
        setStatus(IndexShardStatus.OPEN);

        // Output some debug.
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("getIndexWriter() - Opened " + indexShard + " in "
                    + (System.currentTimeMillis() - startMs) + "ms");
        }

        success = true;
    } catch (final LockObtainFailedException t) {
        // Another process/thread holds the write lock; expected occasionally.
        LOGGER.warn(t.getMessage());
    } catch (final Throwable t) {
        // Boundary catch: opening must never propagate; report failure via
        // the return value instead.
        LOGGER.error(t.getMessage(), t);
    }

    return success;
}